Add dqn and ac for VM scheduling#358
Add dqn and ac for VM scheduling #358 — MicrosoftHam wants to merge 3 commits into microsoft:v0.2_rl_refinement from
Conversation
examples/vm_scheduling/reinforcement_learning/ac/agent/ac_net.py
Outdated
Show resolved
Hide resolved
|
|
||
| action_prob = Categorical(self.forward(states, critic=False)[0] * legal_action) # (batch_size, action_space_size) | ||
| action = action_prob.sample() | ||
| log_p = action_prob.log_prob(action) |
There was a problem hiding this comment.
I'm wondering whether we should use the action_prob without multiplying by legal_action to calculate the log_p.
Similar concern as the reward shaping case we discussed several days ago (when only the postpone is valid but model output prefers some PM).
examples/vm_scheduling/reinforcement_learning/ac/agent/models/combine_net.py
Show resolved
Hide resolved
examples/vm_scheduling/reinforcement_learning/ac/agent/models/combine_net.py
Outdated
Show resolved
Hide resolved
examples/vm_scheduling/reinforcement_learning/ac/agent/models/combine_net.py
Outdated
Show resolved
Hide resolved
examples/vm_scheduling/reinforcement_learning/ac/agent/models/combine_net.py
Outdated
Show resolved
Hide resolved
examples/vm_scheduling/reinforcement_learning/ac/agent/models/combine_net.py
Outdated
Show resolved
Hide resolved
| class SequenceNet(AbsBlock): | ||
| """Fully connected network with optional batch normalization, activation and dropout components. | ||
|
|
||
| Args: |
There was a problem hiding this comment.
The docstring does not correspond to the actual parameters. Absence is better than errors.
| from maro.rl import AbsBlock | ||
|
|
||
|
|
||
| class SequenceNet(AbsBlock): |
| from maro.rl import AbsBlock | ||
|
|
||
|
|
||
| class SequenceNet(AbsBlock): |
There was a problem hiding this comment.
There doesn't seem to be a need to subclass AbsBlock. Can you try inheriting from AbsCoreModel and use PM and VM as components?
| self.component["critic"](states) if critic else None | ||
| ) | ||
|
|
||
| def get_action(self, states, legal_action, training=True): |
There was a problem hiding this comment.
Can you make legal_action a part of states so we don't have to change the call interface? There is no restriction on what type states should be in forward.
There was a problem hiding this comment.
Use a tuple (states, legal_action) as the states input to the function.
| self._skip_connection = skip_connection | ||
|
|
||
| # build the pm sequence net | ||
| pm_dims = [self._pm_input_dim*self._pm_num] + self._hidden_dims[:2] |
There was a problem hiding this comment.
pm_dims = [self._pm_input_dim * self._pm_num] + self._hidden_dims[:2]
| self._name = name | ||
|
|
||
| def forward(self, x): | ||
| pm_info_input = x[:, :self._pm_state_dim].view(-1, self._pm_window_size, self._pm_num * self._pm_input_dim) |
There was a problem hiding this comment.
why previous _pm_state_dim? (why not x.view(...))
| # self._pm_sequence_rnn.flatten_parameters() | ||
| # pm_sequence_feature, _ = self._pm_sequence_rnn(pm_info_feature) | ||
|
|
||
| vm_info_input = x[:, -self._vm_state_dim:].view(-1, self._vm_window_size, self._vm_input_dim) |
There was a problem hiding this comment.
similar question to the pm one
| log_p_new = torch.clamp(log_p_new, min=-20) | ||
|
|
||
| if self.config.clip_ratio is not None: | ||
| ratio = torch.exp(log_p_new - log_p) |
There was a problem hiding this comment.
why design the ratio like this? what's the meaning of it?
There was a problem hiding this comment.
to use the PPO algorithm
| actor_loss = -(torch.min(ratio * advantages, clip_ratio * advantages)).mean() | ||
| else: | ||
| dist = Categorical(action_probs) | ||
| actor_loss = -(log_p_new * advantages + 10 * dist.entropy()).mean() |
There was a problem hiding this comment.
to encourage bigger entropy?
There was a problem hiding this comment.
To prevent the action probability from converging too fast.
examples/vm_scheduling/reinforcement_learning/ac/agent/vm_ac.py
Outdated
Show resolved
Hide resolved
examples/vm_scheduling/reinforcement_learning/common/__init__.py
Outdated
Show resolved
Hide resolved
| from collections import defaultdict | ||
|
|
||
| from maro.rl import ExperienceSet | ||
| from examples.vm_scheduling.refine_rl.common import VMEnvWrapper |
There was a problem hiding this comment.
Is it still runnable? examples.vm_scheduling.refine_rl.common seems not to exist (at least in this PR).
There was a problem hiding this comment.
It is runnable in my environment, since I have examples.vm_scheduling.refine_rl.common locally; that's why I didn't notice this.
| del buf["states"][:-1] | ||
| del buf["actions"][:-1] | ||
| del buf["rewards"][:-1] | ||
| del buf["info"][:-1] |
There was a problem hiding this comment.
return buf["info"][1:] but del buf["info"][:-1]?
There was a problem hiding this comment.
info stores the legal_action. In DQN, the exp_set should store the next_legal_action just like next_states, but legal_action is useless in the AC training process, so I used the same treatment as in DQN.
There was a problem hiding this comment.
If it is useless, why add it into buf["info"] then?
| __all__ = [ | ||
| "ACNet", | ||
| "VMActorCritic", | ||
| "CombineNet", "SequenceNet" |
There was a problem hiding this comment.
no CombineNet and SequenceNet anymore
| total_pm_info[:, 3] /= self._max_memory_capacity | ||
|
|
||
| # get the remaining cpu and memory of the pms | ||
| remain_cpu = (1 - total_pm_info[:, 2]).reshape(1, self._pm_num, 1) |
There was a problem hiding this comment.
For the situation where the PM capacities vary, this calculation method is wrong.
remain_cpu = (total_pm_info[:, 0] - total_pm_info[:, 2]) / max_cpu_capacity
remain_memory = (total_pm_info[:, 1] - total_pm_info[:, 3]) / max_memory_capacity
would be better
There was a problem hiding this comment.
You're right, I'll fix it
| self._pm_num = pm_num | ||
| self._durations = durations | ||
| self._vm_states = np.load(vm_state_path) | ||
| self._dim = (pm_num * 2) * pm_window_size + len(VM_ATTRIBUTES) * vm_window_size |
There was a problem hiding this comment.
Add comment for the PM attributes used (why * 2)
There was a problem hiding this comment.
I'll add a variable PM_DIM and explain the variable
| total_pm_info = np.concatenate((remain_cpu, remain_memory), axis=2) | ||
|
|
||
| # get the sequence pms' information | ||
| self._history_pm_state = np.concatenate((self._history_pm_state, total_pm_info), axis=0) |
There was a problem hiding this comment.
The history_pm_state will store all of the history information. Although some of it will never be used, it's difficult to remove data from the numpy array, so I chose to store it all.
| vm_info = np.array([ | ||
| decision_event.vm_cpu_cores_requirement, | ||
| decision_event.vm_memory_requirement, | ||
| min(self._durations - env.tick, decision_event.vm_lifetime) / 200, |
There was a problem hiding this comment.
200 or adjusted based on the duration in the config?
There was a problem hiding this comment.
I'll add a new variable to replace the 200
| vm_info[2] = (vm_info[2] * 1.0) / 200 | ||
| vm_info[3] = (self._durations - vm_info[3]) * 1.0 / 200 | ||
| else: | ||
| vm_info = np.zeros(len(VM_ATTRIBUTES), dtype=np.float) |
There was a problem hiding this comment.
since the total_vm_info already initialized as zeros, no need to assign zeros again
There was a problem hiding this comment.
I'll remove the else condition
|
|
||
| total_vm_info[self._vm_window_size - (idx - self._st + 1), :] = vm_info | ||
|
|
||
| self._st = (self._st + 1) % self._vm_states.shape[0] |
There was a problem hiding this comment.
Potential bug: there is no VM info/order checking. If the request info saved in self._vm_states covers fewer requests than the actual number of requests, wrong VM state info (restarting from index 0) would be used.
There was a problem hiding this comment.
I'll add a check to determine whether the VM states and the current simulation match.
| PM_ATTRIBUTES = ["cpu_cores_capacity", "memory_capacity", \ | ||
| "cpu_cores_allocated", "memory_allocated"] | ||
|
|
||
| VM_ATTRIBUTES = ["cpu_cores_requirement", "memory_requirement", "lifetime", "remain_time", "total_income"] |
There was a problem hiding this comment.
Since the latter 3 are actual information obtained by peeking, how about adding switches in the config to decide whether to use them or not?
There was a problem hiding this comment.
I'll add a switch to determine peeking or not
| import pandas as pd | ||
|
|
||
|
|
||
| PM_ATTRIBUTES = ["cpu_cores_capacity", "memory_capacity", \ |
There was a problem hiding this comment.
The PM_ATTRIBUTES here are not aligned with the VM_ATTRIBUTES.
The previous one is used to extract features from the snapshot_list, but the latter one is the list of features used.
There was a problem hiding this comment.
I will add PM_EXTRACTED_ATTRIBUTES to represent the features extracted from the snapshot_list, and PM_ATTRIBUTES to represent the list of features used.
| return total_vm_info | ||
|
|
||
| def _get_legal_pm(self, decision_event, total_pm_info): | ||
| # get the legal pm |
There was a problem hiding this comment.
# get the legal pm
legal_pm = np.zeros(self._pm_num + 1)
legal_pm[self.pm_num] = 1
if len(decision_event.valid_pms) > 0:
remain_cpu_dict = dict()
for pm in decision_event.valid_pms:
# if two pm has same remaining cpu, only choose the one which has smaller id
if total_pm_info[-1, pm, 0] not in remain_cpu_dict.keys():
remain_cpu_dict[total_pm_info[-1, pm, 0]] = 1
legal_pm[pm] = 1
would be enough
| TICKS_PER_HOUR: 12 | ||
|
|
||
| # Path of the vm table data. | ||
| VM_TABLE: "maro/tests/data/vm_scheduling/vmtable_short.bin" |
| VM_TABLE: "maro/tests/data/vm_scheduling/vmtable_short.bin" | ||
|
|
||
| # Path of the cpu readings file. | ||
| CPU_READINGS: "maro/tests/data/vm_scheduling/vm_cpu_readings-file-2-of-short.bin" |
|
|
||
| vm_table, vm, cpu = [], [], [] | ||
|
|
||
| vmtable_data_path = "data/vmtable_10k.csv" |
There was a problem hiding this comment.
where does this file come from?
| @@ -0,0 +1,66 @@ | |||
| import pandas as pd | |||
There was a problem hiding this comment.
Add comments for this file, used for what ?
Or you can add an README for this whole example, to introduce what's the files in this folder, what's the steps (generating data, get VM states, run ...)
| plt.switch_backend('agg') | ||
|
|
||
|
|
||
| class VMLearner: |
There was a problem hiding this comment.
So you implement a new Learner instead of using the original one?
| @staticmethod | ||
| def _get_td_errors( | ||
| q_values, next_q_values, | ||
| rewards, gamma, loss_func |
|
Closed since the RL examples for VM are added in another PR: #375 |
Description
Implement reinforcement learning algorithm for VM scheduling simulation.
Linked issue(s)/Pull request(s)
Type of Change
Related Component
Has Been Tested
Needs Follow Up Actions
Checklist