Skip to content

Commit 214383f

Browse files
lihuoran, Jinyu-W, and Jinyu Wang
authored
RL benchmark on GYM (#575)
* PPO, SAC, DDPG passed * Explore in SAC * Test GYM on server * Sync server changes * pre-commit * Ready to try on server * . * . * . * . * . * Performance OK * Move to tests * Remove old versions * PPO done * Start to test AC * Start to test SAC * SAC test passed * update for some PR comments; Add a MARKDOWN file (#576) Co-authored-by: Jinyu Wang <[email protected]> * Use FullyConnected to replace mlp * Update action bound * Pre-commit --------- Co-authored-by: Jinyu-W <[email protected]> Co-authored-by: Jinyu Wang <[email protected]>
1 parent eb6324c commit 214383f

File tree

24 files changed

+604
-14
lines changed

24 files changed

+604
-14
lines changed

examples/cim/rl/algorithms/ac.py

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -11,6 +11,7 @@
1111
actor_net_conf = {
1212
"hidden_dims": [256, 128, 64],
1313
"activation": torch.nn.Tanh,
14+
"output_activation": torch.nn.Tanh,
1415
"softmax": True,
1516
"batch_norm": False,
1617
"head": True,
@@ -19,6 +20,7 @@
1920
"hidden_dims": [256, 128, 64],
2021
"output_dim": 1,
2122
"activation": torch.nn.LeakyReLU,
23+
"output_activation": torch.nn.LeakyReLU,
2224
"softmax": False,
2325
"batch_norm": True,
2426
"head": True,

examples/cim/rl/algorithms/dqn.py

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -12,6 +12,7 @@
1212
q_net_conf = {
1313
"hidden_dims": [256, 128, 64, 32],
1414
"activation": torch.nn.LeakyReLU,
15+
"output_activation": torch.nn.LeakyReLU,
1516
"softmax": False,
1617
"batch_norm": True,
1718
"skip_connection": False,

examples/cim/rl/algorithms/maddpg.py

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -14,6 +14,7 @@
1414
actor_net_conf = {
1515
"hidden_dims": [256, 128, 64],
1616
"activation": torch.nn.Tanh,
17+
"output_activation": torch.nn.Tanh,
1718
"softmax": True,
1819
"batch_norm": False,
1920
"head": True,
@@ -22,6 +23,7 @@
2223
"hidden_dims": [256, 128, 64],
2324
"output_dim": 1,
2425
"activation": torch.nn.LeakyReLU,
26+
"output_activation": torch.nn.LeakyReLU,
2527
"softmax": False,
2628
"batch_norm": True,
2729
"head": True,

examples/rl/cim.yml

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -5,8 +5,8 @@
55
# Please refer to `maro/rl/workflows/config/template.yml` for the complete template and detailed explanations.
66

77
# Run this workflow by executing one of the following commands:
8-
# - python .\examples\rl\run_rl_example.py .\examples\rl\cim.yml
9-
# - (Requires installing MARO from source) maro local run .\examples\rl\cim.yml
8+
# - python ./examples/rl/run.py ./examples/rl/cim.yml
9+
# - (Requires installing MARO from source) maro local run ./examples/rl/cim.yml
1010

1111
job: cim_rl_workflow
1212
scenario_path: "examples/cim/rl"

examples/rl/cim_distributed.yml

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -1,12 +1,12 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT license.
33

4-
# Example RL config file for CIM scenario.
4+
# Example RL config file for CIM scenario (distributed version).
55
# Please refer to `maro/rl/workflows/config/template.yml` for the complete template and detailed explanations.
66

77
# Run this workflow by executing one of the following commands:
8-
# - python .\examples\rl\run_rl_example.py .\examples\rl\cim.yml
9-
# - (Requires installing MARO from source) maro local run .\examples\rl\cim.yml
8+
# - python ./examples/rl/run.py ./examples/rl/cim_distributed.yml
9+
# - (Requires installing MARO from source) maro local run ./examples/rl/cim_distributed.yml
1010

1111
job: cim_rl_workflow
1212
scenario_path: "examples/cim/rl"

examples/rl/vm_scheduling.yml

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -5,8 +5,8 @@
55
# Please refer to `maro/rl/workflows/config/template.yml` for the complete template and detailed explanations.
66

77
# Run this workflow by executing one of the following commands:
8-
# - python .\examples\rl\run_rl_example.py .\examples\rl\vm_scheduling.yml
9-
# - (Requires installing MARO from source) maro local run .\examples\rl\vm_scheduling.yml
8+
# - python ./examples/rl/run.py ./examples/rl/vm_scheduling.yml
9+
# - (Requires installing MARO from source) maro local run ./examples/rl/vm_scheduling.yml
1010

1111
job: vm_scheduling_rl_workflow
1212
scenario_path: "examples/vm_scheduling/rl"

examples/vm_scheduling/rl/algorithms/ac.py

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -11,6 +11,7 @@
1111
actor_net_conf = {
1212
"hidden_dims": [64, 32, 32],
1313
"activation": torch.nn.LeakyReLU,
14+
"output_activation": torch.nn.LeakyReLU,
1415
"softmax": True,
1516
"batch_norm": False,
1617
"head": True,
@@ -19,6 +20,7 @@
1920
critic_net_conf = {
2021
"hidden_dims": [256, 128, 64],
2122
"activation": torch.nn.LeakyReLU,
23+
"output_activation": torch.nn.LeakyReLU,
2224
"softmax": False,
2325
"batch_norm": False,
2426
"head": True,

examples/vm_scheduling/rl/algorithms/dqn.py

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -14,6 +14,7 @@
1414
q_net_conf = {
1515
"hidden_dims": [64, 128, 256],
1616
"activation": torch.nn.LeakyReLU,
17+
"output_activation": torch.nn.LeakyReLU,
1718
"softmax": False,
1819
"batch_norm": False,
1920
"skip_connection": False,

maro/rl/model/fc_block.py

Lines changed: 19 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -39,7 +39,8 @@ def __init__(
3939
input_dim: int,
4040
output_dim: int,
4141
hidden_dims: List[int],
42-
activation: Optional[Type[torch.nn.Module]] = nn.ReLU,
42+
activation: Optional[Type[torch.nn.Module]] = None,
43+
output_activation: Optional[Type[torch.nn.Module]] = None,
4344
head: bool = False,
4445
softmax: bool = False,
4546
batch_norm: bool = False,
@@ -54,7 +55,8 @@ def __init__(
5455
self._output_dim = output_dim
5556

5657
# network features
57-
self._activation = activation() if activation else None
58+
self._activation = activation if activation else None
59+
self._output_activation = output_activation if output_activation else None
5860
self._head = head
5961
self._softmax = nn.Softmax(dim=1) if softmax else None
6062
self._batch_norm = batch_norm
@@ -70,9 +72,13 @@ def __init__(
7072

7173
# build the net
7274
dims = [self._input_dim] + self._hidden_dims
73-
layers = [self._build_layer(in_dim, out_dim) for in_dim, out_dim in zip(dims, dims[1:])]
75+
layers = [
76+
self._build_layer(in_dim, out_dim, activation=self._activation) for in_dim, out_dim in zip(dims, dims[1:])
77+
]
7478
# top layer
75-
layers.append(self._build_layer(dims[-1], self._output_dim, head=self._head))
79+
layers.append(
80+
self._build_layer(dims[-1], self._output_dim, head=self._head, activation=self._output_activation),
81+
)
7682

7783
self._net = nn.Sequential(*layers)
7884

@@ -101,7 +107,13 @@ def input_dim(self) -> int:
101107
def output_dim(self) -> int:
102108
return self._output_dim
103109

104-
def _build_layer(self, input_dim: int, output_dim: int, head: bool = False) -> nn.Module:
110+
def _build_layer(
111+
self,
112+
input_dim: int,
113+
output_dim: int,
114+
head: bool = False,
115+
activation: Type[torch.nn.Module] = None,
116+
) -> nn.Module:
105117
"""Build a basic layer.
106118
107119
BN -> Linear -> Activation -> Dropout
@@ -110,8 +122,8 @@ def _build_layer(self, input_dim: int, output_dim: int, head: bool = False) -> n
110122
if self._batch_norm:
111123
components.append(("batch_norm", nn.BatchNorm1d(input_dim)))
112124
components.append(("linear", nn.Linear(input_dim, output_dim)))
113-
if not head and self._activation is not None:
114-
components.append(("activation", self._activation))
125+
if not head and activation is not None:
126+
components.append(("activation", activation()))
115127
if not head and self._dropout_p:
116128
components.append(("dropout", nn.Dropout(p=self._dropout_p)))
117129
return nn.Sequential(OrderedDict(components))

0 commit comments

Comments (0)