Skip to content

Commit 214383f

Browse files
lihuoran, Jinyu-W, and Jinyu Wang
authored
RL benchmark on GYM (#575)
* PPO, SAC, DDPG passed * Explore in SAC * Test GYM on server * Sync server changes * pre-commit * Ready to try on server * . * . * . * . * . * Performance OK * Move to tests * Remove old versions * PPO done * Start to test AC * Start to test SAC * SAC test passed * update for some PR comments; Add a MARKDOWN file (#576) Co-authored-by: Jinyu Wang <[email protected]> * Use FullyConnected to replace mlp * Update action bound * Pre-commit --------- Co-authored-by: Jinyu-W <[email protected]> Co-authored-by: Jinyu Wang <[email protected]>
1 parent eb6324c commit 214383f

File tree

24 files changed

+604
-14
lines changed

24 files changed

+604
-14
lines changed

examples/cim/rl/algorithms/ac.py

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -11,6 +11,7 @@
1111
actor_net_conf = {
1212
"hidden_dims": [256, 128, 64],
1313
"activation": torch.nn.Tanh,
14+
"output_activation": torch.nn.Tanh,
1415
"softmax": True,
1516
"batch_norm": False,
1617
"head": True,
@@ -19,6 +20,7 @@
1920
"hidden_dims": [256, 128, 64],
2021
"output_dim": 1,
2122
"activation": torch.nn.LeakyReLU,
23+
"output_activation": torch.nn.LeakyReLU,
2224
"softmax": False,
2325
"batch_norm": True,
2426
"head": True,

examples/cim/rl/algorithms/dqn.py

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -12,6 +12,7 @@
1212
q_net_conf = {
1313
"hidden_dims": [256, 128, 64, 32],
1414
"activation": torch.nn.LeakyReLU,
15+
"output_activation": torch.nn.LeakyReLU,
1516
"softmax": False,
1617
"batch_norm": True,
1718
"skip_connection": False,

examples/cim/rl/algorithms/maddpg.py

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -14,6 +14,7 @@
1414
actor_net_conf = {
1515
"hidden_dims": [256, 128, 64],
1616
"activation": torch.nn.Tanh,
17+
"output_activation": torch.nn.Tanh,
1718
"softmax": True,
1819
"batch_norm": False,
1920
"head": True,
@@ -22,6 +23,7 @@
2223
"hidden_dims": [256, 128, 64],
2324
"output_dim": 1,
2425
"activation": torch.nn.LeakyReLU,
26+
"output_activation": torch.nn.LeakyReLU,
2527
"softmax": False,
2628
"batch_norm": True,
2729
"head": True,

examples/rl/cim.yml

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -5,8 +5,8 @@
55
# Please refer to `maro/rl/workflows/config/template.yml` for the complete template and detailed explanations.
66

77
# Run this workflow by executing one of the following commands:
8-
# - python .\examples\rl\run_rl_example.py .\examples\rl\cim.yml
9-
# - (Requires installing MARO from source) maro local run .\examples\rl\cim.yml
8+
# - python ./examples/rl/run.py ./examples/rl/cim.yml
9+
# - (Requires installing MARO from source) maro local run ./examples/rl/cim.yml
1010

1111
job: cim_rl_workflow
1212
scenario_path: "examples/cim/rl"

examples/rl/cim_distributed.yml

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -1,12 +1,12 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT license.
33

4-
# Example RL config file for CIM scenario.
4+
# Example RL config file for CIM scenario (distributed version).
55
# Please refer to `maro/rl/workflows/config/template.yml` for the complete template and detailed explanations.
66

77
# Run this workflow by executing one of the following commands:
8-
# - python .\examples\rl\run_rl_example.py .\examples\rl\cim.yml
9-
# - (Requires installing MARO from source) maro local run .\examples\rl\cim.yml
8+
# - python ./examples/rl/run.py ./examples/rl/cim_distributed.yml
9+
# - (Requires installing MARO from source) maro local run ./examples/rl/cim_distributed.yml
1010

1111
job: cim_rl_workflow
1212
scenario_path: "examples/cim/rl"

examples/rl/vm_scheduling.yml

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -5,8 +5,8 @@
55
# Please refer to `maro/rl/workflows/config/template.yml` for the complete template and detailed explanations.
66

77
# Run this workflow by executing one of the following commands:
8-
# - python .\examples\rl\run_rl_example.py .\examples\rl\vm_scheduling.yml
9-
# - (Requires installing MARO from source) maro local run .\examples\rl\vm_scheduling.yml
8+
# - python ./examples/rl/run.py ./examples/rl/vm_scheduling.yml
9+
# - (Requires installing MARO from source) maro local run ./examples/rl/vm_scheduling.yml
1010

1111
job: vm_scheduling_rl_workflow
1212
scenario_path: "examples/vm_scheduling/rl"

examples/vm_scheduling/rl/algorithms/ac.py

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -11,6 +11,7 @@
1111
actor_net_conf = {
1212
"hidden_dims": [64, 32, 32],
1313
"activation": torch.nn.LeakyReLU,
14+
"output_activation": torch.nn.LeakyReLU,
1415
"softmax": True,
1516
"batch_norm": False,
1617
"head": True,
@@ -19,6 +20,7 @@
1920
critic_net_conf = {
2021
"hidden_dims": [256, 128, 64],
2122
"activation": torch.nn.LeakyReLU,
23+
"output_activation": torch.nn.LeakyReLU,
2224
"softmax": False,
2325
"batch_norm": False,
2426
"head": True,

examples/vm_scheduling/rl/algorithms/dqn.py

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -14,6 +14,7 @@
1414
q_net_conf = {
1515
"hidden_dims": [64, 128, 256],
1616
"activation": torch.nn.LeakyReLU,
17+
"output_activation": torch.nn.LeakyReLU,
1718
"softmax": False,
1819
"batch_norm": False,
1920
"skip_connection": False,

maro/rl/model/fc_block.py

Lines changed: 19 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -39,7 +39,8 @@ def __init__(
3939
input_dim: int,
4040
output_dim: int,
4141
hidden_dims: List[int],
42-
activation: Optional[Type[torch.nn.Module]] = nn.ReLU,
42+
activation: Optional[Type[torch.nn.Module]] = None,
43+
output_activation: Optional[Type[torch.nn.Module]] = None,
4344
head: bool = False,
4445
softmax: bool = False,
4546
batch_norm: bool = False,
@@ -54,7 +55,8 @@ def __init__(
5455
self._output_dim = output_dim
5556

5657
# network features
57-
self._activation = activation() if activation else None
58+
self._activation = activation if activation else None
59+
self._output_activation = output_activation if output_activation else None
5860
self._head = head
5961
self._softmax = nn.Softmax(dim=1) if softmax else None
6062
self._batch_norm = batch_norm
@@ -70,9 +72,13 @@ def __init__(
7072

7173
# build the net
7274
dims = [self._input_dim] + self._hidden_dims
73-
layers = [self._build_layer(in_dim, out_dim) for in_dim, out_dim in zip(dims, dims[1:])]
75+
layers = [
76+
self._build_layer(in_dim, out_dim, activation=self._activation) for in_dim, out_dim in zip(dims, dims[1:])
77+
]
7478
# top layer
75-
layers.append(self._build_layer(dims[-1], self._output_dim, head=self._head))
79+
layers.append(
80+
self._build_layer(dims[-1], self._output_dim, head=self._head, activation=self._output_activation),
81+
)
7682

7783
self._net = nn.Sequential(*layers)
7884

@@ -101,7 +107,13 @@ def input_dim(self) -> int:
101107
def output_dim(self) -> int:
102108
return self._output_dim
103109

104-
def _build_layer(self, input_dim: int, output_dim: int, head: bool = False) -> nn.Module:
110+
def _build_layer(
111+
self,
112+
input_dim: int,
113+
output_dim: int,
114+
head: bool = False,
115+
activation: Type[torch.nn.Module] = None,
116+
) -> nn.Module:
105117
"""Build a basic layer.
106118
107119
BN -> Linear -> Activation -> Dropout
@@ -110,8 +122,8 @@ def _build_layer(self, input_dim: int, output_dim: int, head: bool = False) -> n
110122
if self._batch_norm:
111123
components.append(("batch_norm", nn.BatchNorm1d(input_dim)))
112124
components.append(("linear", nn.Linear(input_dim, output_dim)))
113-
if not head and self._activation is not None:
114-
components.append(("activation", self._activation))
125+
if not head and activation is not None:
126+
components.append(("activation", activation()))
115127
if not head and self._dropout_p:
116128
components.append(("dropout", nn.Dropout(p=self._dropout_p)))
117129
return nn.Sequential(OrderedDict(components))

0 commit comments

Comments (0)