
Commit 841b439

vreis authored and facebook-github-bot committed
Remove local_variables from train_step/eval_step (#412)
Summary:
Pull Request resolved: #412

This is part of a series of diffs to eliminate local_variables (see D20171981). Now that we've removed local_variables from step, remove it from train_step and eval_step.

Reviewed By: mannatsingh

Differential Revision: D20170006

fbshipit-source-id: a5c6525424fc89711de40b8b6906b44c8aa608f4
1 parent e29cc72 commit 841b439
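
To illustrate the effect of this change at call sites, here is a minimal Python sketch that mirrors what ClassyTask.step does after this diff. The function run_one_step and its arguments are illustrative only, not part of this commit:

def run_one_step(task, use_gpu):
    # After this commit, the step methods take only the device flag; the
    # scratch dict (local_variables) that used to be threaded through is gone.
    if task.train:
        task.train_step(use_gpu)
    else:
        task.eval_step(use_gpu)
    # Intermediate values that hooks previously read from local_variables are
    # now published on the task as task.last_batch, a LastBatchInfo holding
    # loss, output, target and sample.
    return task.last_batch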

File tree

5 files changed: +35, -78 lines


classy_vision/tasks/classification_task.py

Lines changed: 26 additions & 64 deletions
@@ -638,120 +638,85 @@ def set_classy_state(self, state):
         # Set up pytorch module in train vs eval mode, update optimizer.
         self._set_model_train_mode()

-    def eval_step(self, use_gpu, local_variables=None):
-        if local_variables is None:
-            local_variables = {}
-
+    def eval_step(self, use_gpu):
         self.last_batch = None

         # Process next sample
         sample = next(self.get_data_iterator())
-        local_variables["sample"] = sample

-        assert (
-            isinstance(local_variables["sample"], dict)
-            and "input" in local_variables["sample"]
-            and "target" in local_variables["sample"]
-        ), (
+        assert isinstance(sample, dict) and "input" in sample and "target" in sample, (
            f"Returned sample [{sample}] is not a map with 'input' and"
            + "'target' keys"
        )

         # Copy sample to GPU
-        local_variables["target"] = local_variables["sample"]["target"]
+        target = sample["target"]
         if use_gpu:
-            for key, value in local_variables["sample"].items():
-                local_variables["sample"][key] = recursive_copy_to_gpu(
-                    value, non_blocking=True
-                )
+            for key, value in sample.items():
+                sample[key] = recursive_copy_to_gpu(value, non_blocking=True)

         with torch.no_grad():
-            local_variables["output"] = self.model(local_variables["sample"]["input"])
+            output = self.model(sample["input"])

-            local_variables["local_loss"] = self.compute_loss(
-                local_variables["output"], local_variables["sample"]
-            )
+            local_loss = self.compute_loss(output, sample)

-            local_variables["loss"] = local_variables["local_loss"].detach().clone()
-            local_variables["loss"] = all_reduce_mean(local_variables["loss"])
+            loss = local_loss.detach().clone()
+            loss = all_reduce_mean(loss)

-            self.losses.append(
-                local_variables["loss"].data.cpu().item()
-                * local_variables["target"].size(0)
-            )
+            self.losses.append(loss.data.cpu().item() * target.size(0))

-            self.update_meters(local_variables["output"], local_variables["sample"])
+            self.update_meters(output, sample)

         # Move some data to the task so hooks get a chance to access it
         self.last_batch = LastBatchInfo(
-            loss=local_variables["loss"],
-            output=local_variables["output"],
-            target=local_variables["target"],
-            sample=local_variables["sample"],
+            loss=loss, output=output, target=target, sample=sample
         )

-    def train_step(self, use_gpu, local_variables=None):
+    def train_step(self, use_gpu):
         """Train step to be executed in train loop

         Args:
             use_gpu: if true, execute training on GPU
-            local_variables: Dict containing intermediate values
-                in train_step for access by hooks
         """

-        if local_variables is None:
-            local_variables = {}
-
         self.last_batch = None

         # Process next sample
         sample = next(self.get_data_iterator())
-        local_variables["sample"] = sample

-        assert (
-            isinstance(local_variables["sample"], dict)
-            and "input" in local_variables["sample"]
-            and "target" in local_variables["sample"]
-        ), (
+        assert isinstance(sample, dict) and "input" in sample and "target" in sample, (
            f"Returned sample [{sample}] is not a map with 'input' and"
            + "'target' keys"
        )

         # Copy sample to GPU
-        local_variables["target"] = local_variables["sample"]["target"]
+        target = sample["target"]
         if use_gpu:
-            for key, value in local_variables["sample"].items():
-                local_variables["sample"][key] = recursive_copy_to_gpu(
-                    value, non_blocking=True
-                )
+            for key, value in sample.items():
+                sample[key] = recursive_copy_to_gpu(value, non_blocking=True)

         with torch.enable_grad():
             # Forward pass
-            local_variables["output"] = self.model(local_variables["sample"]["input"])
+            output = self.model(sample["input"])

-            local_variables["local_loss"] = self.compute_loss(
-                local_variables["output"], local_variables["sample"]
-            )
+            local_loss = self.compute_loss(output, sample)

-            local_variables["loss"] = local_variables["local_loss"].detach().clone()
-            local_variables["loss"] = all_reduce_mean(local_variables["loss"])
+            loss = local_loss.detach().clone()
+            loss = all_reduce_mean(loss)

-            self.losses.append(
-                local_variables["loss"].data.cpu().item()
-                * local_variables["target"].size(0)
-            )
+            self.losses.append(loss.data.cpu().item() * target.size(0))

-            self.update_meters(local_variables["output"], local_variables["sample"])
+            self.update_meters(output, sample)

             # Run backwards pass / update optimizer
             if self.amp_opt_level is not None:
                 self.optimizer.zero_grad()
                 with apex.amp.scale_loss(
-                    local_variables["local_loss"], self.optimizer.optimizer
+                    local_loss, self.optimizer.optimizer
                 ) as scaled_loss:
                     scaled_loss.backward()
             else:
-                self.optimizer.backward(local_variables["local_loss"])
+                self.optimizer.backward(local_loss)

             self.optimizer.update_schedule_on_step(self.where)
             self.optimizer.step()
@@ -760,10 +725,7 @@ def train_step(self, use_gpu, local_variables=None):

         # Move some data to the task so hooks get a chance to access it
         self.last_batch = LastBatchInfo(
-            loss=local_variables["loss"],
-            output=local_variables["output"],
-            target=local_variables["target"],
-            sample=local_variables["sample"],
+            loss=loss, output=output, target=target, sample=sample
         )

     def compute_loss(self, model_output, sample):
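
Since ClassificationTask now exposes per-step data on task.last_batch instead of a shared local_variables dict, a hook-style consumer reads it from there. A minimal sketch, assuming only that hooks receive the task in on_step (as the hook.on_step(self) call in classy_task.py below suggests); LastBatchLoggingHook is hypothetical:

class LastBatchLoggingHook:
    # Hypothetical, illustration only: reads the LastBatchInfo that
    # ClassificationTask stores on task.last_batch at the end of each step.
    def on_step(self, task):
        batch = task.last_batch
        if batch is None:
            # last_batch is reset to None at the start of every step
            return
        # batch.loss is the detached, all-reduced loss tensor for this batch
        print(f"loss={batch.loss.item():.4f}, batch_size={batch.target.size(0)}")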

classy_vision/tasks/classy_task.py

Lines changed: 5 additions & 9 deletions
@@ -107,16 +107,14 @@ def prepare(
         pass

     @abstractmethod
-    def train_step(self, use_gpu, local_variables: Optional[Dict] = None) -> None:
+    def train_step(self, use_gpu) -> None:
         """
         Run a train step.

         This corresponds to training over one batch of data from the dataloaders.

         Args:
             use_gpu: True if training on GPUs, False otherwise
-            local_variables: Local variables created in the function. Can be passed to
-                custom :class:`classy_vision.hooks.ClassyHook`.
         """
         pass

@@ -157,26 +155,24 @@ def on_end(self, local_variables):
         pass

     @abstractmethod
-    def eval_step(self, use_gpu, local_variables: Optional[Dict] = None) -> None:
+    def eval_step(self, use_gpu) -> None:
         """
         Run an evaluation step.

         This corresponds to evaluating the model over one batch of data.

         Args:
             use_gpu: True if training on GPUs, False otherwise
-            local_variables: Local variables created in the function. Can be passed to
-                custom :class:`classy_vision.hooks.ClassyHook`.
         """
         pass

-    def step(self, use_gpu, local_variables: Optional[Dict] = None) -> None:
+    def step(self, use_gpu) -> None:
         from classy_vision.hooks import ClassyHookFunctions

         if self.train:
-            self.train_step(use_gpu, local_variables)
+            self.train_step(use_gpu)
         else:
-            self.eval_step(use_gpu, local_variables)
+            self.eval_step(use_gpu)

         for hook in self.hooks:
             hook.on_step(self)
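
Downstream tasks that subclass ClassyTask must adopt the new abstract signatures. A minimal sketch, assuming ClassyTask is importable from classy_vision.tasks; MyTask and its bodies are placeholders:

from classy_vision.tasks import ClassyTask  # import path assumed


class MyTask(ClassyTask):
    # Only the two step methods are shown; the other abstract methods of
    # ClassyTask (such as prepare, visible in the hunk above) still need
    # implementations.
    def train_step(self, use_gpu) -> None:
        # train over one batch; use_gpu says whether data should move to GPU
        pass

    def eval_step(self, use_gpu) -> None:
        # evaluate the model over one batch
        pass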

classy_vision/trainer/classy_trainer.py

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,7 @@ def train(self, task: ClassyTask):
         task.on_phase_start(local_variables)
         while True:
             try:
-                task.step(self.use_gpu, local_variables)
+                task.step(self.use_gpu)
             except StopIteration:
                 break
         task.on_phase_end(local_variables)
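
The trainer's per-phase loop is now driven purely by StopIteration from the task's data iterator. A condensed sketch of the pattern (the standalone function form is illustrative):

def run_phase(task, use_gpu):
    # Mirrors the loop in ClassyTrainer.train after this change: step() takes
    # only use_gpu, and the phase ends when the data iterator is exhausted.
    while True:
        try:
            task.step(use_gpu)
        except StopIteration:
            break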

classy_vision/trainer/elastic_trainer.py

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ def _run_step(self, state, local_variables, use_gpu):
                 state.advance_to_next_phase = True
                 state.skip_current_phase = False  # Reset flag
             else:
-                state.task.step(use_gpu, local_variables)
+                state.task.step(use_gpu)
         except StopIteration:
             state.advance_to_next_phase = True


test/tasks_classification_task_test.py

Lines changed: 2 additions & 3 deletions
@@ -73,7 +73,6 @@ def test_checkpointing(self):
         task_2 = build_task(config).set_hooks([LossLrMeterLoggingHook()])

         use_gpu = torch.cuda.is_available()
-        local_variables = {}

         # prepare the tasks for the right device
         task.prepare(use_gpu=use_gpu)
@@ -96,8 +95,8 @@ def test_checkpointing(self):

         # test that the train step runs the same way on both states
         # and the loss remains the same
-        task.train_step(use_gpu, local_variables)
-        task_2.train_step(use_gpu, local_variables)
+        task.train_step(use_gpu)
+        task_2.train_step(use_gpu)
         self._compare_states(task.get_classy_state(), task_2.get_classy_state())

     def test_final_train_checkpoint(self):
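
For completeness, a condensed sketch of the updated test flow outside the unittest class. The config contents are omitted, and the import paths for build_task and LossLrMeterLoggingHook are assumptions based on the names used in the test above:

import torch

from classy_vision.hooks import LossLrMeterLoggingHook  # import path assumed
from classy_vision.tasks import build_task  # import path assumed

use_gpu = torch.cuda.is_available()
config = {}  # placeholder; the real test builds a full task config

task = build_task(config).set_hooks([LossLrMeterLoggingHook()])
task.prepare(use_gpu=use_gpu)

# No local_variables dict is created or passed anymore:
task.train_step(use_gpu)
state = task.get_classy_state()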
