
Commit 841b439

vreis authored and facebook-github-bot committed
Remove local_variables from train_step/eval_step (#412)
Summary:
Pull Request resolved: #412

This is part of a series of diffs to eliminate local_variables (see D20171981). Now that we've removed local_variables from step, remove it from train_step and eval_step.

Reviewed By: mannatsingh

Differential Revision: D20170006

fbshipit-source-id: a5c6525424fc89711de40b8b6906b44c8aa608f4
1 parent e29cc72 commit 841b439
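
To illustrate the effect of this change at call sites, here is a minimal Python sketch that mirrors what ClassyTask.step does after this diff. The function run_one_step and its arguments are illustrative only, not part of this commit:

def run_one_step(task, use_gpu):
    # After this commit, the step methods take only the device flag; the
    # scratch dict (local_variables) that used to be threaded through is gone.
    if task.train:
        task.train_step(use_gpu)
    else:
        task.eval_step(use_gpu)
    # Intermediate values that hooks previously read from local_variables are
    # now published on the task as task.last_batch, a LastBatchInfo holding
    # loss, output, target and sample.
    return task.last_batch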

File tree

5 files changed: +35, -78 lines


classy_vision/tasks/classification_task.py

Lines changed: 26 additions & 64 deletions
@@ -638,120 +638,85 @@ def set_classy_state(self, state):
         # Set up pytorch module in train vs eval mode, update optimizer.
         self._set_model_train_mode()

-    def eval_step(self, use_gpu, local_variables=None):
-        if local_variables is None:
-            local_variables = {}
-
+    def eval_step(self, use_gpu):
         self.last_batch = None

         # Process next sample
         sample = next(self.get_data_iterator())
-        local_variables["sample"] = sample

-        assert (
-            isinstance(local_variables["sample"], dict)
-            and "input" in local_variables["sample"]
-            and "target" in local_variables["sample"]
-        ), (
+        assert isinstance(sample, dict) and "input" in sample and "target" in sample, (
            f"Returned sample [{sample}] is not a map with 'input' and"
            + "'target' keys"
        )

         # Copy sample to GPU
-        local_variables["target"] = local_variables["sample"]["target"]
+        target = sample["target"]
         if use_gpu:
-            for key, value in local_variables["sample"].items():
-                local_variables["sample"][key] = recursive_copy_to_gpu(
-                    value, non_blocking=True
-                )
+            for key, value in sample.items():
+                sample[key] = recursive_copy_to_gpu(value, non_blocking=True)

         with torch.no_grad():
-            local_variables["output"] = self.model(local_variables["sample"]["input"])
+            output = self.model(sample["input"])

-            local_variables["local_loss"] = self.compute_loss(
-                local_variables["output"], local_variables["sample"]
-            )
+            local_loss = self.compute_loss(output, sample)

-            local_variables["loss"] = local_variables["local_loss"].detach().clone()
-            local_variables["loss"] = all_reduce_mean(local_variables["loss"])
+            loss = local_loss.detach().clone()
+            loss = all_reduce_mean(loss)

-            self.losses.append(
-                local_variables["loss"].data.cpu().item()
-                * local_variables["target"].size(0)
-            )
+            self.losses.append(loss.data.cpu().item() * target.size(0))

-            self.update_meters(local_variables["output"], local_variables["sample"])
+            self.update_meters(output, sample)

         # Move some data to the task so hooks get a chance to access it
         self.last_batch = LastBatchInfo(
-            loss=local_variables["loss"],
-            output=local_variables["output"],
-            target=local_variables["target"],
-            sample=local_variables["sample"],
+            loss=loss, output=output, target=target, sample=sample
         )

-    def train_step(self, use_gpu, local_variables=None):
+    def train_step(self, use_gpu):
         """Train step to be executed in train loop

         Args:
             use_gpu: if true, execute training on GPU
-            local_variables: Dict containing intermediate values
-                in train_step for access by hooks
         """

-        if local_variables is None:
-            local_variables = {}
-
         self.last_batch = None

         # Process next sample
         sample = next(self.get_data_iterator())
-        local_variables["sample"] = sample

-        assert (
-            isinstance(local_variables["sample"], dict)
-            and "input" in local_variables["sample"]
-            and "target" in local_variables["sample"]
-        ), (
+        assert isinstance(sample, dict) and "input" in sample and "target" in sample, (
            f"Returned sample [{sample}] is not a map with 'input' and"
            + "'target' keys"
        )

         # Copy sample to GPU
-        local_variables["target"] = local_variables["sample"]["target"]
+        target = sample["target"]
         if use_gpu:
-            for key, value in local_variables["sample"].items():
-                local_variables["sample"][key] = recursive_copy_to_gpu(
-                    value, non_blocking=True
-                )
+            for key, value in sample.items():
+                sample[key] = recursive_copy_to_gpu(value, non_blocking=True)

         with torch.enable_grad():
             # Forward pass
-            local_variables["output"] = self.model(local_variables["sample"]["input"])
+            output = self.model(sample["input"])

-            local_variables["local_loss"] = self.compute_loss(
-                local_variables["output"], local_variables["sample"]
-            )
+            local_loss = self.compute_loss(output, sample)

-            local_variables["loss"] = local_variables["local_loss"].detach().clone()
-            local_variables["loss"] = all_reduce_mean(local_variables["loss"])
+            loss = local_loss.detach().clone()
+            loss = all_reduce_mean(loss)

-            self.losses.append(
-                local_variables["loss"].data.cpu().item()
-                * local_variables["target"].size(0)
-            )
+            self.losses.append(loss.data.cpu().item() * target.size(0))

-            self.update_meters(local_variables["output"], local_variables["sample"])
+            self.update_meters(output, sample)

             # Run backwards pass / update optimizer
             if self.amp_opt_level is not None:
                 self.optimizer.zero_grad()
                 with apex.amp.scale_loss(
-                    local_variables["local_loss"], self.optimizer.optimizer
+                    local_loss, self.optimizer.optimizer
                 ) as scaled_loss:
                     scaled_loss.backward()
             else:
-                self.optimizer.backward(local_variables["local_loss"])
+                self.optimizer.backward(local_loss)

             self.optimizer.update_schedule_on_step(self.where)
             self.optimizer.step()
@@ -760,10 +725,7 @@ def train_step(self, use_gpu, local_variables=None):

         # Move some data to the task so hooks get a chance to access it
         self.last_batch = LastBatchInfo(
-            loss=local_variables["loss"],
-            output=local_variables["output"],
-            target=local_variables["target"],
-            sample=local_variables["sample"],
+            loss=loss, output=output, target=target, sample=sample
         )

     def compute_loss(self, model_output, sample):
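
Since ClassificationTask now exposes per-step data on task.last_batch instead of a shared local_variables dict, a hook-style consumer reads it from there. A minimal sketch, assuming only that hooks receive the task in on_step (as the hook.on_step(self) call in classy_task.py below suggests); LastBatchLoggingHook is hypothetical:

class LastBatchLoggingHook:
    # Hypothetical, illustration only: reads the LastBatchInfo that
    # ClassificationTask stores on task.last_batch at the end of each step.
    def on_step(self, task):
        batch = task.last_batch
        if batch is None:
            # last_batch is reset to None at the start of every step
            return
        # batch.loss is the detached, all-reduced loss tensor for this batch
        print(f"loss={batch.loss.item():.4f}, batch_size={batch.target.size(0)}")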

classy_vision/tasks/classy_task.py

Lines changed: 5 additions & 9 deletions
@@ -107,16 +107,14 @@ def prepare(
         pass

     @abstractmethod
-    def train_step(self, use_gpu, local_variables: Optional[Dict] = None) -> None:
+    def train_step(self, use_gpu) -> None:
         """
         Run a train step.

         This corresponds to training over one batch of data from the dataloaders.

         Args:
             use_gpu: True if training on GPUs, False otherwise
-            local_variables: Local variables created in the function. Can be passed to
-                custom :class:`classy_vision.hooks.ClassyHook`.
         """
         pass

@@ -157,26 +155,24 @@ def on_end(self, local_variables):
         pass

     @abstractmethod
-    def eval_step(self, use_gpu, local_variables: Optional[Dict] = None) -> None:
+    def eval_step(self, use_gpu) -> None:
         """
         Run an evaluation step.

         This corresponds to evaluating the model over one batch of data.

         Args:
             use_gpu: True if training on GPUs, False otherwise
-            local_variables: Local variables created in the function. Can be passed to
-                custom :class:`classy_vision.hooks.ClassyHook`.
         """
         pass

-    def step(self, use_gpu, local_variables: Optional[Dict] = None) -> None:
+    def step(self, use_gpu) -> None:
         from classy_vision.hooks import ClassyHookFunctions

         if self.train:
-            self.train_step(use_gpu, local_variables)
+            self.train_step(use_gpu)
         else:
-            self.eval_step(use_gpu, local_variables)
+            self.eval_step(use_gpu)

         for hook in self.hooks:
             hook.on_step(self)
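
Downstream tasks that subclass ClassyTask must adopt the new abstract signatures. A minimal sketch, assuming ClassyTask is importable from classy_vision.tasks; MyTask and its bodies are placeholders:

from classy_vision.tasks import ClassyTask  # import path assumed


class MyTask(ClassyTask):
    # Only the two step methods are shown; the other abstract methods of
    # ClassyTask (such as prepare, visible in the hunk above) still need
    # implementations.
    def train_step(self, use_gpu) -> None:
        # train over one batch; use_gpu says whether data should move to GPU
        pass

    def eval_step(self, use_gpu) -> None:
        # evaluate the model over one batch
        pass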

classy_vision/trainer/classy_trainer.py

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,7 @@ def train(self, task: ClassyTask):
         task.on_phase_start(local_variables)
         while True:
             try:
-                task.step(self.use_gpu, local_variables)
+                task.step(self.use_gpu)
             except StopIteration:
                 break
         task.on_phase_end(local_variables)
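
The trainer's per-phase loop is now driven purely by StopIteration from the task's data iterator. A condensed sketch of the pattern (the standalone function form is illustrative):

def run_phase(task, use_gpu):
    # Mirrors the loop in ClassyTrainer.train after this change: step() takes
    # only use_gpu, and the phase ends when the data iterator is exhausted.
    while True:
        try:
            task.step(use_gpu)
        except StopIteration:
            break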

classy_vision/trainer/elastic_trainer.py

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ def _run_step(self, state, local_variables, use_gpu):
                 state.advance_to_next_phase = True
                 state.skip_current_phase = False  # Reset flag
             else:
-                state.task.step(use_gpu, local_variables)
+                state.task.step(use_gpu)
         except StopIteration:
             state.advance_to_next_phase = True


test/tasks_classification_task_test.py

Lines changed: 2 additions & 3 deletions
@@ -73,7 +73,6 @@ def test_checkpointing(self):
         task_2 = build_task(config).set_hooks([LossLrMeterLoggingHook()])

         use_gpu = torch.cuda.is_available()
-        local_variables = {}

         # prepare the tasks for the right device
         task.prepare(use_gpu=use_gpu)
@@ -96,8 +95,8 @@ def test_checkpointing(self):

         # test that the train step runs the same way on both states
         # and the loss remains the same
-        task.train_step(use_gpu, local_variables)
-        task_2.train_step(use_gpu, local_variables)
+        task.train_step(use_gpu)
+        task_2.train_step(use_gpu)
         self._compare_states(task.get_classy_state(), task_2.get_classy_state())

     def test_final_train_checkpoint(self):
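
For completeness, a condensed sketch of the updated test flow outside the unittest class. The config contents are omitted, and the import paths for build_task and LossLrMeterLoggingHook are assumptions based on the names used in the test above:

import torch

from classy_vision.hooks import LossLrMeterLoggingHook  # import path assumed
from classy_vision.tasks import build_task  # import path assumed

use_gpu = torch.cuda.is_available()
config = {}  # placeholder; the real test builds a full task config

task = build_task(config).set_hooks([LossLrMeterLoggingHook()])
task.prepare(use_gpu=use_gpu)

# No local_variables dict is created or passed anymore:
task.train_step(use_gpu)
state = task.get_classy_state()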
