Skip to content

Commit f767cf6

Browse files
iramazanli authored and
facebook-github-bot committed
To change WarmUp Scheduler with ConstantLR and LinearLR (#64395)
Summary: Partially unblocks pytorch/vision#4281 Previously we have added WarmUp Schedulers to PyTorch Core in the PR : #60836 which had two mode of execution - linear and constant depending on warming up function. In this PR we are changing this interface to more direct form, as separating linear and constant modes to separate Schedulers. In particular ```Python scheduler1 = WarmUpLR(optimizer, warmup_factor=0.1, warmup_iters=5, warmup_method="constant") scheduler2 = WarmUpLR(optimizer, warmup_factor=0.1, warmup_iters=5, warmup_method="linear") ``` will look like ```Python scheduler1 = ConstantLR(optimizer, warmup_factor=0.1, warmup_iters=5) scheduler2 = LinearLR(optimizer, warmup_factor=0.1, warmup_iters=5) ``` correspondingly. Pull Request resolved: #64395 Reviewed By: datumbox Differential Revision: D30753688 Pulled By: iramazanli fbshipit-source-id: e47f86d12033f80982ddf1faf5b46873adb4f324
1 parent 75b9e4a commit f767cf6

File tree

4 files changed

+156
-100
lines changed

4 files changed

+156
-100
lines changed

docs/source/optim.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,8 @@ algorithms.
210210
lr_scheduler.MultiplicativeLR
211211
lr_scheduler.StepLR
212212
lr_scheduler.MultiStepLR
213-
lr_scheduler.WarmUpLR
213+
lr_scheduler.ConstantLR
214+
lr_scheduler.LinearLR
214215
lr_scheduler.ExponentialLR
215216
lr_scheduler.CosineAnnealingLR
216217
lr_scheduler.ReduceLROnPlateau

test/test_optim.py

Lines changed: 61 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from torch.autograd import Variable
1313
from torch import sparse
1414
from torch.optim.lr_scheduler import LambdaLR, MultiplicativeLR, StepLR, \
15-
MultiStepLR, WarmUpLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau, \
15+
MultiStepLR, ConstantLR, LinearLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau, \
1616
_LRScheduler, CyclicLR, CosineAnnealingWarmRestarts, OneCycleLR, ChainedScheduler
1717
from torch.optim.swa_utils import AveragedModel, SWALR, update_bn
1818
from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_UBSAN, load_tests, \
@@ -274,16 +274,16 @@ def test_sgd(self):
274274
)
275275
self._test_basic_cases(
276276
lambda weight, bias: optimizer([weight, bias], lr=1e-3),
277-
[lambda opt: WarmUpLR(opt, warmup_factor=0.4, warmup_iters=4, warmup_method="linear")]
277+
[lambda opt: LinearLR(opt, start_factor=0.4, end_factor=0.8, total_iters=4)]
278278
)
279279
self._test_basic_cases(
280280
lambda weight, bias: optimizer([weight, bias], lr=1e-3),
281-
[lambda opt: WarmUpLR(opt, warmup_factor=0.4, warmup_iters=4, warmup_method="constant")]
281+
[lambda opt: ConstantLR(opt, factor=0.4, total_iters=4)]
282282
)
283283
self._test_basic_cases(
284284
lambda weight, bias: optimizer([weight, bias], lr=1e-3),
285285
[lambda opt: StepLR(opt, gamma=0.9, step_size=10),
286-
lambda opt: WarmUpLR(opt, warmup_factor=0.4, warmup_iters=4)]
286+
lambda opt: LinearLR(opt, start_factor=0.4, end_factor=0.6, total_iters=4)]
287287
)
288288
self._test_basic_cases(
289289
lambda weight, bias: optimizer([weight, bias], lr=1e-3),
@@ -430,17 +430,17 @@ def test_adam(self):
430430
lambda weight, bias: optimizer(
431431
self._build_params_dict(weight, bias, lr=1e-2),
432432
lr=1e-3),
433-
[lambda opt: WarmUpLR(opt, warmup_factor=0.4, warmup_iters=4, warmup_method="linear")]
433+
[lambda opt: LinearLR(opt, start_factor=0.4, total_iters=4)]
434434
)
435435
self._test_basic_cases(
436436
lambda weight, bias: optimizer(
437437
self._build_params_dict(weight, bias, lr=1e-2),
438438
lr=1e-3),
439-
[lambda opt: WarmUpLR(opt, warmup_factor=0.4, warmup_iters=4, warmup_method="constant")]
439+
[lambda opt: ConstantLR(opt, factor=0.4, total_iters=4)]
440440
)
441441
self._test_basic_cases(
442442
lambda weight, bias: optimizer([weight, bias], lr=1e-3, amsgrad=True),
443-
[lambda opt: WarmUpLR(opt, warmup_factor=0.4, warmup_iters=4, warmup_method="constant"),
443+
[lambda opt: ConstantLR(opt, factor=0.4, total_iters=4),
444444
lambda opt: ExponentialLR(opt, gamma=0.9)]
445445
)
446446
self._test_basic_cases(
@@ -992,12 +992,12 @@ def test_exponential_lr_is_constant_for_constant_epoch(self):
992992
scheduler = ExponentialLR(self.opt, gamma=0.9)
993993
self._test_lr_is_constant_for_constant_epoch(scheduler)
994994

995-
def test_constant_warmup_lr_is_constant_for_constant_epoch(self):
996-
scheduler = WarmUpLR(self.opt, warmup_method="constant")
995+
def test_constantlr_is_constant_for_constant_epoch(self):
996+
scheduler = ConstantLR(self.opt)
997997
self._test_lr_is_constant_for_constant_epoch(scheduler)
998998

999-
def test_linear_warmup_lr_is_constant_for_constant_epoch(self):
1000-
scheduler = WarmUpLR(self.opt, warmup_method="linear")
999+
def test_linear_linearlr_is_constant_for_constant_epoch(self):
1000+
scheduler = LinearLR(self.opt)
10011001
self._test_lr_is_constant_for_constant_epoch(scheduler)
10021002

10031003
def test_step_lr(self):
@@ -1051,76 +1051,78 @@ def test_multi_step_lr_with_epoch(self):
10511051
scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
10521052
self._test_with_epoch(scheduler, targets, epochs)
10531053

1054-
def test__get_last_lr_constant_warmup_lr(self):
1054+
def test_get_last_lr_constantlr(self):
10551055
# lr = 0.025 if epoch < 5
10561056
# lr = 0.005 if 5 <= epoch
10571057
epochs = 10
10581058
single_targets = [0.025] * 5 + [0.05] * 5
10591059
targets = [single_targets, [x * epochs for x in single_targets]]
1060-
scheduler = WarmUpLR(self.opt, warmup_factor=1.0 / 2, warmup_iters=5, warmup_method="constant")
1060+
scheduler = ConstantLR(self.opt, factor=1.0 / 2, total_iters=5)
10611061
self._test_get_last_lr(scheduler, targets, epochs)
10621062

1063-
def test__get_last_lr_linear_warmup_lr(self):
1063+
def test_get_last_lr_linearlr(self):
10641064
# lr = 0.025 if epoch == 0
10651065
# lr = 0.03125 if epoch == 1
10661066
# lr = 0.0375 if epoch == 2
10671067
# lr = 0.04375 if epoch == 3
10681068
# lr = 0.005 if 4 <= epoch
10691069
epochs = 10
1070-
factor = 1.0 / 2
1070+
start_factor = 1.0 / 4
1071+
end_factor = 3. / 5
10711072
iters = 4
1072-
interpolation = [factor + i * (1 - factor) / iters for i in range(iters)]
1073-
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (epochs - iters)
1073+
interpolation = [start_factor + i * (end_factor - start_factor) / iters for i in range(iters)]
1074+
single_targets = [x * 0.05 for x in interpolation] + [0.05 * end_factor] * (epochs - iters)
10741075
targets = [single_targets, [x * epochs for x in single_targets]]
1075-
scheduler = WarmUpLR(self.opt, warmup_factor=factor, warmup_iters=iters, warmup_method="linear")
1076+
scheduler = LinearLR(self.opt, start_factor=start_factor, end_factor=end_factor, total_iters=iters)
10761077
self._test_get_last_lr(scheduler, targets, epochs)
10771078

1078-
def test__constant_warmup_lr(self):
1079+
def test_constantlr(self):
10791080
# lr = 0.025 if epoch < 5
10801081
# lr = 0.005 if 5 <= epoch
10811082
epochs = 10
10821083
single_targets = [0.025] * 5 + [0.05] * 5
10831084
targets = [single_targets, [x * epochs for x in single_targets]]
1084-
scheduler = WarmUpLR(self.opt, warmup_factor=1.0 / 2, warmup_iters=5, warmup_method="constant")
1085+
scheduler = ConstantLR(self.opt, factor=1.0 / 2, total_iters=5)
10851086
self._test(scheduler, targets, epochs)
10861087

1087-
def test__linear_warmup_lr(self):
1088+
def test_linearlr(self):
10881089
# lr = 0.025 if epoch == 0
10891090
# lr = 0.03125 if epoch == 1
10901091
# lr = 0.0375 if epoch == 2
10911092
# lr = 0.04375 if epoch == 3
10921093
# lr = 0.005 if 4 <= epoch
10931094
epochs = 10
1094-
factor = 1.0 / 2
1095+
start_factor = 1.0 / 2
10951096
iters = 4
1096-
interpolation = [factor + i * (1 - factor) / iters for i in range(iters)]
1097+
interpolation = [start_factor + i * (1 - start_factor) / iters for i in range(iters)]
10971098
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (epochs - iters)
10981099
targets = [single_targets, [x * epochs for x in single_targets]]
1099-
scheduler = WarmUpLR(self.opt, warmup_factor=factor, warmup_iters=iters, warmup_method="linear")
1100+
scheduler = LinearLR(self.opt, start_factor=start_factor, total_iters=iters)
11001101
self._test(scheduler, targets, epochs)
11011102

1102-
def test_constant_warmup_with_epoch(self):
1103+
def test_constantlr_with_epoch(self):
11031104
# lr = 0.025 if epoch < 5
11041105
# lr = 0.005 if 5 <= epoch
11051106
epochs = 10
11061107
single_targets = [0.025] * 5 + [0.05] * 5
11071108
targets = [single_targets, [x * epochs for x in single_targets]]
1108-
scheduler = WarmUpLR(self.opt, warmup_factor=1.0 / 2, warmup_iters=5, warmup_method="constant")
1109+
scheduler = ConstantLR(self.opt, factor=1.0 / 2, total_iters=5)
11091110
self._test_with_epoch(scheduler, targets, epochs)
11101111

1111-
def test_linear_warmup_with_epoch(self):
1112+
def test_linearlr_with_epoch(self):
11121113
# lr = 0.025 if epoch == 0
11131114
# lr = 0.03125 if epoch == 1
11141115
# lr = 0.0375 if epoch == 2
11151116
# lr = 0.04375 if epoch == 3
11161117
# lr = 0.005 if 4 <= epoch
11171118
epochs = 10
1118-
factor = 1.0 / 2
1119+
start_factor = 1.0 / 2
1120+
end_factor = 1.
11191121
iters = 4
1120-
interpolation = [factor + i * (1 - factor) / iters for i in range(iters)]
1122+
interpolation = [start_factor + i * (end_factor - start_factor) / iters for i in range(iters)]
11211123
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (epochs - iters)
11221124
targets = [single_targets, [x * epochs for x in single_targets]]
1123-
scheduler = WarmUpLR(self.opt, warmup_factor=factor, warmup_iters=iters, warmup_method="linear")
1125+
scheduler = LinearLR(self.opt, start_factor=start_factor, total_iters=iters)
11241126
self._test_with_epoch(scheduler, targets, epochs)
11251127

11261128
def test_exp_lr(self):
@@ -1145,14 +1147,14 @@ def test_closed_form_step_lr(self):
11451147
closed_form_scheduler = StepLR(self.opt, gamma=0.1, step_size=3)
11461148
self._test_against_closed_form(scheduler, closed_form_scheduler, 20)
11471149

1148-
def test_closed_form_linear_warmup_lr(self):
1149-
scheduler = WarmUpLR(self.opt, warmup_factor=1.0 / 3, warmup_iters=4, warmup_method="linear")
1150-
closed_form_scheduler = WarmUpLR(self.opt, warmup_factor=1.0 / 3, warmup_iters=4, warmup_method="linear")
1150+
def test_closed_form_linearlr(self):
1151+
scheduler = LinearLR(self.opt, start_factor=1.0 / 3, end_factor=0.7, total_iters=4)
1152+
closed_form_scheduler = LinearLR(self.opt, start_factor=1.0 / 3, end_factor=0.7, total_iters=4)
11511153
self._test_against_closed_form(scheduler, closed_form_scheduler, 20)
11521154

1153-
def test_closed_form_constant_warmup_lr(self):
1154-
scheduler = WarmUpLR(self.opt, warmup_factor=1.0 / 3, warmup_iters=4, warmup_method="constant")
1155-
closed_form_scheduler = WarmUpLR(self.opt, warmup_factor=1.0 / 3, warmup_iters=4, warmup_method="constant")
1155+
def test_closed_form_constantlr(self):
1156+
scheduler = ConstantLR(self.opt, factor=1.0 / 3, total_iters=4)
1157+
closed_form_scheduler = ConstantLR(self.opt, factor=1.0 / 3, total_iters=4)
11561158
self._test_against_closed_form(scheduler, closed_form_scheduler, 20)
11571159

11581160
def test_closed_form_multi_step_lr(self):
@@ -1265,15 +1267,15 @@ def test_chained_lr2(self):
12651267
epochs = 10
12661268
schedulers = [None] * 1
12671269
targets = [[0.02, 0.03, 0.04] + [0.05] * 9]
1268-
schedulers[0] = WarmUpLR(self.opt, warmup_factor=0.4, warmup_iters=3, warmup_method="linear")
1270+
schedulers[0] = LinearLR(self.opt, start_factor=0.4, total_iters=3)
12691271
scheduler = ChainedScheduler(schedulers)
12701272
self._test([scheduler], targets, epochs)
12711273

12721274
def test_chained_lr3(self):
12731275
epochs = 10
12741276
schedulers = [None] * 2
12751277
targets = [[0.02, 0.03, 0.04, 0.05] + [0.005] * 4 + [0.0005] * 3 + [0.00005] * 3]
1276-
schedulers[0] = WarmUpLR(self.opt, warmup_factor=0.4, warmup_iters=3, warmup_method="linear")
1278+
schedulers[0] = LinearLR(self.opt, start_factor=0.4, total_iters=3)
12771279
schedulers[1] = MultiStepLR(self.opt, milestones=[4, 8, 10], gamma=0.1)
12781280
scheduler = ChainedScheduler(schedulers)
12791281
self._test([scheduler], targets, epochs)
@@ -1286,7 +1288,7 @@ def test_chained_lr4(self):
12861288
+ [0.05 * 0.9 ** x * 0.1 for x in range(4, 6)]
12871289
+ [0.05 * 0.9 ** x * 0.01 for x in range(6, 9)]]
12881290
schedulers[0] = ExponentialLR(self.opt, gamma=0.9)
1289-
schedulers[1] = WarmUpLR(self.opt, warmup_factor=0.2, warmup_iters=4, warmup_method="constant")
1291+
schedulers[1] = ConstantLR(self.opt, factor=0.2, total_iters=4)
12901292
schedulers[2] = StepLR(self.opt, gamma=0.1, step_size=3)
12911293
scheduler = ChainedScheduler(schedulers)
12921294
self._test([scheduler], targets, epochs)
@@ -1323,41 +1325,44 @@ def test_compound_exp_and_multistep_lr(self):
13231325
schedulers[1] = ExponentialLR(self.opt, gamma=0.9)
13241326
self._test(schedulers, targets, epochs)
13251327

1326-
def test_compound_exp_and_linear_warmup_lr(self):
1328+
def test_compound_exp_and_linearlr(self):
13271329
epochs = 10
13281330
iters = 4
1329-
factor = 0.4
1331+
start_factor = 0.4
1332+
end_factor = 0.9
13301333
schedulers = [None] * 2
13311334
single_targets = [0.05 * (0.9 ** x) for x in range(11)]
13321335
for i in range(iters):
1333-
single_targets[i] *= factor + i / iters * (1 - factor)
1336+
single_targets[i] *= start_factor + i / iters * (end_factor - start_factor)
1337+
for i in range(iters, 11):
1338+
single_targets[i] *= end_factor
13341339
targets = [single_targets, [x * epochs for x in single_targets]]
1335-
schedulers[0] = WarmUpLR(self.opt, warmup_factor=factor, warmup_iters=iters, warmup_method="linear")
1340+
schedulers[0] = LinearLR(self.opt, start_factor=start_factor, end_factor=end_factor, total_iters=iters)
13361341
schedulers[1] = ExponentialLR(self.opt, gamma=0.9)
13371342
self._test(schedulers, targets, epochs)
13381343

1339-
def test_compound_step_and_constant_warmup(self):
1344+
def test_compound_step_and_constantlr(self):
13401345
epochs = 10
13411346
iters = 4
13421347
factor = 0.4
13431348
schedulers = [None] * 2
13441349
single_targets = [0.05 * 0.4] * 3 + [0.005 * 0.4] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 3
13451350
targets = [single_targets, [x * epochs for x in single_targets]]
13461351
schedulers[0] = StepLR(self.opt, gamma=0.1, step_size=3)
1347-
schedulers[1] = WarmUpLR(self.opt, warmup_factor=0.4, warmup_iters=4, warmup_method="constant")
1352+
schedulers[1] = ConstantLR(self.opt, factor=0.4, total_iters=4)
13481353
self._test(schedulers, targets, epochs)
13491354

1350-
def test_compound_linear_warmup_and_multistep_lr(self):
1355+
def test_compound_linearlr_and_multistep_lr(self):
13511356
epochs = 10
13521357
iters = 4
1353-
factor = 0.4
1358+
start_factor = 0.4
13541359
schedulers = [None] * 2
13551360
single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005] * 2
13561361
for i in range(iters):
1357-
single_targets[i] *= factor + i / iters * (1 - factor)
1362+
single_targets[i] *= start_factor + i / iters * (1 - start_factor)
13581363
targets = [single_targets, [x * epochs for x in single_targets]]
13591364
schedulers[0] = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
1360-
schedulers[1] = WarmUpLR(self.opt, warmup_factor=factor, warmup_iters=iters, warmup_method="linear")
1365+
schedulers[1] = LinearLR(self.opt, start_factor=start_factor, total_iters=iters)
13611366
self._test(schedulers, targets, epochs)
13621367

13631368
def test_compound_cosanneal_and_step_lr(self):
@@ -1387,19 +1392,19 @@ def test_compound_cosanneal_and_multistep_lr(self):
13871392
schedulers[1] = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
13881393
self._test(schedulers, targets, epochs)
13891394

1390-
def test_compound_cosanneal_and_linear_warmup_lr(self):
1395+
def test_compound_cosanneal_and_linearlr(self):
13911396
epochs = 10
13921397
iters = 4
1393-
factor = 0.4
1398+
start_factor = 0.4
13941399
eta_min = 1e-10
13951400
schedulers = [None] * 2
13961401
single_targets = [eta_min + (0.05 - eta_min) *
13971402
(1 + math.cos(math.pi * x / epochs)) / 2
13981403
for x in range(epochs)]
13991404
for i in range(iters):
1400-
single_targets[i] *= factor + i / iters * (1 - factor)
1405+
single_targets[i] *= start_factor + i / iters * (1 - start_factor)
14011406
targets = [single_targets, [x * epochs for x in single_targets]]
1402-
schedulers[0] = WarmUpLR(self.opt, warmup_factor=factor, warmup_iters=iters, warmup_method="linear")
1407+
schedulers[0] = LinearLR(self.opt, start_factor=start_factor, total_iters=iters)
14031408
schedulers[1] = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
14041409
self._test(schedulers, targets, epochs)
14051410

@@ -1485,22 +1490,22 @@ def test_compound_reduce_lr_on_plateau4(self):
14851490

14861491
def test_compound_reduce_lr_on_plateau5(self):
14871492
iters = 4
1488-
factor = 0.4
1493+
start_factor = 0.4
14891494
epochs = 22
14901495
for param_group in self.opt.param_groups:
14911496
param_group['lr'] = 0.5
14921497
single_targets = [0.5] * 6 + [0.05] * 7 + [0.005] * 7 + [0.0005] * 2
14931498
multipliers = [1] * 22
14941499
for i in range(iters):
1495-
multipliers[i] *= factor + i / iters * (1 - factor)
1500+
multipliers[i] *= start_factor + i / iters * (1 - start_factor)
14961501
single_targets = [x * y for x, y in zip(single_targets, multipliers)]
14971502
targets = [single_targets]
14981503
targets = targets[1:] # test runs step before checking lr
14991504
metrics = [10 - i * 0.0165 for i in range(22)]
15001505
schedulers = [None] * 2
15011506
schedulers[0] = ReduceLROnPlateau(self.opt, patience=5, cooldown=0, threshold_mode='abs',
15021507
mode='min', threshold=0.1)
1503-
schedulers[1] = WarmUpLR(self.opt, warmup_factor=factor, warmup_iters=iters, warmup_method="linear")
1508+
schedulers[1] = LinearLR(self.opt, start_factor=start_factor, total_iters=iters)
15041509
self._test_reduce_lr_on_plateau(schedulers, targets, metrics, epochs)
15051510

15061511
def test_cycle_lr_invalid_mode(self):

0 commit comments

Comments
 (0)