Skip to content

Commit 920e9db

Browse files
cuichenx and claude committed
Rename pretrain_config to pretrain_mock_config
Rename all VLM pretrain recipe functions from *_pretrain_config to *_pretrain_mock_config to clearly indicate they use mock datasets. Updated imports in __init__.py, perf scripts, and examples.

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
Signed-off-by: Chen Cui <[email protected]>
1 parent 696fd7f commit 920e9db

5 files changed

Lines changed: 41 additions & 41 deletions

File tree

examples/decentralized_pg/pretrain_qwen3_vl_simple.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,15 @@
3232

3333
import torch
3434

35-
from megatron.bridge.recipes.qwen_vl.qwen3_vl import qwen3_vl_30b_a3b_pretrain_config
35+
from megatron.bridge.recipes.qwen_vl.qwen3_vl import qwen3_vl_30b_a3b_pretrain_mock_config
3636
from megatron.bridge.training.pretrain import pretrain
3737
from megatron.bridge.training.vlm_step import forward_step
3838

3939

4040
def main() -> None:
4141
"""Run Qwen3 pretraining with decentralized process groups enabled."""
4242
# Get the standard Qwen3 4B pretrain config with overrides
43-
cfg = qwen3_vl_30b_a3b_pretrain_config(
43+
cfg = qwen3_vl_30b_a3b_pretrain_mock_config(
4444
# Use mock data for demo
4545
mock=True,
4646
# Parallelism

scripts/performance/configs/qwen_vl/qwen3_vl_pretrain.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
from utils.precision import get_precision_config
1919

2020
from megatron.bridge.recipes.qwen_vl.qwen3_vl import (
21-
qwen3_vl_30b_a3b_pretrain_config,
22-
qwen3_vl_235b_a22b_pretrain_config,
21+
qwen3_vl_30b_a3b_pretrain_mock_config,
22+
qwen3_vl_235b_a22b_pretrain_mock_config,
2323
)
2424
from megatron.bridge.training.comm_overlap import CommOverlapConfig
2525
from megatron.bridge.training.config import ConfigContainer
@@ -77,7 +77,7 @@ def set_qwen3_vl_common_configs(cfg: ConfigContainer) -> None:
7777
cfg.comm_overlap.overlap_grad_reduce = False
7878

7979

80-
def qwen3_vl_235b_a22b_pretrain_config_gb300(
80+
def qwen3_vl_235b_a22b_pretrain_mock_config_gb300(
8181
precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
8282
) -> ConfigContainer:
8383
"""GB300, baseline config."""
@@ -90,7 +90,7 @@ def qwen3_vl_235b_a22b_pretrain_config_gb300(
9090
base_cfg = QWEN3_VL_235B_A22B_PRETRAIN_CONFIG_GB300_FP8_MX
9191
precision_config = get_precision_config(precision)
9292

93-
cfg = qwen3_vl_235b_a22b_pretrain_config(
93+
cfg = qwen3_vl_235b_a22b_pretrain_mock_config(
9494
mock=mock,
9595
precision_config=precision_config,
9696
comm_overlap_config=CommOverlapConfig(tp_comm_overlap=True),
@@ -102,7 +102,7 @@ def qwen3_vl_235b_a22b_pretrain_config_gb300(
102102
return cfg
103103

104104

105-
def qwen3_vl_235b_a22b_pretrain_config_gb200(
105+
def qwen3_vl_235b_a22b_pretrain_mock_config_gb200(
106106
precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
107107
) -> ConfigContainer:
108108
"""GB200, baseline config."""
@@ -115,7 +115,7 @@ def qwen3_vl_235b_a22b_pretrain_config_gb200(
115115
base_cfg = QWEN3_VL_235B_A22B_PRETRAIN_CONFIG_GB200_FP8_MX
116116
precision_config = get_precision_config(precision)
117117

118-
cfg = qwen3_vl_235b_a22b_pretrain_config(
118+
cfg = qwen3_vl_235b_a22b_pretrain_mock_config(
119119
mock=mock,
120120
precision_config=precision_config,
121121
comm_overlap_config=CommOverlapConfig(tp_comm_overlap=True),
@@ -127,7 +127,7 @@ def qwen3_vl_235b_a22b_pretrain_config_gb200(
127127
return cfg
128128

129129

130-
def qwen3_vl_235b_a22b_pretrain_config_b200(
130+
def qwen3_vl_235b_a22b_pretrain_mock_config_b200(
131131
precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
132132
) -> ConfigContainer:
133133
"""B200, baseline config."""
@@ -140,7 +140,7 @@ def qwen3_vl_235b_a22b_pretrain_config_b200(
140140
base_cfg = QWEN3_VL_235B_A22B_PRETRAIN_CONFIG_B200_FP8_MX
141141
precision_config = get_precision_config(precision)
142142

143-
cfg = qwen3_vl_235b_a22b_pretrain_config(
143+
cfg = qwen3_vl_235b_a22b_pretrain_mock_config(
144144
mock=mock,
145145
precision_config=precision_config,
146146
comm_overlap_config=CommOverlapConfig(tp_comm_overlap=True),
@@ -157,7 +157,7 @@ def qwen3_vl_235b_a22b_pretrain_config_b200(
157157
return cfg
158158

159159

160-
def qwen3_vl_235b_a22b_pretrain_config_h100(
160+
def qwen3_vl_235b_a22b_pretrain_mock_config_h100(
161161
precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
162162
) -> ConfigContainer:
163163
"""H100, baseline config."""
@@ -168,7 +168,7 @@ def qwen3_vl_235b_a22b_pretrain_config_h100(
168168
base_cfg = QWEN3_VL_235B_A22B_PRETRAIN_CONFIG_H100_FP8_CS
169169
precision_config = get_precision_config(precision)
170170

171-
cfg = qwen3_vl_235b_a22b_pretrain_config(
171+
cfg = qwen3_vl_235b_a22b_pretrain_mock_config(
172172
mock=mock,
173173
precision_config=precision_config,
174174
comm_overlap_config=CommOverlapConfig(tp_comm_overlap=False),
@@ -180,7 +180,7 @@ def qwen3_vl_235b_a22b_pretrain_config_h100(
180180
return cfg
181181

182182

183-
def qwen3_vl_30b_a3b_pretrain_config_gb300(
183+
def qwen3_vl_30b_a3b_pretrain_mock_config_gb300(
184184
precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
185185
) -> ConfigContainer:
186186
"""GB300, baseline config."""
@@ -193,7 +193,7 @@ def qwen3_vl_30b_a3b_pretrain_config_gb300(
193193
base_cfg = QWEN3_VL_30B_A3B_PRETRAIN_CONFIG_GB300_FP8_MX
194194
precision_config = get_precision_config(precision)
195195

196-
cfg = qwen3_vl_30b_a3b_pretrain_config(
196+
cfg = qwen3_vl_30b_a3b_pretrain_mock_config(
197197
mock=mock,
198198
precision_config=precision_config,
199199
comm_overlap_config=CommOverlapConfig(tp_comm_overlap=True),
@@ -205,7 +205,7 @@ def qwen3_vl_30b_a3b_pretrain_config_gb300(
205205
return cfg
206206

207207

208-
def qwen3_vl_30b_a3b_pretrain_config_gb200(
208+
def qwen3_vl_30b_a3b_pretrain_mock_config_gb200(
209209
precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
210210
) -> ConfigContainer:
211211
"""GB200, baseline config."""
@@ -218,7 +218,7 @@ def qwen3_vl_30b_a3b_pretrain_config_gb200(
218218
base_cfg = QWEN3_VL_30B_A3B_PRETRAIN_CONFIG_GB200_FP8_MX
219219
precision_config = get_precision_config(precision)
220220

221-
cfg = qwen3_vl_30b_a3b_pretrain_config(
221+
cfg = qwen3_vl_30b_a3b_pretrain_mock_config(
222222
mock=mock,
223223
precision_config=precision_config,
224224
comm_overlap_config=CommOverlapConfig(tp_comm_overlap=True),
@@ -230,7 +230,7 @@ def qwen3_vl_30b_a3b_pretrain_config_gb200(
230230
return cfg
231231

232232

233-
def qwen3_vl_30b_a3b_pretrain_config_b200(
233+
def qwen3_vl_30b_a3b_pretrain_mock_config_b200(
234234
precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
235235
) -> ConfigContainer:
236236
"""B200, baseline config."""
@@ -243,7 +243,7 @@ def qwen3_vl_30b_a3b_pretrain_config_b200(
243243
base_cfg = QWEN3_VL_30B_A3B_PRETRAIN_CONFIG_B200_FP8_MX
244244
precision_config = get_precision_config(precision)
245245

246-
cfg = qwen3_vl_30b_a3b_pretrain_config(
246+
cfg = qwen3_vl_30b_a3b_pretrain_mock_config(
247247
mock=mock,
248248
precision_config=precision_config,
249249
comm_overlap_config=CommOverlapConfig(tp_comm_overlap=True),
@@ -255,7 +255,7 @@ def qwen3_vl_30b_a3b_pretrain_config_b200(
255255
return cfg
256256

257257

258-
def qwen3_vl_30b_a3b_pretrain_config_h100(
258+
def qwen3_vl_30b_a3b_pretrain_mock_config_h100(
259259
precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
260260
) -> ConfigContainer:
261261
"""H100, baseline config."""
@@ -266,7 +266,7 @@ def qwen3_vl_30b_a3b_pretrain_config_h100(
266266
base_cfg = QWEN3_VL_30B_A3B_PRETRAIN_CONFIG_H100_FP8_CS
267267
precision_config = get_precision_config(precision)
268268

269-
cfg = qwen3_vl_30b_a3b_pretrain_config(
269+
cfg = qwen3_vl_30b_a3b_pretrain_mock_config(
270270
mock=mock,
271271
precision_config=precision_config,
272272
comm_overlap_config=CommOverlapConfig(tp_comm_overlap=True),

src/megatron/bridge/recipes/qwen_vl/__init__.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717
from .qwen3_vl import (
1818
qwen3_vl_8b_peft_config,
1919
qwen3_vl_8b_peft_energon_config,
20-
qwen3_vl_8b_pretrain_config,
20+
qwen3_vl_8b_pretrain_mock_config,
2121
qwen3_vl_8b_sft_config,
2222
qwen3_vl_30b_a3b_peft_config,
23-
qwen3_vl_30b_a3b_pretrain_config,
23+
qwen3_vl_30b_a3b_pretrain_mock_config,
2424
qwen3_vl_30b_a3b_sft_config,
2525
qwen3_vl_235b_a22b_peft_config,
26-
qwen3_vl_235b_a22b_pretrain_config,
26+
qwen3_vl_235b_a22b_pretrain_mock_config,
2727
qwen3_vl_235b_a22b_sft_config,
2828
)
2929
from .qwen25_vl import (
@@ -44,19 +44,19 @@
4444
qwen35_vl_4b_peft_config,
4545
qwen35_vl_4b_sft_config,
4646
qwen35_vl_9b_peft_config,
47-
qwen35_vl_9b_pretrain_config,
47+
qwen35_vl_9b_pretrain_mock_config,
4848
qwen35_vl_9b_sft_config,
4949
qwen35_vl_27b_peft_config,
5050
qwen35_vl_27b_sft_config,
5151
qwen35_vl_35b_a3b_fsdp_sft_config,
5252
qwen35_vl_35b_a3b_peft_config,
53-
qwen35_vl_35b_a3b_pretrain_config,
53+
qwen35_vl_35b_a3b_pretrain_mock_config,
5454
qwen35_vl_35b_a3b_sft_config,
5555
qwen35_vl_122b_a10b_peft_config,
56-
qwen35_vl_122b_a10b_pretrain_config,
56+
qwen35_vl_122b_a10b_pretrain_mock_config,
5757
qwen35_vl_122b_a10b_sft_config,
5858
qwen35_vl_397b_a17b_peft_config,
59-
qwen35_vl_397b_a17b_pretrain_config,
59+
qwen35_vl_397b_a17b_pretrain_mock_config,
6060
qwen35_vl_397b_a17b_sft_config,
6161
qwen35_vl_800m_peft_config,
6262
qwen35_vl_800m_sft_config,
@@ -65,10 +65,10 @@
6565

6666
__all__ = [
6767
# Qwen3.5-VL pretrain configs
68-
"qwen35_vl_9b_pretrain_config",
69-
"qwen35_vl_35b_a3b_pretrain_config",
70-
"qwen35_vl_122b_a10b_pretrain_config",
71-
"qwen35_vl_397b_a17b_pretrain_config",
68+
"qwen35_vl_9b_pretrain_mock_config",
69+
"qwen35_vl_35b_a3b_pretrain_mock_config",
70+
"qwen35_vl_122b_a10b_pretrain_mock_config",
71+
"qwen35_vl_397b_a17b_pretrain_mock_config",
7272
# Qwen3.5-VL SFT configs — dense
7373
"qwen35_vl_800m_sft_config",
7474
"qwen35_vl_2b_sft_config",
@@ -101,9 +101,9 @@
101101
"qwen25_vl_32b_peft_config",
102102
"qwen25_vl_72b_peft_config",
103103
# Qwen3-VL pretrain configs
104-
"qwen3_vl_8b_pretrain_config",
105-
"qwen3_vl_30b_a3b_pretrain_config",
106-
"qwen3_vl_235b_a22b_pretrain_config",
104+
"qwen3_vl_8b_pretrain_mock_config",
105+
"qwen3_vl_30b_a3b_pretrain_mock_config",
106+
"qwen3_vl_235b_a22b_pretrain_mock_config",
107107
# Qwen3-VL SFT configs
108108
"qwen3_vl_8b_sft_config",
109109
"qwen3_vl_30b_a3b_sft_config",

src/megatron/bridge/recipes/qwen_vl/qwen35_vl.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def _qwen35_vl_apply_peft_scheme(cfg: ConfigContainer, peft_scheme: str | PEFT)
193193
# since both families share the same VLM architecture and mock-dataset pipeline.
194194

195195

196-
def qwen35_vl_9b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
196+
def qwen35_vl_9b_pretrain_mock_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
197197
"""Return a pre-training config for Qwen3.5-VL 9B (dense).
198198
199199
See `_qwen3_vl_common` for the full list of parameters.
@@ -211,7 +211,7 @@ def qwen35_vl_9b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) ->
211211
return _qwen3_vl_common(**combined_kwargs)
212212

213213

214-
def qwen35_vl_35b_a3b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
214+
def qwen35_vl_35b_a3b_pretrain_mock_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
215215
"""Return a pre-training config for Qwen3.5-VL 35B-A3B (MoE).
216216
217217
See `_qwen3_vl_common` for the full list of parameters.
@@ -230,7 +230,7 @@ def qwen35_vl_35b_a3b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]
230230
return _qwen3_vl_common(**combined_kwargs)
231231

232232

233-
def qwen35_vl_122b_a10b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
233+
def qwen35_vl_122b_a10b_pretrain_mock_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
234234
"""Return a pre-training config for Qwen3.5-VL 122B-A10B (MoE).
235235
236236
See `_qwen3_vl_common` for the full list of parameters.
@@ -250,7 +250,7 @@ def qwen35_vl_122b_a10b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwarg
250250
return _qwen3_vl_common(**combined_kwargs)
251251

252252

253-
def qwen35_vl_397b_a17b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
253+
def qwen35_vl_397b_a17b_pretrain_mock_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
254254
"""Return a pre-training config for Qwen3.5-VL 397B-A17B (MoE).
255255
256256
See `_qwen3_vl_common` for the full list of parameters.

src/megatron/bridge/recipes/qwen_vl/qwen3_vl.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def _qwen3_vl_common(
208208
# =============================================================================
209209

210210

211-
def qwen3_vl_8b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
211+
def qwen3_vl_8b_pretrain_mock_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
212212
"""Return a pre-training config for Qwen3-VL 8B Instruct.
213213
214214
See `_qwen3_vl_common` for the full list of parameters.
@@ -226,7 +226,7 @@ def qwen3_vl_8b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> C
226226
return _qwen3_vl_common(**combined_kwargs)
227227

228228

229-
def qwen3_vl_30b_a3b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
229+
def qwen3_vl_30b_a3b_pretrain_mock_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
230230
"""Return a pre-training config for Qwen3-VL 30B-A3B (MoE).
231231
232232
See `_qwen3_vl_common` for the full list of parameters.
@@ -245,7 +245,7 @@ def qwen3_vl_30b_a3b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs])
245245
return _qwen3_vl_common(**combined_kwargs)
246246

247247

248-
def qwen3_vl_235b_a22b_pretrain_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
248+
def qwen3_vl_235b_a22b_pretrain_mock_config(**user_kwargs: Unpack[Qwen3VLCommonKwargs]) -> ConfigContainer:
249249
"""Return a pre-training config for Qwen3-VL 235B-A22B (MoE).
250250
251251
See `_qwen3_vl_common` for the full list of parameters.

0 commit comments

Comments
 (0)