@@ -173,7 +173,7 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> PreTrainedTokenizerBase:
 >>> from transformers import AutoTokenizer
 >>> from nemo_rl.algorithms.utils import get_tokenizer
 >>> # not specifying a chat template uses the tokenizer's default
->>> config = {"name": "meta-llama/Llama-3.2-1B-Instruct"}
+>>> config = {"name": "meta-llama/Llama-3.2-1B-Instruct", "is_tokenizer_processor": False}
 >>> tokenizer = get_tokenizer(config)
 No chat template provided, using tokenizer's default
 >>> messages = [
@@ -186,7 +186,8 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> PreTrainedTokenizerBase:
 >>> # Using a passthrough template
 >>> config = {
 ...     "name": "meta-llama/Llama-3.2-1B-Instruct",
-...     "chat_template": None
+...     "chat_template": None,
+...     "is_tokenizer_processor": False,
 ... }
 >>> tokenizer = get_tokenizer(config)
 Using passthrough chat template
@@ -196,7 +197,8 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> PreTrainedTokenizerBase:
 >>> # Using a custom template
 >>> config = {
 ...     "name": "meta-llama/Llama-3.2-1B-Instruct",
-...     "chat_template": "{% for message in messages %}{{ ' START: ' + message['content'] + ' END.' }}{% endfor %}"
+...     "chat_template": "{% for message in messages %}{{ ' START: ' + message['content'] + ' END.' }}{% endfor %}",
+...     "is_tokenizer_processor": False,
 ... }
 >>> tokenizer = get_tokenizer(config)
 Using custom chat template
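
Each docstring example config now carries an explicit is_tokenizer_processor key. Below is a minimal sketch of how such a flag could be consumed, assuming it selects between AutoTokenizer and AutoProcessor (e.g. for multimodal checkpoints); the helper name load_tokenizer_or_processor is illustrative only and is not nemo_rl's actual get_tokenizer implementation.

    # Sketch only (assumption): is_tokenizer_processor chooses AutoProcessor over
    # AutoTokenizer. load_tokenizer_or_processor is a hypothetical helper, not
    # nemo_rl's get_tokenizer.
    from transformers import AutoProcessor, AutoTokenizer

    def load_tokenizer_or_processor(config: dict):
        # Load a processor when the flag is set, otherwise a plain tokenizer.
        if config.get("is_tokenizer_processor", False):
            return AutoProcessor.from_pretrained(config["name"])
        return AutoTokenizer.from_pretrained(config["name"])

    # Text-only example mirroring the docstring configs above (flag left False).
    config = {"name": "meta-llama/Llama-3.2-1B-Instruct", "is_tokenizer_processor": False}
    tokenizer = load_tokenizer_or_processor(config)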