@@ -1323,7 +1323,9 @@ def _create_completion(
13231323
13241324 completion_id : str = f"cmpl-{ str (uuid .uuid4 ())} "
13251325 created : int = int (time .time ())
1326- completion_tokens : List [int ] = []
1326+ # If prompt is empty, initialize completion with BOS token to avoid
1327+ # detokenization including a space at the beginning of the completion
1328+ completion_tokens : List [int ] = [] if len (prompt ) > 0 else [self .token_bos ()]
13271329 # Add blank space to start of prompt to match OG llama tokenizer
13281330 prompt_tokens : List [int ] = (
13291331 (
@@ -1459,6 +1461,8 @@ def _create_completion(
14591461 # not sure how to handle this branch when dealing
14601462 # with CJK output, so keep it unchanged
14611463 for token in remaining_tokens :
1464+ if token == self .token_bos ():
1465+ continue
14621466 token_end_position += len (self .detokenize ([token ]))
14631467 # Check if stop sequence is in the token
14641468 if token_end_position > (
@@ -1582,6 +1586,8 @@ def _create_completion(
15821586
15831587 logprobs_or_none : Optional [CompletionLogprobs ] = None
15841588 if logprobs is not None :
1589+ if token == self .token_bos ():
1590+ continue
15851591 token_str = self .detokenize ([token ]).decode (
15861592 "utf-8" , errors = "ignore"
15871593 )
@@ -1709,6 +1715,8 @@ def _create_completion(
17091715 for token , token_str , logprobs_token in zip (
17101716 all_tokens , all_token_strs , all_logprobs
17111717 ):
1718+ if token == self .token_bos ():
1719+ continue
17121720 text_offsets .append (text_offset )
17131721 text_offset += len (token_str )
17141722 tokens .append (token_str )
0 commit comments