Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions openviking/models/vlm/backends/litellm_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def get_completion(self, prompt: str, thinking: bool = False) -> str:

response = completion(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")

async def get_completion_async(
self, prompt: str, thinking: bool = False, max_retries: int = 0
Expand All @@ -239,7 +239,7 @@ async def get_completion_async(
try:
response = await acompletion(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")
except Exception as e:
last_error = e
if attempt < max_retries:
Expand Down Expand Up @@ -268,7 +268,7 @@ def get_vision_completion(

response = completion(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")

async def get_vision_completion_async(
self,
Expand All @@ -289,7 +289,7 @@ async def get_vision_completion_async(

response = await acompletion(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")

def _update_token_usage_from_response(self, response) -> None:
"""Update token usage from response."""
Expand Down
8 changes: 4 additions & 4 deletions openviking/models/vlm/backends/openai_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def get_completion(self, prompt: str, thinking: bool = False) -> str:

response = client.chat.completions.create(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")

async def get_completion_async(
self, prompt: str, thinking: bool = False, max_retries: int = 0
Expand All @@ -83,7 +83,7 @@ async def get_completion_async(
try:
response = await client.chat.completions.create(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")
except Exception as e:
last_error = e
if attempt < max_retries:
Expand Down Expand Up @@ -168,7 +168,7 @@ def get_vision_completion(

response = client.chat.completions.create(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")

async def get_vision_completion_async(
self,
Expand All @@ -192,4 +192,4 @@ async def get_vision_completion_async(

response = await client.chat.completions.create(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")
8 changes: 4 additions & 4 deletions openviking/models/vlm/backends/volcengine_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def get_completion(self, prompt: str, thinking: bool = False) -> str:

response = client.chat.completions.create(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")

async def get_completion_async(
self, prompt: str, thinking: bool = False, max_retries: int = 0
Expand All @@ -90,7 +90,7 @@ async def get_completion_async(
try:
response = await client.chat.completions.create(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")
except Exception as e:
last_error = e
if attempt < max_retries:
Expand Down Expand Up @@ -238,7 +238,7 @@ def get_vision_completion(

response = client.chat.completions.create(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")

async def get_vision_completion_async(
self,
Expand All @@ -263,4 +263,4 @@ async def get_vision_completion_async(

response = await client.chat.completions.create(**kwargs)
self._update_token_usage_from_response(response)
return response.choices[0].message.content or ""
return self._clean_response(response.choices[0].message.content or "")
7 changes: 7 additions & 0 deletions openviking/models/vlm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
"""VLM base interface and abstract classes"""

import re
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, List, Union
Expand All @@ -10,6 +11,8 @@

from .token_usage import TokenUsageTracker

_THINK_TAG_RE = re.compile(r"<think>[\s\S]*?</think>")


class VLMBase(ABC):
"""VLM base abstract class"""
Expand Down Expand Up @@ -58,6 +61,10 @@ async def get_vision_completion_async(
"""Get vision completion asynchronously"""
pass

def _clean_response(self, content: str) -> str:
"""Strip reasoning tags (e.g. ``<think>...</think>``) from model output."""
return _THINK_TAG_RE.sub("", content).strip()

def is_available(self) -> bool:
"""Check if available"""
return self.api_key is not None or self.api_base is not None
Expand Down
85 changes: 85 additions & 0 deletions tests/models/test_vlm_strip_think_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
# SPDX-License-Identifier: Apache-2.0
"""Tests for stripping <think> reasoning tags from VLM responses."""

import pytest

from openviking.models.vlm.base import _THINK_TAG_RE, VLMBase


class TestStripThinkTags:
    """Verify that _clean_response removes <think> blocks as intended."""

    @pytest.fixture()
    def vlm(self):
        """Build the smallest possible concrete VLMBase subclass."""

        class _ConcreteVLM(VLMBase):
            # Each abstract hook is satisfied with a no-op returning "".
            def get_completion(self, prompt, thinking=False):
                return ""

            async def get_completion_async(self, prompt, thinking=False, max_retries=0):
                return ""

            def get_vision_completion(self, prompt, images, thinking=False):
                return ""

            async def get_vision_completion_async(self, prompt, images, thinking=False):
                return ""

        return _ConcreteVLM({"api_key": "test"})

    def test_no_think_tags(self, vlm):
        plain = "This is a normal response."
        assert vlm._clean_response(plain) == "This is a normal response."

    def test_single_think_block(self, vlm):
        raw = "<think>\nI need to analyze this.\n</think>\nThe actual summary."
        assert vlm._clean_response(raw) == "The actual summary."

    def test_think_block_at_end(self, vlm):
        raw = "Summary text.\n<think>some reasoning</think>"
        assert vlm._clean_response(raw) == "Summary text."

    def test_think_block_in_middle(self, vlm):
        raw = "Start.<think>reasoning here</think>End."
        assert vlm._clean_response(raw) == "Start.End."

    def test_multiple_think_blocks(self, vlm):
        raw = "<think>first</think>Hello<think>second</think> world"
        assert vlm._clean_response(raw) == "Hello world"

    def test_multiline_think_block(self, vlm):
        raw = (
            "<think>\nStep 1: analyze the document\n"
            "Step 2: summarize\nStep 3: output\n</think>\n"
            "# Directory Overview\n\nThis directory contains..."
        )
        cleaned = vlm._clean_response(raw)
        assert cleaned.startswith("# Directory Overview")
        assert "<think>" not in cleaned

    def test_empty_string(self, vlm):
        assert vlm._clean_response("") == ""

    def test_only_think_block(self, vlm):
        raw = "<think>all reasoning, no output</think>"
        assert vlm._clean_response(raw) == ""

    def test_nested_angle_brackets_preserved(self, vlm):
        markup = "Use <b>bold</b> and <i>italic</i> formatting."
        assert vlm._clean_response(markup) == markup

    def test_json_with_think_prefix(self, vlm):
        raw = '<think>let me think</think>\n{"abstract": "summary", "overview": "details"}'
        cleaned = vlm._clean_response(raw)
        assert cleaned == '{"abstract": "summary", "overview": "details"}'


class TestThinkTagRegex:
    """Exercise the module-level compiled pattern on its own."""

    def test_greedy_minimal(self):
        """Non-greedy matching must remove each <think> span separately."""
        sample = "<think>a</think>KEEP<think>b</think>"
        stripped = _THINK_TAG_RE.sub("", sample)
        assert stripped == "KEEP"
Loading