Bases: DeepSeekR1ReasoningParser
Reasoning parser for Nemotron V3 models.
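Methods: get_streaming_fallback_content – Reasoning to duplicate into content on the terminal streaming delta.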
Source code in vllm/reasoning/nemotron_v3_reasoning_parser.py
class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
    """
    Reasoning parser for Nemotron V3 models.

    Builds on the parent parser's ``extract_reasoning`` and, when the
    request's ``chat_template_kwargs`` ask for it, hands the parsed
    reasoning back as content if the parsed content is missing or blank.
    """

    def _should_force_content(
        self, request: ChatCompletionRequest | ResponsesRequest
    ) -> bool:
        """Tell whether the request asks for content to be forced.

        True only when ``request.chat_template_kwargs`` is present and
        non-empty and either ``enable_thinking`` is exactly ``False`` or
        ``force_nonempty_content`` is exactly ``True``.
        """
        template_kwargs = getattr(request, "chat_template_kwargs", None)
        if not template_kwargs:
            # Attribute missing, None, or an empty mapping: nothing to honour.
            return False
        # Identity checks on purpose: a missing key (None) or a merely
        # falsy/truthy value must not count as an explicit opt-in.
        if template_kwargs.get("enable_thinking") is False:
            return True
        return template_kwargs.get("force_nonempty_content") is True

    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> tuple[str | None, str | None]:
        """Split ``model_output`` into ``(reasoning, content)``.

        The split itself is delegated to the parent parser. If the request
        forces content and the parsed content is ``None`` or only
        whitespace, the two values are returned swapped, so the parsed
        reasoning occupies the content slot.
        """
        parsed_reasoning, parsed_content = super().extract_reasoning(
            model_output, request
        )
        if not self._should_force_content(request):
            return parsed_reasoning, parsed_content

        content_is_usable = parsed_content is not None and bool(
            parsed_content.strip()
        )
        if content_is_usable:
            return parsed_reasoning, parsed_content

        # Swap: the (empty) content becomes the reasoning and vice versa.
        return parsed_content, parsed_reasoning

    def get_streaming_fallback_content(
        self, text: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> str | None:
        """Reasoning to duplicate into content on the terminal streaming delta.

        Returns ``None`` when the request does not force content; otherwise
        the reasoning part that the parent parser extracts from ``text``.
        """
        if self._should_force_content(request):
            # Go straight to the parent parser so this class's swap in
            # extract_reasoning is not applied here.
            fallback, _ignored_content = super().extract_reasoning(text, request)
            return fallback
        return None
|
get_streaming_fallback_content(text, request)
Reasoning to duplicate into content on the terminal streaming delta.
Source code in vllm/reasoning/nemotron_v3_reasoning_parser.py
def get_streaming_fallback_content(
    self, text: str, request: ChatCompletionRequest | ResponsesRequest
) -> str | None:
    """Reasoning to duplicate into content on the terminal streaming delta.

    Returns ``None`` when the request does not force content; otherwise
    the reasoning part that the parent parser extracts from ``text``.
    """
    if self._should_force_content(request):
        # Go straight to the parent parser rather than this class's own
        # extract_reasoning override.
        fallback, _ignored_content = super().extract_reasoning(text, request)
        return fallback
    return None
|