`vllm.reasoning.nemotron_v3_reasoning_parser` ¶

Classes:

NemotronV3ReasoningParser –

Reasoning parser for Nemotron V3 models.

`NemotronV3ReasoningParser` ¶

Bases: DeepSeekR1ReasoningParser

Reasoning parser for Nemotron V3 models.

Methods:

get_streaming_fallback_content –

Reasoning to duplicate into content on the terminal streaming delta.

Source code in vllm/reasoning/nemotron_v3_reasoning_parser.py

class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
    """
    Reasoning parser for Nemotron V3 models.

    Parsing is delegated to the parent parser. When the request's
    ``chat_template_kwargs`` either disable thinking or ask for non-empty
    content, the parsed reasoning is moved into the content slot if the
    content would otherwise be missing or blank.
    """

    def _should_force_content(
        self, request: ChatCompletionRequest | ResponsesRequest
    ) -> bool:
        """Return True when the request asks for content to be populated.

        Reads ``chat_template_kwargs`` from the request (absent or empty
        means False) and returns True if ``enable_thinking`` is exactly
        ``False`` or ``force_nonempty_content`` is exactly ``True``.
        """
        template_kwargs = getattr(request, "chat_template_kwargs", None)
        if not template_kwargs:
            return False
        # Identity checks on purpose: a missing key (None) or any other
        # falsy/truthy value must not trigger the fallback.
        if template_kwargs.get("enable_thinking") is False:
            return True
        return template_kwargs.get("force_nonempty_content") is True

    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> tuple[str | None, str | None]:
        """Split ``model_output`` into ``(reasoning, content)``.

        The parent parser does the actual split. If content is being forced
        for this request and the parsed content is ``None`` or
        whitespace-only, the two parts are swapped so the parsed reasoning is
        returned as content.
        """
        parsed_reasoning, parsed_content = super().extract_reasoning(
            model_output, request
        )

        if not self._should_force_content(request):
            return parsed_reasoning, parsed_content

        content_is_blank = parsed_content is None or not parsed_content.strip()
        if content_is_blank:
            # Swap: the (blank) content takes the reasoning slot and the
            # reasoning becomes the content.
            return parsed_content, parsed_reasoning

        return parsed_reasoning, parsed_content

    def get_streaming_fallback_content(
        self, text: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> str | None:
        """Reasoning to duplicate into content on the terminal streaming delta.

        Returns ``None`` unless content is being forced for this request.
        Otherwise returns the reasoning part produced by the parent class's
        ``extract_reasoning`` for ``text`` (the parent implementation is
        called directly, so no reasoning/content swap is applied here).
        """
        if self._should_force_content(request):
            fallback_reasoning, _unused_content = super().extract_reasoning(
                text, request
            )
            return fallback_reasoning
        return None

`get_streaming_fallback_content(text, request)` ¶

Reasoning to duplicate into content on the terminal streaming delta.

Source code in vllm/reasoning/nemotron_v3_reasoning_parser.py

def get_streaming_fallback_content(
    self, text: str, request: ChatCompletionRequest | ResponsesRequest
) -> str | None:
    """Reasoning to duplicate into content on the terminal streaming delta.

    Returns ``None`` unless content is being forced for this request.
    Otherwise returns the reasoning part produced by the parent class's
    ``extract_reasoning`` for ``text`` (the parent implementation is
    called directly, so no reasoning/content swap is applied here).
    """
    if self._should_force_content(request):
        fallback_reasoning, _unused_content = super().extract_reasoning(
            text, request
        )
        return fallback_reasoning
    return None

vllm.reasoning.nemotron_v3_reasoning_parser ¶

NemotronV3ReasoningParser ¶

get_streaming_fallback_content(text, request) ¶

`vllm.reasoning.nemotron_v3_reasoning_parser` ¶

`NemotronV3ReasoningParser` ¶

`get_streaming_fallback_content(text, request)` ¶