# Changelog (translated from the original commit message):
# - Added convert_tool_calls_to_content to render tool_calls from message
#   history as XML-style content the LLM can understand (services.py).
# - Fixed response_parser to parse multiple tool_calls in one response,
#   using non-greedy matching (response_parser.py).
# - Response parsing now supports content and tool_calls coexisting.
# - main.py integrates message conversion so history is passed correctly.
# - Added test coverage: multi-tool calls, message conversion, mixed responses.
"""
|
|
Response Parser Module
|
|
|
|
This module provides low-coupling, high-cohesion parsing utilities for extracting
|
|
tool calls from LLM responses and converting them to OpenAI-compatible format.
|
|
|
|
Design principles:
|
|
- Single Responsibility: Each function handles one specific parsing task
|
|
- Testability: Pure functions that are easy to unit test
|
|
- Type Safety: Uses Pydantic models for validation
|
|
"""
|
|
|
|
import re
|
|
import json
|
|
import logging
|
|
from typing import Optional, List, Dict, Any
|
|
from uuid import uuid4
|
|
|
|
from app.models import ResponseMessage, ToolCall, ToolCallFunction
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Constants for tool call parsing
|
|
# Using XML-style tags to avoid confusion with JSON braces
|
|
# LLM should emit: <tool_call>{"name": "...", "arguments": {...}}</tool_call>
|
|
TOOL_CALL_START_TAG = "<invoke>"
|
|
TOOL_CALL_END_TAG = "</invoke>"
|
|
|
|
|
|
class ToolCallParseError(Exception):
    """Signals that a tool call payload could not be parsed (e.g. missing fields)."""
|
class ResponseParser:
    """
    Parser for converting LLM text responses into structured ResponseMessage objects.

    This class encapsulates all parsing logic for tool calls, making it easy to test
    and maintain. It follows the Single Responsibility Principle by focusing solely
    on parsing responses.
    """

    def __init__(self, tool_call_start_tag: str = TOOL_CALL_START_TAG,
                 tool_call_end_tag: str = TOOL_CALL_END_TAG):
        """
        Initialize the parser with configurable tags.

        Args:
            tool_call_start_tag: The opening tag for tool calls (default: "<invoke>")
            tool_call_end_tag: The closing tag for tool calls (default: "</invoke>")
        """
        self.tool_call_start_tag = tool_call_start_tag
        self.tool_call_end_tag = tool_call_end_tag
        self._compile_regex()

    def _compile_regex(self) -> None:
        """Compile the regex pattern for tool call extraction."""
        # Escape special regex characters so user-supplied tags are matched literally.
        escaped_start = re.escape(self.tool_call_start_tag)
        escaped_end = re.escape(self.tool_call_end_tag)
        # Non-greedy matching stops each match at the nearest closing tag, which
        # allows several tool calls to be extracted from a single response.
        # DOTALL lets the JSON payload span multiple lines.
        self._tool_call_pattern = re.compile(
            f"{escaped_start}.*?{escaped_end}",
            re.DOTALL
        )

    def _extract_valid_json(self, text: str) -> Optional[str]:
        """
        Extract a valid JSON object from text that may contain extra content.

        This handles cases where non-greedy regex matching includes incomplete JSON.

        Args:
            text: Text that should contain a JSON object

        Returns:
            The extracted valid JSON string, or None if not found
        """
        text = text.lstrip()  # Only strip leading whitespace

        # Find the first opening brace (the start of JSON)
        start_idx = text.find('{')
        if start_idx < 0:
            return None

        text = text[start_idx:]  # Start from the first opening brace

        # Scan for the matching closing brace, tracking string literals so
        # braces that appear inside quoted values are ignored.
        brace_count = 0
        in_string = False
        escape_next = False

        for i, char in enumerate(text):
            if escape_next:
                escape_next = False
                continue

            if char == '\\' and in_string:
                escape_next = True
                continue

            if char == '"':
                in_string = not in_string
                continue

            if not in_string:
                if char == '{':
                    brace_count += 1
                elif char == '}':
                    brace_count -= 1
                    if brace_count == 0:
                        # Found the matching closing brace
                        return text[:i + 1]

        # Unbalanced braces: no complete JSON object present.
        return None

    def parse(self, llm_response: str) -> ResponseMessage:
        """
        Parse an LLM response and extract tool calls if present.

        This is the main entry point for parsing. It handles:
        1. Responses with tool calls (wrapped in tags)
        2. Regular text responses
        3. Multiple tool calls in a single response

        Args:
            llm_response: The raw text response from the LLM

        Returns:
            ResponseMessage with content and optionally tool_calls

        Example:
            >>> parser = ResponseParser()
            >>> response = parser.parse('Hello world')
            >>> response.content
            'Hello world'

            >>> response = parser.parse('Check the weather.<invoke>{"name": "weather", "arguments": {}}</invoke>')
            >>> response.tool_calls[0].function.name
            'weather'
        """
        if not llm_response:
            return ResponseMessage(content=None)

        try:
            # Find all tool call occurrences
            matches = list(self._tool_call_pattern.finditer(llm_response))

            if matches:
                return self._parse_tool_call_response(llm_response, matches)
            else:
                return self._parse_text_only_response(llm_response)

        except Exception as e:
            # Parsing must never crash the caller: degrade to returning the
            # raw text as plain content.
            logger.warning(f"Failed to parse LLM response: {e}. Returning as text.")
            return ResponseMessage(content=llm_response)

    def _parse_tool_call_response(self, llm_response: str, matches: List[re.Match]) -> ResponseMessage:
        """
        Parse a response that contains tool calls.

        Collects every tool call plus all surrounding prose: text before the
        first call, between consecutive calls, and after the last call.
        (Previously the text *between* tool calls was silently dropped.)

        Args:
            llm_response: The full LLM response
            matches: List of regex match objects containing the tool calls

        Returns:
            ResponseMessage with content and tool_calls
        """
        tool_calls = []
        content_parts = []
        cursor = 0  # Position just past the previously processed tool call.

        for match in matches:
            # Prose between the previous tool call (or start of response)
            # and this tool call.
            gap = llm_response[cursor:match.start()].strip()
            if gap:
                content_parts.append(gap)

            # The match includes start and end tags, so strip them.
            matched_text = match.group(0)
            tool_call_str = matched_text[len(self.tool_call_start_tag):-len(self.tool_call_end_tag)]

            # Extract valid JSON by finding matching braces.
            json_str = self._extract_valid_json(tool_call_str)
            if json_str is None:
                # Fallback to trying to parse the entire string.
                json_str = tool_call_str

            try:
                tool_call_data = json.loads(json_str)
                tool_calls.append(self._create_tool_call(tool_call_data))
            except json.JSONDecodeError as e:
                # Skip malformed payloads but keep parsing the remaining calls.
                logger.warning(f"Failed to parse tool call JSON: {tool_call_str}. Error: {e}")

            cursor = match.end()

        # Prose after the last tool call.
        trailing = llm_response[cursor:].strip()
        if trailing:
            content_parts.append(trailing)

        # Combine all content parts into a single content string.
        content = " ".join(content_parts) if content_parts else None

        return ResponseMessage(
            content=content,
            tool_calls=tool_calls if tool_calls else None
        )

    def _parse_text_only_response(self, llm_response: str) -> ResponseMessage:
        """
        Parse a response with no tool calls.

        Args:
            llm_response: The full LLM response

        Returns:
            ResponseMessage with content only
        """
        return ResponseMessage(content=llm_response.strip())

    def _create_tool_call(self, tool_call_data: Dict[str, Any]) -> ToolCall:
        """
        Create a ToolCall object from parsed data.

        Args:
            tool_call_data: Dictionary containing 'name' and optionally 'arguments'

        Returns:
            ToolCall object

        Raises:
            ToolCallParseError: If required fields are missing
        """
        name = tool_call_data.get("name")
        if not name:
            raise ToolCallParseError("Tool call missing 'name' field")

        arguments = tool_call_data.get("arguments", {})

        # Generate a unique ID for the tool call (OpenAI-style "call_*" prefix).
        tool_call_id = f"call_{name}_{str(uuid4())[:8]}"

        return ToolCall(
            id=tool_call_id,
            type="function",
            function=ToolCallFunction(
                name=name,
                # OpenAI format carries arguments as a JSON string, not a dict.
                arguments=json.dumps(arguments)
            )
        )

    def parse_streaming_chunks(self, chunks: List[str]) -> ResponseMessage:
        """
        Parse a list of streaming chunks and aggregate into a ResponseMessage.

        This method handles streaming responses where tool calls might be
        split across multiple chunks.

        Args:
            chunks: List of content chunks from streaming response

        Returns:
            Parsed ResponseMessage
        """
        full_content = "".join(chunks)
        return self.parse(full_content)

    def parse_native_tool_calls(self, llm_response: Dict[str, Any]) -> ResponseMessage:
        """
        Parse a response that already has native OpenAI-format tool calls.

        Some LLMs natively support tool calling and return them in the standard
        OpenAI format. This method handles those responses.

        Args:
            llm_response: Dictionary response from LLM with potential tool_calls field

        Returns:
            ResponseMessage with parsed tool_calls or content
        """
        if "tool_calls" in llm_response and llm_response["tool_calls"]:
            # Parse native tool calls, filling in defaults for optional fields.
            tool_calls = []
            for tc in llm_response["tool_calls"]:
                tool_calls.append(ToolCall(
                    id=tc.get("id", f"call_{str(uuid4())[:8]}"),
                    type=tc.get("type", "function"),
                    function=ToolCallFunction(
                        # NOTE(review): assumes tc["function"] holds "name" and
                        # "arguments"; a malformed entry raises KeyError, which
                        # callers of this method must be prepared for.
                        name=tc["function"]["name"],
                        arguments=tc["function"]["arguments"]
                    )
                ))

            return ResponseMessage(
                content=llm_response.get("content"),
                tool_calls=tool_calls
            )
        else:
            # Fallback to text parsing of the content field.
            content = llm_response.get("content", "")
            return self.parse(content)
|
|
# Convenience functions for backward compatibility and ease of use

def parse_response(llm_response: str) -> ResponseMessage:
    """
    Parse an LLM response with a parser built from the default tag settings.

    Convenience wrapper for the common case where no customization is needed.

    Args:
        llm_response: The raw text response from the LLM

    Returns:
        ResponseMessage with parsed content and tool calls
    """
    return ResponseParser().parse(llm_response)
|
def parse_response_with_custom_tags(llm_response: str,
                                    start_tag: str,
                                    end_tag: str) -> ResponseMessage:
    """
    Parse an LLM response whose tool calls are delimited by custom tags.

    Args:
        llm_response: The raw text response from the LLM
        start_tag: Custom start tag for tool calls
        end_tag: Custom end tag for tool calls

    Returns:
        ResponseMessage with parsed content and tool calls
    """
    custom_parser = ResponseParser(
        tool_call_start_tag=start_tag,
        tool_call_end_tag=end_tag,
    )
    return custom_parser.parse(llm_response)