Files
llmproxy/app/response_parser.py
Vertex-AI-Step-Builder 5c2904e010 feat: 增强工具调用代理功能,支持多工具调用和消息历史转换
主要改进:
- 新增 convert_tool_calls_to_content 函数,将消息历史中的 tool_calls 转换为 LLM 可理解的 XML 格式
- 修复 response_parser 支持同时解析多个 tool_calls
- 优化响应解析逻辑,支持 content 和 tool_calls 同时存在
- 添加完整的测试覆盖,包括多工具调用、消息转换和混合响应

技术细节:
- services.py: 实现工具调用历史到 content 的转换
- response_parser.py: 使用非贪婪匹配支持多个 tool_calls 解析
- main.py: 集成消息转换功能,确保消息历史正确传递给 LLM

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-31 13:33:25 +00:00

349 lines
12 KiB
Python

"""
Response Parser Module
This module provides low-coupling, high-cohesion parsing utilities for extracting
tool calls from LLM responses and converting them to OpenAI-compatible format.
Design principles:
- Single Responsibility: Each function handles one specific parsing task
- Testability: Pure functions that are easy to unit test
- Type Safety: Uses Pydantic models for validation
"""
import re
import json
import logging
from typing import Optional, List, Dict, Any
from uuid import uuid4
from app.models import ResponseMessage, ToolCall, ToolCallFunction
logger = logging.getLogger(__name__)
# Constants for tool call parsing
# Using XML-style tags to avoid confusion with JSON braces
# LLM should emit: <invoke>{"name": "...", "arguments": {...}}</invoke>
TOOL_CALL_START_TAG = "<invoke>"
TOOL_CALL_END_TAG = "</invoke>"
class ToolCallParseError(Exception):
    """Signals that a tool-call payload could not be parsed."""
class ResponseParser:
    """
    Parser for converting LLM text responses into structured ResponseMessage objects.

    This class encapsulates all parsing logic for tool calls, making it easy to test
    and maintain. It follows the Single Responsibility Principle by focusing solely
    on parsing responses.
    """

    def __init__(self, tool_call_start_tag: str = TOOL_CALL_START_TAG,
                 tool_call_end_tag: str = TOOL_CALL_END_TAG):
        """
        Initialize the parser with configurable tags.

        Args:
            tool_call_start_tag: The opening tag for tool calls (default: "<invoke>")
            tool_call_end_tag: The closing tag for tool calls (default: "</invoke>")
        """
        self.tool_call_start_tag = tool_call_start_tag
        self.tool_call_end_tag = tool_call_end_tag
        self._compile_regex()

    def _compile_regex(self):
        """Compile the regex pattern for tool call extraction."""
        # Escape regex metacharacters so user-supplied tags (e.g. containing
        # "[" or ".") are matched literally.
        escaped_start = re.escape(self.tool_call_start_tag)
        escaped_end = re.escape(self.tool_call_end_tag)
        # Non-greedy matching finds each tool-call occurrence separately
        # instead of one giant span from the first start tag to the last end
        # tag, which is what enables multiple tool calls per response.
        self._tool_call_pattern = re.compile(
            f"{escaped_start}.*?{escaped_end}",
            re.DOTALL
        )

    def _extract_valid_json(self, text: str) -> Optional[str]:
        """
        Extract a valid JSON object from text that may contain extra content.

        This handles cases where non-greedy regex matching includes incomplete
        JSON around the payload.

        Args:
            text: Text that should contain a JSON object

        Returns:
            The extracted valid JSON string, or None if not found
        """
        text = text.lstrip()  # Only strip leading whitespace
        # Find the first opening brace (the start of JSON)
        start_idx = text.find('{')
        if start_idx < 0:
            return None
        text = text[start_idx:]  # Start from the first opening brace
        # Scan for the matching closing brace, ignoring braces that appear
        # inside JSON string literals and honoring backslash escapes.
        brace_count = 0
        in_string = False
        escape_next = False
        for i, char in enumerate(text):
            if escape_next:
                escape_next = False
                continue
            if char == '\\' and in_string:
                escape_next = True
                continue
            if char == '"':
                in_string = not in_string
                continue
            if not in_string:
                if char == '{':
                    brace_count += 1
                elif char == '}':
                    brace_count -= 1
                    if brace_count == 0:
                        # Found the matching closing brace
                        return text[:i + 1]
        return None

    def parse(self, llm_response: str) -> ResponseMessage:
        """
        Parse an LLM response and extract tool calls if present.

        This is the main entry point for parsing. It handles:
        1. Responses with tool calls (wrapped in tags)
        2. Regular text responses
        3. Multiple tool calls in a single response

        Args:
            llm_response: The raw text response from the LLM

        Returns:
            ResponseMessage with content and optionally tool_calls

        Example:
            >>> parser = ResponseParser()
            >>> response = parser.parse('Hello world')
            >>> response.content
            'Hello world'
            >>> response = parser.parse('Check the weather.<invoke>{"name": "weather", "arguments": {}}</invoke>')
            >>> response.tool_calls[0].function.name
            'weather'
        """
        if not llm_response:
            return ResponseMessage(content=None)
        try:
            # Find all tool call occurrences
            matches = list(self._tool_call_pattern.finditer(llm_response))
            if matches:
                return self._parse_tool_call_response(llm_response, matches)
            else:
                return self._parse_text_only_response(llm_response)
        except Exception as e:
            # Any parse failure degrades gracefully to a plain-text message so
            # the caller always receives a usable ResponseMessage.
            logger.warning(f"Failed to parse LLM response: {e}. Returning as text.")
            return ResponseMessage(content=llm_response)

    def _parse_tool_call_response(self, llm_response: str, matches: List[re.Match]) -> ResponseMessage:
        """
        Parse a response that contains tool calls.

        Plain text surrounding the tool-call tags is preserved: text before the
        first call, between consecutive calls, and after the last call is joined
        (space-separated) into the message content.

        Args:
            llm_response: The full LLM response
            matches: List of regex match objects containing the tool calls

        Returns:
            ResponseMessage with content and tool_calls
        """
        tool_calls = []
        content_parts = []
        cursor = 0  # End position of the previously processed tool-call span
        for match in matches:
            # Preserve any plain text since the previous tool call. This also
            # captures text BETWEEN two consecutive tool calls, which a
            # before-first/after-last-only approach would silently drop.
            segment = llm_response[cursor:match.start()].strip()
            if segment:
                content_parts.append(segment)
            cursor = match.end()
            # The match includes start and end tags, so strip them
            matched_text = match.group(0)
            tool_call_str = matched_text[len(self.tool_call_start_tag):-len(self.tool_call_end_tag)]
            # Prefer a brace-balanced JSON extraction; fall back to the raw span
            json_str = self._extract_valid_json(tool_call_str)
            if json_str is None:
                json_str = tool_call_str
            try:
                tool_call_data = json.loads(json_str)
                tool_calls.append(self._create_tool_call(tool_call_data))
            except json.JSONDecodeError as e:
                # Skip malformed payloads but keep parsing the remaining calls
                logger.warning(f"Failed to parse tool call JSON: {tool_call_str}. Error: {e}")
                continue
        # Preserve plain text after the last tool call
        trailing = llm_response[cursor:].strip()
        if trailing:
            content_parts.append(trailing)
        content = " ".join(content_parts) if content_parts else None
        return ResponseMessage(
            content=content,
            tool_calls=tool_calls if tool_calls else None
        )

    def _parse_text_only_response(self, llm_response: str) -> ResponseMessage:
        """
        Parse a response with no tool calls.

        Args:
            llm_response: The full LLM response

        Returns:
            ResponseMessage with content only
        """
        return ResponseMessage(content=llm_response.strip())

    def _create_tool_call(self, tool_call_data: Dict[str, Any]) -> ToolCall:
        """
        Create a ToolCall object from parsed data.

        Args:
            tool_call_data: Dictionary containing 'name' and optionally 'arguments'

        Returns:
            ToolCall object

        Raises:
            ToolCallParseError: If the required 'name' field is missing
        """
        name = tool_call_data.get("name")
        if not name:
            raise ToolCallParseError("Tool call missing 'name' field")
        arguments = tool_call_data.get("arguments", {})
        # Generate a unique-per-call ID; the name prefix aids log readability
        tool_call_id = f"call_{name}_{str(uuid4())[:8]}"
        return ToolCall(
            id=tool_call_id,
            type="function",
            function=ToolCallFunction(
                name=name,
                # OpenAI format carries arguments as a JSON string, not a dict
                arguments=json.dumps(arguments)
            )
        )

    def parse_streaming_chunks(self, chunks: List[str]) -> ResponseMessage:
        """
        Parse a list of streaming chunks and aggregate into a ResponseMessage.

        This method handles streaming responses where tool calls might be
        split across multiple chunks.

        Args:
            chunks: List of content chunks from streaming response

        Returns:
            Parsed ResponseMessage
        """
        full_content = "".join(chunks)
        return self.parse(full_content)

    def parse_native_tool_calls(self, llm_response: Dict[str, Any]) -> ResponseMessage:
        """
        Parse a response that already has native OpenAI-format tool calls.

        Some LLMs natively support tool calling and return them in the standard
        OpenAI format. This method handles those responses.

        Args:
            llm_response: Dictionary response from LLM with potential tool_calls field

        Returns:
            ResponseMessage with parsed tool_calls or content
        """
        if "tool_calls" in llm_response and llm_response["tool_calls"]:
            # Parse native tool calls
            tool_calls = []
            for tc in llm_response["tool_calls"]:
                raw_args = tc["function"]["arguments"]
                tool_calls.append(ToolCall(
                    id=tc.get("id", f"call_{str(uuid4())[:8]}"),
                    type=tc.get("type", "function"),
                    function=ToolCallFunction(
                        name=tc["function"]["name"],
                        # Some providers return arguments as a dict rather than
                        # the OpenAI-standard JSON string; normalize to str.
                        arguments=raw_args if isinstance(raw_args, str) else json.dumps(raw_args)
                    )
                ))
            return ResponseMessage(
                content=llm_response.get("content"),
                tool_calls=tool_calls
            )
        else:
            # Fallback to text parsing
            content = llm_response.get("content", "")
            return self.parse(content)
# Convenience functions for backward compatibility and ease of use
def parse_response(llm_response: str) -> ResponseMessage:
    """
    Parse an LLM response with the default parser configuration.

    Convenience wrapper around ResponseParser for simple use cases.

    Args:
        llm_response: The raw text response from the LLM

    Returns:
        ResponseMessage with parsed content and tool calls
    """
    return ResponseParser().parse(llm_response)
def parse_response_with_custom_tags(llm_response: str,
                                    start_tag: str,
                                    end_tag: str) -> ResponseMessage:
    """
    Parse an LLM response using custom tool call tags.

    Args:
        llm_response: The raw text response from the LLM
        start_tag: Custom start tag for tool calls
        end_tag: Custom end tag for tool calls

    Returns:
        ResponseMessage with parsed content and tool calls
    """
    custom_parser = ResponseParser(
        tool_call_start_tag=start_tag,
        tool_call_end_tag=end_tag,
    )
    return custom_parser.parse(llm_response)