feat: 增强工具调用代理功能,支持多工具调用和消息历史转换

主要改进:
- 新增 convert_tool_calls_to_content 函数,将消息历史中的 tool_calls 转换为 LLM 可理解的 XML 格式
- 修复 response_parser 支持同时解析多个 tool_calls
- 优化响应解析逻辑,支持 content 和 tool_calls 同时存在
- 添加完整的测试覆盖,包括多工具调用、消息转换和混合响应

技术细节:
- services.py: 实现工具调用历史到 content 的转换
- response_parser.py: 使用非贪婪匹配支持多个 tool_calls 解析
- main.py: 集成消息转换功能,确保消息历史正确传递给 LLM

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Vertex-AI-Step-Builder
2025-12-31 13:33:25 +00:00
parent f7508d915b
commit 5c2904e010
6 changed files with 624 additions and 31 deletions

View File

@@ -8,7 +8,7 @@ from fastapi import FastAPI, HTTPException, Depends, Request
from starlette.responses import StreamingResponse
from .models import IncomingRequest, ProxyResponse
from .services import process_chat_request, stream_llm_api, inject_tools_into_prompt, parse_llm_response_from_content, _parse_sse_data
from .services import process_chat_request, stream_llm_api, inject_tools_into_prompt, parse_llm_response_from_content, _parse_sse_data, convert_tool_calls_to_content
from .core.config import get_settings, Settings
from .database import init_db, log_request, update_request_log
@@ -87,8 +87,13 @@ async def chat_completions(
raise HTTPException(status_code=500, detail="LLM API Key or URL is not configured.")
messages_to_llm = request_obj.messages
# Convert assistant messages with tool_calls to content format
messages_to_llm = convert_tool_calls_to_content(messages_to_llm)
logger.info(f"Converted tool calls to content format for log ID: {log_id}")
if request_obj.tools:
messages_to_llm = inject_tools_into_prompt(request_obj.messages, request_obj.tools)
messages_to_llm = inject_tools_into_prompt(messages_to_llm, request_obj.tools)
# Handle streaming request
if request_obj.stream:

View File

@@ -60,10 +60,10 @@ class ResponseParser:
# Escape special regex characters in the tags
escaped_start = re.escape(self.tool_call_start_tag)
escaped_end = re.escape(self.tool_call_end_tag)
# Match from start tag to end tag (greedy), including both tags
# This ensures we capture the complete JSON object
# Use non-greedy matching to find all tool call occurrences
# This allows us to extract multiple tool calls from a single response
self._tool_call_pattern = re.compile(
f"{escaped_start}.*{escaped_end}",
f"{escaped_start}.*?{escaped_end}",
re.DOTALL
)
@@ -124,6 +124,7 @@ class ResponseParser:
This is the main entry point for parsing. It handles both:
1. Responses with tool calls (wrapped in tags)
2. Regular text responses
3. Multiple tool calls in a single response
Args:
llm_response: The raw text response from the LLM
@@ -145,10 +146,11 @@ class ResponseParser:
return ResponseMessage(content=None)
try:
match = self._tool_call_pattern.search(llm_response)
# Find all tool call occurrences
matches = list(self._tool_call_pattern.finditer(llm_response))
if match:
return self._parse_tool_call_response(llm_response, match)
if matches:
return self._parse_tool_call_response(llm_response, matches)
else:
return self._parse_text_only_response(llm_response)
@@ -156,44 +158,64 @@ class ResponseParser:
logger.warning(f"Failed to parse LLM response: {e}. Returning as text.")
return ResponseMessage(content=llm_response)
def _parse_tool_call_response(self, llm_response: str, match: re.Match) -> ResponseMessage:
def _parse_tool_call_response(self, llm_response: str, matches: List[re.Match]) -> ResponseMessage:
"""
Parse a response that contains tool calls.
Args:
llm_response: The full LLM response
match: The regex match object containing the tool call
matches: List of regex match objects containing the tool calls
Returns:
ResponseMessage with content and tool_calls
"""
# The match includes start and end tags, so strip them
matched_text = match.group(0)
tool_call_str = matched_text[len(self.tool_call_start_tag):-len(self.tool_call_end_tag)]
tool_calls = []
last_end = 0 # Track the position of the last tool call
# Extract valid JSON by finding matching braces
json_str = self._extract_valid_json(tool_call_str)
if json_str is None:
# Fallback to trying to parse the entire string
json_str = tool_call_str
for match in matches:
# The match includes start and end tags, so strip them
matched_text = match.group(0)
tool_call_str = matched_text[len(self.tool_call_start_tag):-len(self.tool_call_end_tag)]
try:
tool_call_data = json.loads(json_str)
# Extract valid JSON by finding matching braces
json_str = self._extract_valid_json(tool_call_str)
if json_str is None:
# Fallback to trying to parse the entire string
json_str = tool_call_str
# Extract content before the tool call tag
parts = llm_response.split(self.tool_call_start_tag, 1)
content = parts[0].strip() if parts[0] else None
try:
tool_call_data = json.loads(json_str)
# Create the tool call object
tool_call = self._create_tool_call(tool_call_data)
tool_calls.append(tool_call)
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse tool call JSON: {tool_call_str}. Error: {e}")
continue
# Create the tool call object
tool_call = self._create_tool_call(tool_call_data)
# Update the last end position
last_end = match.end()
return ResponseMessage(
content=content,
tool_calls=[tool_call]
)
# Extract content before the first tool call tag
first_match_start = matches[0].start()
content_before = llm_response[:first_match_start].strip() if first_match_start > 0 else None
except json.JSONDecodeError as e:
raise ToolCallParseError(f"Invalid JSON in tool call: {tool_call_str}. Error: {e}")
# Extract content between tool calls and after the last tool call
content_parts = []
if content_before:
content_parts.append(content_before)
# Check if there's content after the last tool call
content_after = llm_response[last_end:].strip() if last_end < len(llm_response) else None
if content_after:
content_parts.append(content_after)
# Combine all content parts
content = " ".join(content_parts) if content_parts else None
return ResponseMessage(
content=content,
tool_calls=tool_calls if tool_calls else None
)
def _parse_text_only_response(self, llm_response: str) -> ResponseMessage:
"""

View File

@@ -39,6 +39,70 @@ def _parse_sse_data(chunk: bytes) -> Optional[Dict[str, Any]]:
# --- End Helper ---
def convert_tool_calls_to_content(messages: List[ChatMessage]) -> List[ChatMessage]:
"""
Converts assistant messages with tool_calls into content format using XML tags.
This function processes the message history and converts any assistant messages
that have tool_calls into a format that LLMs can understand. The tool_calls
are converted to <invoke>...</invoke> tags in the content field.
Args:
messages: List of ChatMessage objects from the client
Returns:
Processed list of ChatMessage objects with tool_calls converted to content
Example:
Input: [{"role": "assistant", "tool_calls": [...]}]
Output: [{"role": "assistant", "content": "<invoke>{...}</invoke>"}]
"""
from .response_parser import TOOL_CALL_START_TAG, TOOL_CALL_END_TAG
processed_messages = []
for msg in messages:
# Check if this is an assistant message with tool_calls
if msg.role == "assistant" and msg.tool_calls and len(msg.tool_calls) > 0:
# Convert each tool call to XML tag format
tool_call_contents = []
for tc in msg.tool_calls:
tc_data = tc.get("function", {})
name = tc_data.get("name", "")
arguments_str = tc_data.get("arguments", "{}")
# Parse arguments JSON to ensure it's valid
try:
arguments = json.loads(arguments_str) if isinstance(arguments_str, str) else arguments_str
except json.JSONDecodeError:
arguments = {}
# Build the tool call JSON
tool_call_json = {"name": name, "arguments": arguments}
# Wrap in XML tags
tool_call_content = f'{TOOL_CALL_START_TAG}{json.dumps(tool_call_json, ensure_ascii=False)}{TOOL_CALL_END_TAG}'
tool_call_contents.append(tool_call_content)
# Create new message with tool calls in content
# Preserve original content if it exists
content_parts = []
if msg.content:
content_parts.append(msg.content)
content_parts.extend(tool_call_contents)
new_content = "\n".join(content_parts)
processed_messages.append(
ChatMessage(role=msg.role, content=new_content)
)
else:
# Keep other messages as-is
processed_messages.append(msg)
return processed_messages
def inject_tools_into_prompt(messages: List[ChatMessage], tools: List[Tool]) -> List[ChatMessage]:
"""
Injects a system prompt with tool definitions at the beginning of the message list.