feat: 增强工具调用代理功能，支持多工具调用和消息历史转换

主要改进: - 新增 convert_tool_calls_to_content 函数，将消息历史中的 tool_calls 转换为 LLM 可理解的 XML 格式 - 修复 response_parser 支持同时解析多个 tool_calls - 优化响应解析逻辑，支持 content 和 tool_calls 同时存在 - 添加完整的测试覆盖，包括多工具调用、消息转换和混合响应技术细节: - services.py: 实现工具调用历史到 content 的转换 - response_parser.py: 使用非贪婪匹配支持多个 tool_calls 解析 - main.py: 集成消息转换功能，确保消息历史正确传递给 LLM 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-31 13:33:25 +00:00
parent f7508d915b
commit 5c2904e010
6 changed files with 624 additions and 31 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -8,7 +8,7 @@ from fastapi import FastAPI, HTTPException, Depends, Request
 from starlette.responses import StreamingResponse

 from .models import IncomingRequest, ProxyResponse
-from .services import process_chat_request, stream_llm_api, inject_tools_into_prompt, parse_llm_response_from_content, _parse_sse_data
+from .services import process_chat_request, stream_llm_api, inject_tools_into_prompt, parse_llm_response_from_content, _parse_sse_data, convert_tool_calls_to_content
 from .core.config import get_settings, Settings
 from .database import init_db, log_request, update_request_log

@@ -87,8 +87,13 @@ async def chat_completions(
        raise HTTPException(status_code=500, detail="LLM API Key or URL is not configured.")

    messages_to_llm = request_obj.messages
+
+    # Convert assistant messages with tool_calls to content format
+    messages_to_llm = convert_tool_calls_to_content(messages_to_llm)
+    logger.info(f"Converted tool calls to content format for log ID: {log_id}")
+
    if request_obj.tools:
-        messages_to_llm = inject_tools_into_prompt(request_obj.messages, request_obj.tools)
+        messages_to_llm = inject_tools_into_prompt(messages_to_llm, request_obj.tools)

    # Handle streaming request
    if request_obj.stream:
--- a/app/response_parser.py
+++ b/app/response_parser.py
@@ -60,10 +60,10 @@ class ResponseParser:
        # Escape special regex characters in the tags
        escaped_start = re.escape(self.tool_call_start_tag)
        escaped_end = re.escape(self.tool_call_end_tag)
-        # Match from start tag to end tag (greedy), including both tags
-        # This ensures we capture the complete JSON object
+        # Use non-greedy matching to find all tool call occurrences
+        # This allows us to extract multiple tool calls from a single response
        self._tool_call_pattern = re.compile(
-            f"{escaped_start}.*{escaped_end}",
+            f"{escaped_start}.*?{escaped_end}",
            re.DOTALL
        )

@@ -124,6 +124,7 @@ class ResponseParser:
        This is the main entry point for parsing. It handles both:
        1. Responses with tool calls (wrapped in tags)
        2. Regular text responses
+        3. Multiple tool calls in a single response

        Args:
            llm_response: The raw text response from the LLM
@@ -145,10 +146,11 @@ class ResponseParser:
            return ResponseMessage(content=None)

        try:
-            match = self._tool_call_pattern.search(llm_response)
+            # Find all tool call occurrences
+            matches = list(self._tool_call_pattern.finditer(llm_response))

-            if match:
-                return self._parse_tool_call_response(llm_response, match)
+            if matches:
+                return self._parse_tool_call_response(llm_response, matches)
            else:
                return self._parse_text_only_response(llm_response)

@@ -156,44 +158,64 @@ class ResponseParser:
            logger.warning(f"Failed to parse LLM response: {e}. Returning as text.")
            return ResponseMessage(content=llm_response)

-    def _parse_tool_call_response(self, llm_response: str, match: re.Match) -> ResponseMessage:
+    def _parse_tool_call_response(self, llm_response: str, matches: List[re.Match]) -> ResponseMessage:
        """
        Parse a response that contains tool calls.

        Args:
            llm_response: The full LLM response
-            match: The regex match object containing the tool call
+            matches: List of regex match objects containing the tool calls

        Returns:
            ResponseMessage with content and tool_calls
        """
-        # The match includes start and end tags, so strip them
-        matched_text = match.group(0)
-        tool_call_str = matched_text[len(self.tool_call_start_tag):-len(self.tool_call_end_tag)]
+        tool_calls = []
+        last_end = 0  # Track the position of the last tool call

-        # Extract valid JSON by finding matching braces
-        json_str = self._extract_valid_json(tool_call_str)
-        if json_str is None:
-            # Fallback to trying to parse the entire string
-            json_str = tool_call_str
+        for match in matches:
+            # The match includes start and end tags, so strip them
+            matched_text = match.group(0)
+            tool_call_str = matched_text[len(self.tool_call_start_tag):-len(self.tool_call_end_tag)]

-        try:
-            tool_call_data = json.loads(json_str)
+            # Extract valid JSON by finding matching braces
+            json_str = self._extract_valid_json(tool_call_str)
+            if json_str is None:
+                # Fallback to trying to parse the entire string
+                json_str = tool_call_str

-            # Extract content before the tool call tag
-            parts = llm_response.split(self.tool_call_start_tag, 1)
-            content = parts[0].strip() if parts[0] else None
+            try:
+                tool_call_data = json.loads(json_str)
+                # Create the tool call object
+                tool_call = self._create_tool_call(tool_call_data)
+                tool_calls.append(tool_call)
+            except json.JSONDecodeError as e:
+                logger.warning(f"Failed to parse tool call JSON: {tool_call_str}. Error: {e}")
+                continue

-            # Create the tool call object
-            tool_call = self._create_tool_call(tool_call_data)
+            # Update the last end position
+            last_end = match.end()

-            return ResponseMessage(
-                content=content,
-                tool_calls=[tool_call]
-            )
+        # Extract content before the first tool call tag
+        first_match_start = matches[0].start()
+        content_before = llm_response[:first_match_start].strip() if first_match_start > 0 else None

-        except json.JSONDecodeError as e:
-            raise ToolCallParseError(f"Invalid JSON in tool call: {tool_call_str}. Error: {e}")
+        # Extract content between tool calls and after the last tool call
+        content_parts = []
+        if content_before:
+            content_parts.append(content_before)
+
+        # Check if there's content after the last tool call
+        content_after = llm_response[last_end:].strip() if last_end < len(llm_response) else None
+        if content_after:
+            content_parts.append(content_after)
+
+        # Combine all content parts
+        content = " ".join(content_parts) if content_parts else None
+
+        return ResponseMessage(
+            content=content,
+            tool_calls=tool_calls if tool_calls else None
+        )

    def _parse_text_only_response(self, llm_response: str) -> ResponseMessage:
        """
--- a/app/services.py
+++ b/app/services.py
@@ -39,6 +39,70 @@ def _parse_sse_data(chunk: bytes) -> Optional[Dict[str, Any]]:
 # --- End Helper ---


+def convert_tool_calls_to_content(messages: List[ChatMessage]) -> List[ChatMessage]:
+    """
+    Converts assistant messages with tool_calls into content format using XML tags.
+
+    This function processes the message history and converts any assistant messages
+    that have tool_calls into a format that LLMs can understand. The tool_calls
+    are converted to <invoke>...</invoke> tags in the content field.
+
+    Args:
+        messages: List of ChatMessage objects from the client
+
+    Returns:
+        Processed list of ChatMessage objects with tool_calls converted to content
+
+    Example:
+        Input:  [{"role": "assistant", "tool_calls": [...]}]
+        Output: [{"role": "assistant", "content": "<invoke>{...}</invoke>"}]
+    """
+    from .response_parser import TOOL_CALL_START_TAG, TOOL_CALL_END_TAG
+
+    processed_messages = []
+
+    for msg in messages:
+        # Check if this is an assistant message with tool_calls
+        if msg.role == "assistant" and msg.tool_calls and len(msg.tool_calls) > 0:
+            # Convert each tool call to XML tag format
+            tool_call_contents = []
+            for tc in msg.tool_calls:
+                tc_data = tc.get("function", {})
+                name = tc_data.get("name", "")
+                arguments_str = tc_data.get("arguments", "{}")
+
+                # Parse arguments JSON to ensure it's valid
+                try:
+                    arguments = json.loads(arguments_str) if isinstance(arguments_str, str) else arguments_str
+                except json.JSONDecodeError:
+                    arguments = {}
+
+                # Build the tool call JSON
+                tool_call_json = {"name": name, "arguments": arguments}
+
+                # Wrap in XML tags
+                tool_call_content = f'{TOOL_CALL_START_TAG}{json.dumps(tool_call_json, ensure_ascii=False)}{TOOL_CALL_END_TAG}'
+                tool_call_contents.append(tool_call_content)
+
+            # Create new message with tool calls in content
+            # Preserve original content if it exists
+            content_parts = []
+            if msg.content:
+                content_parts.append(msg.content)
+
+            content_parts.extend(tool_call_contents)
+            new_content = "\n".join(content_parts)
+
+            processed_messages.append(
+                ChatMessage(role=msg.role, content=new_content)
+            )
+        else:
+            # Keep other messages as-is
+            processed_messages.append(msg)
+
+    return processed_messages
+
+
 def inject_tools_into_prompt(messages: List[ChatMessage], tools: List[Tool]) -> List[ChatMessage]:
    """
    Injects a system prompt with tool definitions at the beginning of the message list.