feat: 增强工具调用代理功能，支持多工具调用和消息历史转换

主要改进:
- 新增 convert_tool_calls_to_content 函数，将消息历史中的 tool_calls 转换为 LLM 可理解的 XML 格式
- 修复 response_parser，使其支持同时解析多个 tool_calls
- 优化响应解析逻辑，支持 content 和 tool_calls 同时存在
- 添加完整的测试覆盖，包括多工具调用、消息转换和混合响应

技术细节:
- services.py: 实现工具调用历史到 content 的转换
- response_parser.py: 使用非贪婪匹配支持多个 tool_calls 解析
- main.py: 集成消息转换功能，确保消息历史正确传递给 LLM

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-31 13:33:25 +00:00
parent f7508d915b
commit 5c2904e010
6 changed files with 624 additions and 31 deletions
--- a/test_multiple_tool_calls.py
+++ b/test_multiple_tool_calls.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+"""
+测试多个 tool_calls 的完整流程
+"""
+import sys
+import os
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+from app.services import convert_tool_calls_to_content
+from app.response_parser import ResponseParser
+from app.models import ChatMessage
+
+def test_multiple_tool_calls():
+    """Test the end-to-end flow for multiple tool_calls: history conversion, then response parsing."""
+
+    print("=" * 60)
+    print("测试场景：消息历史中有多个 tool_calls")
+    print("=" * 60)
+
+    # Simulate a conversation scenario:
+    # the user asks about the weather in Beijing and Shanghai, and the assistant issues two tool calls
+    messages = [
+        ChatMessage(
+            role="user",
+            content="帮我查一下北京和上海的天气"
+        ),
+        ChatMessage(
+            role="assistant",
+            tool_calls=[
+                {
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "arguments": '{"location": "北京", "unit": "celsius"}'
+                    }
+                },
+                {
+                    "id": "call_2",
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "arguments": '{"location": "上海", "unit": "celsius"}'
+                    }
+                }
+            ]
+        ),
+        ChatMessage(
+            role="user",
+            content="结果怎么样？"
+        )
+    ]
+
+    print("\n1. 原始消息:")
+    for i, msg in enumerate(messages):
+        print(f"   消息 {i+1}: {msg.role}")
+        if msg.content:
+            print(f"      内容: {msg.content}")
+        if msg.tool_calls:
+            print(f"      工具调用: {len(msg.tool_calls)} 个")
+            for j, tc in enumerate(msg.tool_calls):
+                print(f"         {j+1}. {tc['function']['name']}")
+
+    # Convert the tool_calls in the message history into content
+    print("\n2. 转换后的消息（发送给 LLM）:")
+    converted = convert_tool_calls_to_content(messages)
+    for i, msg in enumerate(converted):
+        print(f"   消息 {i+1}: {msg.role}")
+        if msg.content:
+            # Show only the first 150 characters
+            content_preview = msg.content[:150] + "..." if len(msg.content) > 150 else msg.content
+            print(f"      内容: {content_preview}")
+
+    # Verify the conversion: the assistant message must contain exactly two <invoke> tags
+    assert "<invoke>" in converted[1].content
+    assert converted[1].content.count("<invoke>") == 2
+    print("\n   ✓ 转换成功！两个 tool_calls 都被转换成 XML 标签格式")
+
+    # Simulate a new LLM response (which also contains multiple tool_calls)
+    print("\n3. 模拟 LLM 响应（包含多个 tool_calls）:")
+    llm_response = '''好的，我来帮你查一下其他城市的天气。
+
+<invoke>{"name": "get_weather", "arguments": {"location": "广州"}}</invoke>
+<invoke>{"name": "get_weather", "arguments": {"location": "深圳"}}</invoke>
+
+请稍等。'''
+
+    print(f"   {llm_response}")
+
+    # Parse the LLM response
+    print("\n4. 解析 LLM 响应:")
+    parser = ResponseParser()
+    parsed = parser.parse(llm_response)
+
+    print(f"   Content: {parsed.content}")
+    print(f"   Tool calls 数量: {len(parsed.tool_calls) if parsed.tool_calls else 0}")
+
+    if parsed.tool_calls:
+        for i, tc in enumerate(parsed.tool_calls):
+            import json  # NOTE(review): executed on every loop iteration; could be hoisted to the module imports
+            args = json.loads(tc.function.arguments)
+            print(f"      {i+1}. {tc.function.name}(location={args['location']})")
+
+    # Verify the parsing: both tool calls must be extracted
+    assert parsed.tool_calls is not None
+    assert len(parsed.tool_calls) == 2
+    assert parsed.tool_calls[0].function.name == "get_weather"
+    assert parsed.tool_calls[1].function.name == "get_weather"
+    print("\n   ✓ 解析成功！两个 tool_calls 都被正确提取")
+
+    # Test scenario 2: a single tool_call (backward compatibility)
+    print("\n" + "=" * 60)
+    print("测试场景：单个 tool_call（向后兼容性）")
+    print("=" * 60)
+
+    single_response = '''我来帮你查询。
+
+<invoke>{"name": "search", "arguments": {"query": "今天天气"}}</invoke>'''
+
+    parsed_single = parser.parse(single_response)
+    print(f"Content: {parsed_single.content}")
+    print(f"Tool calls 数量: {len(parsed_single.tool_calls) if parsed_single.tool_calls else 0}")
+
+    assert parsed_single.tool_calls is not None
+    assert len(parsed_single.tool_calls) == 1
+    assert parsed_single.tool_calls[0].function.name == "search"
+    print("✓ 单个 tool_call 解析正常")
+
+    # Test scenario 3: no tool_call
+    print("\n" + "=" * 60)
+    print("测试场景：没有 tool_call")
+    print("=" * 60)
+
+    no_tool_response = "你好！有什么可以帮助你的吗？"
+
+    parsed_no_tool = parser.parse(no_tool_response)
+    print(f"Content: {parsed_no_tool.content}")
+    print(f"Tool calls: {parsed_no_tool.tool_calls}")
+
+    assert parsed_no_tool.content == no_tool_response
+    assert parsed_no_tool.tool_calls is None
+    print("✓ 普通文本响应解析正常")
+
+    print("\n" + "=" * 60)
+    print("所有测试通过! ✓")
+    print("=" * 60)
+    print("\n总结：")
+    print("- 消息历史中的多个 tool_calls 可以正确转换为 XML 格式")
+    print("- LLM 响应中的多个 tool_calls 可以正确解析")
+    print("- 向后兼容单个 tool_call 和普通文本响应")
+
+if __name__ == "__main__":  # Run the test directly when this file is executed as a script
+    test_multiple_tool_calls()