feat: 增强工具调用代理功能,支持多工具调用和消息历史转换

主要改进:
- 新增 convert_tool_calls_to_content 函数,将消息历史中的 tool_calls 转换为 LLM 可理解的 XML 格式
- 修复 response_parser,使其支持同时解析多个 tool_calls
- 优化响应解析逻辑,支持 content 和 tool_calls 同时存在
- 添加完整的测试覆盖,包括多工具调用、消息转换和混合响应

技术细节:
- services.py: 实现工具调用历史到 content 的转换
- response_parser.py: 使用非贪婪匹配支持多个 tool_calls 解析
- main.py: 集成消息转换功能,确保消息历史正确传递给 LLM

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Vertex-AI-Step-Builder
2025-12-31 13:33:25 +00:00
parent f7508d915b
commit 5c2904e010
6 changed files with 624 additions and 31 deletions

154
test_multiple_tool_calls.py Normal file
View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
"""
Test the complete flow for multiple tool_calls.
"""
import sys
import os
# Put this file's own directory at the front of sys.path before the `app.*`
# imports below run. NOTE(review): presumably the `app` package sits next to
# this file so the script can be launched directly -- confirm the layout.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from app.services import convert_tool_calls_to_content
from app.response_parser import ResponseParser
from app.models import ChatMessage
def test_multiple_tool_calls():
    """Exercise history conversion and response parsing with multiple tool calls.

    The test runs four checks in sequence and prints progress as it goes:

    1. A message history whose assistant turn carries two ``tool_calls`` is
       passed to ``convert_tool_calls_to_content``; the converted assistant
       message must contain exactly two ``<invoke>`` occurrences.
    2. ``ResponseParser.parse`` is given text with two ``<invoke>`` blocks and
       must return two tool calls, both named ``get_weather``.
    3. A response with one ``<invoke>`` block must yield exactly one tool
       call named ``search``.
    4. A plain-text response must come back unchanged as ``content`` with
       ``tool_calls`` set to ``None``.

    Raises:
        AssertionError: if any of the expectations above does not hold.
    """
    # Imported once, locally, rather than on every iteration of the loop that
    # decodes the parsed tool-call arguments.
    import json

    rule = "=" * 60

    print(rule)
    print("测试场景:消息历史中有多个 tool_calls")
    print(rule)

    # Simulated conversation: the user asks for the weather in Beijing and
    # Shanghai, and the assistant answered with two tool calls.
    messages = [
        ChatMessage(
            role="user",
            content="帮我查一下北京和上海的天气"
        ),
        ChatMessage(
            role="assistant",
            tool_calls=[
                {
                    "id": "call_1",
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "arguments": '{"location": "北京", "unit": "celsius"}'
                    }
                },
                {
                    "id": "call_2",
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "arguments": '{"location": "上海", "unit": "celsius"}'
                    }
                }
            ]
        ),
        ChatMessage(
            role="user",
            content="结果怎么样?"
        )
    ]

    print("\n1. 原始消息:")
    for msg_no, msg in enumerate(messages, start=1):
        print(f" 消息 {msg_no}: {msg.role}")
        if msg.content:
            print(f" 内容: {msg.content}")
        if msg.tool_calls:
            print(f" 工具调用: {len(msg.tool_calls)}")
            for call_no, tc in enumerate(msg.tool_calls, start=1):
                print(f" {call_no}. {tc['function']['name']}")

    # Convert tool_calls in the history into message content.
    print("\n2. 转换后的消息(发送给 LLM:")
    converted = convert_tool_calls_to_content(messages)
    for msg_no, msg in enumerate(converted, start=1):
        print(f" 消息 {msg_no}: {msg.role}")
        if msg.content:
            # Show only the first 150 characters to keep the output readable.
            if len(msg.content) > 150:
                content_preview = msg.content[:150] + "..."
            else:
                content_preview = msg.content
            print(f" 内容: {content_preview}")

    # Verify the conversion. The explicit None check makes a missing content
    # fail as an assertion instead of a TypeError from the `in` test below.
    assistant_content = converted[1].content
    assert assistant_content is not None
    assert "<invoke>" in assistant_content
    assert assistant_content.count("<invoke>") == 2
    print("\n ✓ 转换成功!两个 tool_calls 都被转换成 XML 标签格式")

    # Simulate a new LLM response that itself contains several tool calls.
    print("\n3. 模拟 LLM 响应(包含多个 tool_calls:")
    llm_response = '''好的,我来帮你查一下其他城市的天气。
<invoke>{"name": "get_weather", "arguments": {"location": "广州"}}</invoke>
<invoke>{"name": "get_weather", "arguments": {"location": "深圳"}}</invoke>
请稍等。'''
    print(f" {llm_response}")

    # Parse the LLM response.
    print("\n4. 解析 LLM 响应:")
    parser = ResponseParser()
    parsed = parser.parse(llm_response)
    print(f" Content: {parsed.content}")
    print(f" Tool calls 数量: {len(parsed.tool_calls) if parsed.tool_calls else 0}")
    if parsed.tool_calls:
        for call_no, tc in enumerate(parsed.tool_calls, start=1):
            # The arguments are decoded with json.loads before being printed.
            args = json.loads(tc.function.arguments)
            print(f" {call_no}. {tc.function.name}(location={args['location']})")

    # Verify the parse result.
    assert parsed.tool_calls is not None
    assert len(parsed.tool_calls) == 2
    assert parsed.tool_calls[0].function.name == "get_weather"
    assert parsed.tool_calls[1].function.name == "get_weather"
    print("\n ✓ 解析成功!两个 tool_calls 都被正确提取")

    # Scenario 2: a single tool call (backward compatibility).
    print("\n" + rule)
    print("测试场景:单个 tool_call向后兼容性")
    print(rule)
    single_response = '''我来帮你查询。
<invoke>{"name": "search", "arguments": {"query": "今天天气"}}</invoke>'''
    parsed_single = parser.parse(single_response)
    print(f"Content: {parsed_single.content}")
    print(f"Tool calls 数量: {len(parsed_single.tool_calls) if parsed_single.tool_calls else 0}")
    assert parsed_single.tool_calls is not None
    assert len(parsed_single.tool_calls) == 1
    assert parsed_single.tool_calls[0].function.name == "search"
    print("✓ 单个 tool_call 解析正常")

    # Scenario 3: a response without any tool call.
    print("\n" + rule)
    print("测试场景:没有 tool_call")
    print(rule)
    no_tool_response = "你好!有什么可以帮助你的吗?"
    parsed_no_tool = parser.parse(no_tool_response)
    print(f"Content: {parsed_no_tool.content}")
    print(f"Tool calls: {parsed_no_tool.tool_calls}")
    assert parsed_no_tool.content == no_tool_response
    assert parsed_no_tool.tool_calls is None
    print("✓ 普通文本响应解析正常")

    print("\n" + rule)
    print("所有测试通过! ✓")
    print(rule)
    print("\n总结:")
    print("- 消息历史中的多个 tool_calls 可以正确转换为 XML 格式")
    print("- LLM 响应中的多个 tool_calls 可以正确解析")
    print("- 向后兼容单个 tool_call 和普通文本响应")
if __name__ == "__main__":
    # Direct execution as a script: run the single test function defined above.
    test_multiple_tool_calls()