#!/usr/bin/env python3
"""
End-to-end check of the flow for messages carrying multiple tool_calls.
"""
import json
import os
import sys

# Make the sibling ``app`` package importable when this file is run directly.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from app.services import convert_tool_calls_to_content
from app.response_parser import ResponseParser
from app.models import ChatMessage


def test_multiple_tool_calls():
    """Exercise conversion and parsing of multiple tool_calls.

    Three scenarios are run in order, printing progress as they go:

    1. A history whose assistant message holds two tool_calls is passed
       through ``convert_tool_calls_to_content`` and an LLM reply that
       requests two calls is parsed with ``ResponseParser.parse``.
    2. A reply containing a single tool call is parsed.
    3. A plain-text reply is parsed and must come back unchanged with
       ``tool_calls`` set to ``None``.

    Raises:
        AssertionError: if any of the checks above does not hold.
    """
    banner = "=" * 60

    print(banner)
    print("测试场景:消息历史中有多个 tool_calls")
    print(banner)

    # Simulated conversation: the user asks for the weather in two cities
    # and the assistant answers by calling the same tool twice.
    messages = [
        ChatMessage(
            role="user",
            content="帮我查一下北京和上海的天气",
        ),
        ChatMessage(
            role="assistant",
            tool_calls=[
                {
                    "id": "call_1",
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "arguments": '{"location": "北京", "unit": "celsius"}',
                    },
                },
                {
                    "id": "call_2",
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "arguments": '{"location": "上海", "unit": "celsius"}',
                    },
                },
            ],
        ),
        ChatMessage(
            role="user",
            content="结果怎么样?",
        ),
    ]

    print("\n1. 原始消息:")
    for index, message in enumerate(messages):
        print(f" 消息 {index+1}: {message.role}")
        if message.content:
            print(f" 内容: {message.content}")
        if message.tool_calls:
            print(f" 工具调用: {len(message.tool_calls)} 个")
            for call_index, call in enumerate(message.tool_calls):
                print(f" {call_index+1}. {call['function']['name']}")

    # Fold the tool_calls into the message content.
    print("\n2. 转换后的消息(发送给 LLM):")
    converted = convert_tool_calls_to_content(messages)
    for index, message in enumerate(converted):
        print(f" 消息 {index+1}: {message.role}")
        if message.content:
            # Only show the first 150 characters.
            if len(message.content) > 150:
                content_preview = message.content[:150] + "..."
            else:
                content_preview = message.content
            print(f" 内容: {content_preview}")

    # Verify the conversion.
    # NOTE(review): the marker in both assertions is an empty string.
    # `"" in s` is always true and `s.count("")` equals `len(s) + 1`, so
    # these checks do not test for any tag. Presumably a tag literal was
    # lost here - restore the marker that convert_tool_calls_to_content
    # actually emits.
    assert "" in converted[1].content
    assert converted[1].content.count("") == 2
    print("\n ✓ 转换成功!两个 tool_calls 都被转换成 XML 标签格式")

    # Simulate a new LLM reply that itself contains several tool calls.
    # NOTE(review): the JSON payloads carry no surrounding markup and the
    # pieces are separated by single spaces; presumably the original literal
    # had tags and line breaks - confirm against ResponseParser's format.
    print("\n3. 模拟 LLM 响应(包含多个 tool_calls):")
    llm_response = (
        '好的,我来帮你查一下其他城市的天气。 '
        '{"name": "get_weather", "arguments": {"location": "广州"}} '
        '{"name": "get_weather", "arguments": {"location": "深圳"}} '
        '请稍等。'
    )
    print(f" {llm_response}")

    # Parse the LLM reply.
    print("\n4. 解析 LLM 响应:")
    parser = ResponseParser()
    parsed = parser.parse(llm_response)
    print(f" Content: {parsed.content}")
    print(f" Tool calls 数量: {len(parsed.tool_calls) if parsed.tool_calls else 0}")
    if parsed.tool_calls:
        for index, call in enumerate(parsed.tool_calls):
            # The arguments are read as a JSON-encoded string.
            args = json.loads(call.function.arguments)
            print(f" {index+1}. {call.function.name}(location={args['location']})")

    # Verify the parse result.
    assert parsed.tool_calls is not None
    assert len(parsed.tool_calls) == 2
    assert parsed.tool_calls[0].function.name == "get_weather"
    assert parsed.tool_calls[1].function.name == "get_weather"
    print("\n ✓ 解析成功!两个 tool_calls 都被正确提取")

    # Scenario 2: a single tool call (backward compatibility).
    print("\n" + banner)
    print("测试场景:单个 tool_call(向后兼容性)")
    print(banner)

    single_response = (
        '我来帮你查询。 '
        '{"name": "search", "arguments": {"query": "今天天气"}}'
    )
    parsed_single = parser.parse(single_response)
    print(f"Content: {parsed_single.content}")
    print(f"Tool calls 数量: {len(parsed_single.tool_calls) if parsed_single.tool_calls else 0}")

    assert parsed_single.tool_calls is not None
    assert len(parsed_single.tool_calls) == 1
    assert parsed_single.tool_calls[0].function.name == "search"
    print("✓ 单个 tool_call 解析正常")

    # Scenario 3: no tool call at all.
    print("\n" + banner)
    print("测试场景:没有 tool_call")
    print(banner)

    no_tool_response = "你好!有什么可以帮助你的吗?"
    parsed_no_tool = parser.parse(no_tool_response)
    print(f"Content: {parsed_no_tool.content}")
    print(f"Tool calls: {parsed_no_tool.tool_calls}")

    assert parsed_no_tool.content == no_tool_response
    assert parsed_no_tool.tool_calls is None
    print("✓ 普通文本响应解析正常")

    print("\n" + banner)
    print("所有测试通过! ✓")
    print(banner)
    print("\n总结:")
    print("- 消息历史中的多个 tool_calls 可以正确转换为 XML 格式")
    print("- LLM 响应中的多个 tool_calls 可以正确解析")
    print("- 向后兼容单个 tool_call 和普通文本响应")


if __name__ == "__main__":
    test_multiple_tool_calls()