67 lines
1.7 KiB
JSON
67 lines
1.7 KiB
JSON
{
|
|
"trajectory_id": "traj-failure-overtool-001",
|
|
"task": {
|
|
"task_id": "task-003",
|
|
"input": "Name this project.",
|
|
"channel": "telegram",
|
|
"created_at": "2026-04-14T13:04:00Z",
|
|
"user_id": "oza"
|
|
},
|
|
"context_snapshot": {
|
|
"conversation_summary": "User asks for naming help for an agent memory project.",
|
|
"environment_summary": "No real-time state lookup required.",
|
|
"recent_failures": ["The agent previously overused tools for pure reasoning tasks."]
|
|
},
|
|
"candidate_sets": {
|
|
"memory": [],
|
|
"skill": [],
|
|
"tool": [
|
|
{
|
|
"id": "tool-web-1",
|
|
"type": "tool",
|
|
"title": "web_search",
|
|
"summary": "Search the web for information.",
|
|
"triggers": ["name", "idea"],
|
|
"cost": 0.4,
|
|
"confidence": 0.62,
|
|
"success_rate": 0.55,
|
|
"freshness": 1.0,
|
|
"risk": 0.3,
|
|
"tags": ["research"],
|
|
"source": "system"
|
|
}
|
|
]
|
|
},
|
|
"decisions": [
|
|
{
|
|
"step": 1,
|
|
"decision_type": "call_tool",
|
|
"selected_ids": ["tool-web-1"],
|
|
"rejected_ids": [],
|
|
"rationale": "Incorrectly treated naming as a research task rather than a reasoning task.",
|
|
"estimated_cost": 0.4
|
|
}
|
|
],
|
|
"events": [],
|
|
"outcome": {
|
|
"status": "failure",
|
|
"steps": 2,
|
|
"latency_ms": 2400,
|
|
"user_corrections": 1,
|
|
"tool_errors": 1,
|
|
"notes": "Over-tooled a pure reasoning task and forced unnecessary latency."
|
|
},
|
|
"reward": {
|
|
"total": -0.82,
|
|
"components": {
|
|
"task_success": -0.3,
|
|
"retrieval_hit": 0.0,
|
|
"tool_error": 0.35,
|
|
"user_correction": 0.25,
|
|
"latency": 0.12,
|
|
"context_cost": 0.1,
|
|
"useful_reuse": 0.0
|
|
}
|
|
}
|
|
} |