Initial standalone memabra release
This commit is contained in:
66
docs/examples/trajectory_failure_missed_memory.json
Normal file
66
docs/examples/trajectory_failure_missed_memory.json
Normal file
@@ -0,0 +1,66 @@
|
||||
{
|
||||
"trajectory_id": "traj-failure-missed-memory-001",
|
||||
"task": {
|
||||
"task_id": "task-004",
|
||||
"input": "Use my usual formatting preferences for this write-up.",
|
||||
"channel": "telegram",
|
||||
"created_at": "2026-04-14T13:05:00Z",
|
||||
"user_id": "oza"
|
||||
},
|
||||
"context_snapshot": {
|
||||
"conversation_summary": "User has repeated stable formatting preferences in earlier sessions.",
|
||||
"environment_summary": "No tool call required.",
|
||||
"recent_failures": []
|
||||
},
|
||||
"candidate_sets": {
|
||||
"memory": [
|
||||
{
|
||||
"id": "mem-format-1",
|
||||
"type": "memory",
|
||||
"title": "Telegram formatting preference",
|
||||
"summary": "Prefer plain text over markdown for Telegram delivery.",
|
||||
"triggers": ["format", "telegram", "write-up"],
|
||||
"cost": 0.05,
|
||||
"confidence": 0.9,
|
||||
"success_rate": 0.95,
|
||||
"freshness": 0.95,
|
||||
"risk": 0.05,
|
||||
"tags": ["preference", "output"],
|
||||
"source": "system"
|
||||
}
|
||||
],
|
||||
"skill": [],
|
||||
"tool": []
|
||||
},
|
||||
"decisions": [
|
||||
{
|
||||
"step": 1,
|
||||
"decision_type": "direct_answer",
|
||||
"selected_ids": [],
|
||||
"rejected_ids": ["mem-format-1"],
|
||||
"rationale": "Router failed to recognize a preference-triggered task and skipped memory injection.",
|
||||
"estimated_cost": 0.0
|
||||
}
|
||||
],
|
||||
"events": [],
|
||||
"outcome": {
|
||||
"status": "partial_success",
|
||||
"steps": 1,
|
||||
"latency_ms": 300,
|
||||
"user_corrections": 1,
|
||||
"tool_errors": 0,
|
||||
"notes": "Answer was serviceable but ignored known formatting preference."
|
||||
},
|
||||
"reward": {
|
||||
"total": 0.18,
|
||||
"components": {
|
||||
"task_success": 0.5,
|
||||
"retrieval_hit": -0.1,
|
||||
"tool_error": 0.0,
|
||||
"user_correction": 0.2,
|
||||
"latency": 0.02,
|
||||
"context_cost": 0.0,
|
||||
"useful_reuse": 0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
67
docs/examples/trajectory_failure_overtool.json
Normal file
67
docs/examples/trajectory_failure_overtool.json
Normal file
@@ -0,0 +1,67 @@
|
||||
{
|
||||
"trajectory_id": "traj-failure-overtool-001",
|
||||
"task": {
|
||||
"task_id": "task-003",
|
||||
"input": "Name this project.",
|
||||
"channel": "telegram",
|
||||
"created_at": "2026-04-14T13:04:00Z",
|
||||
"user_id": "oza"
|
||||
},
|
||||
"context_snapshot": {
|
||||
"conversation_summary": "User asks for naming help for an agent memory project.",
|
||||
"environment_summary": "No real-time state lookup required.",
|
||||
"recent_failures": ["The agent previously overused tools for pure reasoning tasks."]
|
||||
},
|
||||
"candidate_sets": {
|
||||
"memory": [],
|
||||
"skill": [],
|
||||
"tool": [
|
||||
{
|
||||
"id": "tool-web-1",
|
||||
"type": "tool",
|
||||
"title": "web_search",
|
||||
"summary": "Search the web for information.",
|
||||
"triggers": ["name", "idea"],
|
||||
"cost": 0.4,
|
||||
"confidence": 0.62,
|
||||
"success_rate": 0.55,
|
||||
"freshness": 1.0,
|
||||
"risk": 0.3,
|
||||
"tags": ["research"],
|
||||
"source": "system"
|
||||
}
|
||||
]
|
||||
},
|
||||
"decisions": [
|
||||
{
|
||||
"step": 1,
|
||||
"decision_type": "call_tool",
|
||||
"selected_ids": ["tool-web-1"],
|
||||
"rejected_ids": [],
|
||||
"rationale": "Incorrectly treated naming as a research task rather than a reasoning task.",
|
||||
"estimated_cost": 0.4
|
||||
}
|
||||
],
|
||||
"events": [],
|
||||
"outcome": {
|
||||
"status": "failure",
|
||||
"steps": 2,
|
||||
"latency_ms": 2400,
|
||||
"user_corrections": 1,
|
||||
"tool_errors": 1,
|
||||
"notes": "Over-tooled a pure reasoning task and forced unnecessary latency."
|
||||
},
|
||||
"reward": {
|
||||
"total": -1.12,
|
||||
"components": {
|
||||
"task_success": -0.3,
|
||||
"retrieval_hit": 0.0,
|
||||
"tool_error": 0.35,
|
||||
"user_correction": 0.25,
|
||||
"latency": 0.12,
|
||||
"context_cost": 0.1,
|
||||
"useful_reuse": 0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
66
docs/examples/trajectory_success_memory.json
Normal file
66
docs/examples/trajectory_success_memory.json
Normal file
@@ -0,0 +1,66 @@
|
||||
{
|
||||
"trajectory_id": "traj-success-memory-001",
|
||||
"task": {
|
||||
"task_id": "task-001",
|
||||
"input": "Remember my preferred deployment region and use it next time.",
|
||||
"channel": "telegram",
|
||||
"created_at": "2026-04-14T13:02:00Z",
|
||||
"user_id": "oza"
|
||||
},
|
||||
"context_snapshot": {
|
||||
"conversation_summary": "User is defining a local agent memory project and references recurring preferences.",
|
||||
"environment_summary": "No live tool call required.",
|
||||
"recent_failures": []
|
||||
},
|
||||
"candidate_sets": {
|
||||
"memory": [
|
||||
{
|
||||
"id": "mem-region-1",
|
||||
"type": "memory",
|
||||
"title": "Preferred deployment region",
|
||||
"summary": "User prefers us-west-2 for deployments.",
|
||||
"triggers": ["deployment", "region", "preference"],
|
||||
"cost": 0.1,
|
||||
"confidence": 0.93,
|
||||
"success_rate": 0.88,
|
||||
"freshness": 0.9,
|
||||
"risk": 0.1,
|
||||
"tags": ["preference", "deployment"],
|
||||
"source": "user"
|
||||
}
|
||||
],
|
||||
"skill": [],
|
||||
"tool": []
|
||||
},
|
||||
"decisions": [
|
||||
{
|
||||
"step": 1,
|
||||
"decision_type": "inject_memory",
|
||||
"selected_ids": ["mem-region-1"],
|
||||
"rejected_ids": [],
|
||||
"rationale": "User request depends on a stable preference, so memory injection is the lowest-cost correct route.",
|
||||
"estimated_cost": 0.1
|
||||
}
|
||||
],
|
||||
"events": [],
|
||||
"outcome": {
|
||||
"status": "success",
|
||||
"steps": 1,
|
||||
"latency_ms": 350,
|
||||
"user_corrections": 0,
|
||||
"tool_errors": 0,
|
||||
"notes": "Correctly identified preference storage request without unnecessary tools."
|
||||
},
|
||||
"reward": {
|
||||
"total": 1.72,
|
||||
"components": {
|
||||
"task_success": 1.0,
|
||||
"retrieval_hit": 0.45,
|
||||
"tool_error": 0.0,
|
||||
"user_correction": 0.0,
|
||||
"latency": 0.03,
|
||||
"context_cost": 0.05,
|
||||
"useful_reuse": 0.35
|
||||
}
|
||||
}
|
||||
}
|
||||
67
docs/examples/trajectory_success_tool.json
Normal file
67
docs/examples/trajectory_success_tool.json
Normal file
@@ -0,0 +1,67 @@
|
||||
{
|
||||
"trajectory_id": "traj-success-tool-001",
|
||||
"task": {
|
||||
"task_id": "task-002",
|
||||
"input": "Check the current test status for the prototype.",
|
||||
"channel": "telegram",
|
||||
"created_at": "2026-04-14T13:03:00Z",
|
||||
"user_id": "oza"
|
||||
},
|
||||
"context_snapshot": {
|
||||
"conversation_summary": "User wants concrete progress on the memabra prototype.",
|
||||
"environment_summary": "Pytest is available in the local repo environment.",
|
||||
"recent_failures": []
|
||||
},
|
||||
"candidate_sets": {
|
||||
"memory": [],
|
||||
"skill": [],
|
||||
"tool": [
|
||||
{
|
||||
"id": "tool-terminal-1",
|
||||
"type": "tool",
|
||||
"title": "terminal",
|
||||
"summary": "Run shell commands in the local environment.",
|
||||
"triggers": ["check", "current", "test"],
|
||||
"cost": 0.2,
|
||||
"confidence": 0.95,
|
||||
"success_rate": 0.92,
|
||||
"freshness": 1.0,
|
||||
"risk": 0.2,
|
||||
"tags": ["system", "tests"],
|
||||
"source": "system"
|
||||
}
|
||||
]
|
||||
},
|
||||
"decisions": [
|
||||
{
|
||||
"step": 1,
|
||||
"decision_type": "call_tool",
|
||||
"selected_ids": ["tool-terminal-1"],
|
||||
"rejected_ids": [],
|
||||
"rationale": "Current test status is a live system fact and must be observed with a tool.",
|
||||
"estimated_cost": 0.2
|
||||
}
|
||||
],
|
||||
"events": [],
|
||||
"outcome": {
|
||||
"status": "success",
|
||||
"steps": 1,
|
||||
"latency_ms": 700,
|
||||
"user_corrections": 0,
|
||||
"tool_errors": 0,
|
||||
"notes": "Terminal used appropriately to inspect live test state."
|
||||
},
|
||||
"reward": {
|
||||
"total": 1.6,
|
||||
"components": {
|
||||
"task_success": 1.0,
|
||||
"retrieval_hit": 0.4,
|
||||
"tool_error": 0.0,
|
||||
"user_correction": 0.0,
|
||||
"latency": 0.08,
|
||||
"context_cost": 0.02,
|
||||
"useful_reuse": 0.3
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user