Initial standalone memabra release

This commit is contained in:
Carlos Ouyang
2026-04-15 11:06:05 +08:00
commit 58f9f221b1
464 changed files with 30256 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
{
"trajectory_id": "traj-failure-missed-memory-001",
"task": {
"task_id": "task-004",
"input": "Use my usual formatting preferences for this write-up.",
"channel": "telegram",
"created_at": "2026-04-14T13:05:00Z",
"user_id": "oza"
},
"context_snapshot": {
"conversation_summary": "User has repeated stable formatting preferences in earlier sessions.",
"environment_summary": "No tool call required.",
"recent_failures": []
},
"candidate_sets": {
"memory": [
{
"id": "mem-format-1",
"type": "memory",
"title": "Telegram formatting preference",
"summary": "Prefer plain text over markdown for Telegram delivery.",
"triggers": ["format", "telegram", "write-up"],
"cost": 0.05,
"confidence": 0.9,
"success_rate": 0.95,
"freshness": 0.95,
"risk": 0.05,
"tags": ["preference", "output"],
"source": "system"
}
],
"skill": [],
"tool": []
},
"decisions": [
{
"step": 1,
"decision_type": "direct_answer",
"selected_ids": [],
"rejected_ids": ["mem-format-1"],
"rationale": "Router failed to recognize a preference-triggered task and skipped memory injection.",
"estimated_cost": 0.0
}
],
"events": [],
"outcome": {
"status": "partial_success",
"steps": 1,
"latency_ms": 300,
"user_corrections": 1,
"tool_errors": 0,
"notes": "Answer was serviceable but ignored known formatting preference."
},
"reward": {
"total": 0.18,
"components": {
"task_success": 0.5,
"retrieval_hit": -0.1,
"tool_error": 0.0,
"user_correction": 0.2,
"latency": 0.02,
"context_cost": 0.0,
"useful_reuse": 0.0
}
}
}

View File

@@ -0,0 +1,67 @@
{
"trajectory_id": "traj-failure-overtool-001",
"task": {
"task_id": "task-003",
"input": "Name this project.",
"channel": "telegram",
"created_at": "2026-04-14T13:04:00Z",
"user_id": "oza"
},
"context_snapshot": {
"conversation_summary": "User asks for naming help for an agent memory project.",
"environment_summary": "No real-time state lookup required.",
"recent_failures": ["The agent previously overused tools for pure reasoning tasks."]
},
"candidate_sets": {
"memory": [],
"skill": [],
"tool": [
{
"id": "tool-web-1",
"type": "tool",
"title": "web_search",
"summary": "Search the web for information.",
"triggers": ["name", "idea"],
"cost": 0.4,
"confidence": 0.62,
"success_rate": 0.55,
"freshness": 1.0,
"risk": 0.3,
"tags": ["research"],
"source": "system"
}
]
},
"decisions": [
{
"step": 1,
"decision_type": "call_tool",
"selected_ids": ["tool-web-1"],
"rejected_ids": [],
"rationale": "Incorrectly treated naming as a research task rather than a reasoning task.",
"estimated_cost": 0.4
}
],
"events": [],
"outcome": {
"status": "failure",
"steps": 2,
"latency_ms": 2400,
"user_corrections": 1,
"tool_errors": 1,
"notes": "Over-tooled a pure reasoning task and forced unnecessary latency."
},
"reward": {
"total": -1.12,
"components": {
"task_success": -0.3,
"retrieval_hit": 0.0,
"tool_error": 0.35,
"user_correction": 0.25,
"latency": 0.12,
"context_cost": 0.1,
"useful_reuse": 0.0
}
}
}

View File

@@ -0,0 +1,66 @@
{
"trajectory_id": "traj-success-memory-001",
"task": {
"task_id": "task-001",
"input": "Remember my preferred deployment region and use it next time.",
"channel": "telegram",
"created_at": "2026-04-14T13:02:00Z",
"user_id": "oza"
},
"context_snapshot": {
"conversation_summary": "User is defining a local agent memory project and references recurring preferences.",
"environment_summary": "No live tool call required.",
"recent_failures": []
},
"candidate_sets": {
"memory": [
{
"id": "mem-region-1",
"type": "memory",
"title": "Preferred deployment region",
"summary": "User prefers us-west-2 for deployments.",
"triggers": ["deployment", "region", "preference"],
"cost": 0.1,
"confidence": 0.93,
"success_rate": 0.88,
"freshness": 0.9,
"risk": 0.1,
"tags": ["preference", "deployment"],
"source": "user"
}
],
"skill": [],
"tool": []
},
"decisions": [
{
"step": 1,
"decision_type": "inject_memory",
"selected_ids": ["mem-region-1"],
"rejected_ids": [],
"rationale": "User request depends on a stable preference, so memory injection is the lowest-cost correct route.",
"estimated_cost": 0.1
}
],
"events": [],
"outcome": {
"status": "success",
"steps": 1,
"latency_ms": 350,
"user_corrections": 0,
"tool_errors": 0,
"notes": "Correctly identified preference storage request without unnecessary tools."
},
"reward": {
"total": 1.72,
"components": {
"task_success": 1.0,
"retrieval_hit": 0.45,
"tool_error": 0.0,
"user_correction": 0.0,
"latency": 0.03,
"context_cost": 0.05,
"useful_reuse": 0.35
}
}
}

View File

@@ -0,0 +1,67 @@
{
"trajectory_id": "traj-success-tool-001",
"task": {
"task_id": "task-002",
"input": "Check the current test status for the prototype.",
"channel": "telegram",
"created_at": "2026-04-14T13:03:00Z",
"user_id": "oza"
},
"context_snapshot": {
"conversation_summary": "User wants concrete progress on the memabra prototype.",
"environment_summary": "Pytest is available in the local repo environment.",
"recent_failures": []
},
"candidate_sets": {
"memory": [],
"skill": [],
"tool": [
{
"id": "tool-terminal-1",
"type": "tool",
"title": "terminal",
"summary": "Run shell commands in the local environment.",
"triggers": ["check", "current", "test"],
"cost": 0.2,
"confidence": 0.95,
"success_rate": 0.92,
"freshness": 1.0,
"risk": 0.2,
"tags": ["system", "tests"],
"source": "system"
}
]
},
"decisions": [
{
"step": 1,
"decision_type": "call_tool",
"selected_ids": ["tool-terminal-1"],
"rejected_ids": [],
"rationale": "Current test status is a live system fact and must be observed with a tool.",
"estimated_cost": 0.2
}
],
"events": [],
"outcome": {
"status": "success",
"steps": 1,
"latency_ms": 700,
"user_corrections": 0,
"tool_errors": 0,
"notes": "Terminal used appropriately to inspect live test state."
},
"reward": {
"total": 1.6,
"components": {
"task_success": 1.0,
"retrieval_hit": 0.4,
"tool_error": 0.0,
"user_correction": 0.0,
"latency": 0.08,
"context_cost": 0.02,
"useful_reuse": 0.3
}
}
}