Initial standalone memabra release

2026-04-15 11:06:05 +08:00
commit 58f9f221b1
464 changed files with 30256 additions and 0 deletions
--- a/docs/examples/trajectory_failure_missed_memory.json
+++ b/docs/examples/trajectory_failure_missed_memory.json
@@ -0,0 +1,66 @@
+{
+  "trajectory_id": "traj-failure-missed-memory-001",
+  "task": {
+    "task_id": "task-004",
+    "input": "Use my usual formatting preferences for this write-up.",
+    "channel": "telegram",
+    "created_at": "2026-04-14T13:05:00Z",
+    "user_id": "oza"
+  },
+  "context_snapshot": {
+    "conversation_summary": "User has repeated stable formatting preferences in earlier sessions.",
+    "environment_summary": "No tool call required.",
+    "recent_failures": []
+  },
+  "candidate_sets": {
+    "memory": [
+      {
+        "id": "mem-format-1",
+        "type": "memory",
+        "title": "Telegram formatting preference",
+        "summary": "Prefer plain text over markdown for Telegram delivery.",
+        "triggers": ["format", "telegram", "write-up"],
+        "cost": 0.05,
+        "confidence": 0.9,
+        "success_rate": 0.95,
+        "freshness": 0.95,
+        "risk": 0.05,
+        "tags": ["preference", "output"],
+        "source": "system"
+      }
+    ],
+    "skill": [],
+    "tool": []
+  },
+  "decisions": [
+    {
+      "step": 1,
+      "decision_type": "direct_answer",
+      "selected_ids": [],
+      "rejected_ids": ["mem-format-1"],
+      "rationale": "Router failed to recognize a preference-triggered task and skipped memory injection.",
+      "estimated_cost": 0.0
+    }
+  ],
+  "events": [],
+  "outcome": {
+    "status": "partial_success",
+    "steps": 1,
+    "latency_ms": 300,
+    "user_corrections": 1,
+    "tool_errors": 0,
+    "notes": "Answer was serviceable but ignored known formatting preference."
+  },
+  "reward": {
+    "total": 0.18,
+    "components": {
+      "task_success": 0.5,
+      "retrieval_hit": -0.1,
+      "tool_error": 0.0,
+      "user_correction": 0.2,
+      "latency": 0.02,
+      "context_cost": 0.0,
+      "useful_reuse": 0.0
+    }
+  }
+}
--- a/docs/examples/trajectory_failure_overtool.json
+++ b/docs/examples/trajectory_failure_overtool.json
@@ -0,0 +1,66 @@
+{
+  "trajectory_id": "traj-failure-overtool-001",
+  "task": {
+    "task_id": "task-003",
+    "input": "Name this project.",
+    "channel": "telegram",
+    "created_at": "2026-04-14T13:04:00Z",
+    "user_id": "oza"
+  },
+  "context_snapshot": {
+    "conversation_summary": "User asks for naming help for an agent memory project.",
+    "environment_summary": "No real-time state lookup required.",
+    "recent_failures": ["The agent previously overused tools for pure reasoning tasks."]
+  },
+  "candidate_sets": {
+    "memory": [],
+    "skill": [],
+    "tool": [
+      {
+        "id": "tool-web-1",
+        "type": "tool",
+        "title": "web_search",
+        "summary": "Search the web for information.",
+        "triggers": ["name", "idea"],
+        "cost": 0.4,
+        "confidence": 0.62,
+        "success_rate": 0.55,
+        "freshness": 1.0,
+        "risk": 0.3,
+        "tags": ["research"],
+        "source": "system"
+      }
+    ]
+  },
+  "decisions": [
+    {
+      "step": 1,
+      "decision_type": "call_tool",
+      "selected_ids": ["tool-web-1"],
+      "rejected_ids": [],
+      "rationale": "Incorrectly treated naming as a research task rather than a reasoning task.",
+      "estimated_cost": 0.4
+    }
+  ],
+  "events": [],
+  "outcome": {
+    "status": "failure",
+    "steps": 2,
+    "latency_ms": 2400,
+    "user_corrections": 1,
+    "tool_errors": 1,
+    "notes": "Over-tooled a pure reasoning task and forced unnecessary latency."
+  },
+  "reward": {
+    "total": -1.12,
+    "components": {
+      "task_success": -0.3,
+      "retrieval_hit": 0.0,
+      "tool_error": 0.35,
+      "user_correction": 0.25,
+      "latency": 0.12,
+      "context_cost": 0.1,
+      "useful_reuse": 0.0
+    }
+  }
+}
--- a/docs/examples/trajectory_success_memory.json
+++ b/docs/examples/trajectory_success_memory.json
@@ -0,0 +1,66 @@
+{
+  "trajectory_id": "traj-success-memory-001",
+  "task": {
+    "task_id": "task-001",
+    "input": "Remember my preferred deployment region and use it next time.",
+    "channel": "telegram",
+    "created_at": "2026-04-14T13:02:00Z",
+    "user_id": "oza"
+  },
+  "context_snapshot": {
+    "conversation_summary": "User is defining a local agent memory project and references recurring preferences.",
+    "environment_summary": "No live tool call required.",
+    "recent_failures": []
+  },
+  "candidate_sets": {
+    "memory": [
+      {
+        "id": "mem-region-1",
+        "type": "memory",
+        "title": "Preferred deployment region",
+        "summary": "User prefers us-west-2 for deployments.",
+        "triggers": ["deployment", "region", "preference"],
+        "cost": 0.1,
+        "confidence": 0.93,
+        "success_rate": 0.88,
+        "freshness": 0.9,
+        "risk": 0.1,
+        "tags": ["preference", "deployment"],
+        "source": "user"
+      }
+    ],
+    "skill": [],
+    "tool": []
+  },
+  "decisions": [
+    {
+      "step": 1,
+      "decision_type": "inject_memory",
+      "selected_ids": ["mem-region-1"],
+      "rejected_ids": [],
+      "rationale": "User request depends on a stable preference, so memory injection is the lowest-cost correct route.",
+      "estimated_cost": 0.1
+    }
+  ],
+  "events": [],
+  "outcome": {
+    "status": "success",
+    "steps": 1,
+    "latency_ms": 350,
+    "user_corrections": 0,
+    "tool_errors": 0,
+    "notes": "Correctly identified preference storage request without unnecessary tools."
+  },
+  "reward": {
+    "total": 1.72,
+    "components": {
+      "task_success": 1.0,
+      "retrieval_hit": 0.45,
+      "tool_error": 0.0,
+      "user_correction": 0.0,
+      "latency": 0.03,
+      "context_cost": 0.05,
+      "useful_reuse": 0.35
+    }
+  }
+}
--- a/docs/examples/trajectory_success_tool.json
+++ b/docs/examples/trajectory_success_tool.json
@@ -0,0 +1,66 @@
+{
+  "trajectory_id": "traj-success-tool-001",
+  "task": {
+    "task_id": "task-002",
+    "input": "Check the current test status for the prototype.",
+    "channel": "telegram",
+    "created_at": "2026-04-14T13:03:00Z",
+    "user_id": "oza"
+  },
+  "context_snapshot": {
+    "conversation_summary": "User wants concrete progress on the memabra prototype.",
+    "environment_summary": "Pytest is available in the local repo environment.",
+    "recent_failures": []
+  },
+  "candidate_sets": {
+    "memory": [],
+    "skill": [],
+    "tool": [
+      {
+        "id": "tool-terminal-1",
+        "type": "tool",
+        "title": "terminal",
+        "summary": "Run shell commands in the local environment.",
+        "triggers": ["check", "current", "test"],
+        "cost": 0.2,
+        "confidence": 0.95,
+        "success_rate": 0.92,
+        "freshness": 1.0,
+        "risk": 0.2,
+        "tags": ["system", "tests"],
+        "source": "system"
+      }
+    ]
+  },
+  "decisions": [
+    {
+      "step": 1,
+      "decision_type": "call_tool",
+      "selected_ids": ["tool-terminal-1"],
+      "rejected_ids": [],
+      "rationale": "Current test status is a live system fact and must be observed with a tool.",
+      "estimated_cost": 0.2
+    }
+  ],
+  "events": [],
+  "outcome": {
+    "status": "success",
+    "steps": 1,
+    "latency_ms": 700,
+    "user_corrections": 0,
+    "tool_errors": 0,
+    "notes": "Terminal used appropriately to inspect live test state."
+  },
+  "reward": {
+    "total": 1.6,
+    "components": {
+      "task_success": 1.0,
+      "retrieval_hit": 0.4,
+      "tool_error": 0.0,
+      "user_correction": 0.0,
+      "latency": 0.08,
+      "context_cost": 0.02,
+      "useful_reuse": 0.3
+    }
+  }
+}