Initial standalone memabra release
This commit is contained in:
57
tests/test_replay.py
Normal file
57
tests/test_replay.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from pathlib import Path
|
||||
|
||||
from memabra.persistence import PersistenceStore
|
||||
from memabra.replay import TrajectoryReplay
|
||||
|
||||
|
||||
# Directory passed to TrajectoryReplay.summarize_directory() by the first test.
# NOTE(review): this is a relative path, so it is presumably resolved against
# the current working directory (i.e. tests run from the repo root) — confirm.
EXAMPLE_DIR = "docs/examples"
|
||||
|
||||
|
||||
def test_replay_summary_counts_outcomes_and_actions():
    """Summarize EXAMPLE_DIR and verify every outcome and action counter."""
    summary = TrajectoryReplay().summarize_directory(EXAMPLE_DIR)

    # Counters are checked in this order, so the first mismatch fails the test.
    expected_counts = (
        ("trajectories", 4),
        ("success_count", 2),
        ("partial_success_count", 1),
        ("failure_count", 1),
        ("direct_answer_count", 1),
        ("memory_action_count", 1),
        ("tool_action_count", 2),
        ("skill_action_count", 0),
    )
    for field_name, expected in expected_counts:
        assert getattr(summary, field_name) == expected
|
||||
|
||||
|
||||
def test_replay_can_summarize_persisted_artifacts(tmp_path: Path):
    """Save two trajectories to a PersistenceStore and summarize that store.

    One trajectory is a successful direct answer, the other a failed tool
    call; the summary is expected to report one of each.
    """
    store = PersistenceStore(base_dir=tmp_path / "artifacts")

    # Successful run answered directly, with no tool involved.
    direct_answer_success = {
        "trajectory_id": "traj-1",
        "task": {
            "task_id": "task-1",
            "input": "A",
            "channel": "local",
            "created_at": "2026-01-01T00:00:00Z",
            "user_id": None,
        },
        "context_snapshot": {
            "conversation_summary": "",
            "environment_summary": "",
            "recent_failures": [],
        },
        "candidate_sets": {"memory": [], "skill": [], "tool": []},
        "decisions": [
            {
                "step": 1,
                "decision_type": "direct_answer",
                "selected_ids": [],
                "rejected_ids": [],
                "rationale": "",
                "estimated_cost": 0,
            },
        ],
        "events": [],
        "outcome": {
            "status": "success",
            "steps": 1,
            "latency_ms": 10,
            "user_corrections": 0,
            "tool_errors": 0,
            "notes": None,
        },
        "reward": {
            "total": 1.0,
            "components": {
                "task_success": 1.0,
                "retrieval_hit": 0.0,
                "tool_error": 0.0,
                "user_correction": 0.0,
                "latency": 0.0,
                "context_cost": 0.0,
                "useful_reuse": 0.0,
            },
        },
    }

    # Failed run whose single decision was a tool call.
    tool_call_failure = {
        "trajectory_id": "traj-2",
        "task": {
            "task_id": "task-2",
            "input": "B",
            "channel": "local",
            "created_at": "2026-01-01T00:00:00Z",
            "user_id": None,
        },
        "context_snapshot": {
            "conversation_summary": "",
            "environment_summary": "",
            "recent_failures": [],
        },
        "candidate_sets": {"memory": [], "skill": [], "tool": []},
        "decisions": [
            {
                "step": 1,
                "decision_type": "call_tool",
                "selected_ids": ["tool-1"],
                "rejected_ids": [],
                "rationale": "",
                "estimated_cost": 0.1,
            },
        ],
        "events": [],
        "outcome": {
            "status": "failure",
            "steps": 1,
            "latency_ms": 50,
            "user_corrections": 0,
            "tool_errors": 1,
            "notes": None,
        },
        "reward": {
            "total": -0.2,
            "components": {
                "task_success": 0.2,
                "retrieval_hit": 0.0,
                "tool_error": 0.3,
                "user_correction": 0.0,
                "latency": 0.05,
                "context_cost": 0.0,
                "useful_reuse": 0.0,
            },
        },
    }

    for record in (direct_answer_success, tool_call_failure):
        store.save_trajectory(record)

    summary = TrajectoryReplay().summarize_persistence_store(store)

    assert summary.trajectories == 2
    assert summary.success_count == 1
    assert summary.failure_count == 1
    assert summary.tool_action_count == 1
|
||||
Reference in New Issue
Block a user