# Viewer metadata captured with this file: 58 lines, 2.8 KiB, Python
from pathlib import Path
|
|
|
|
from memabra.persistence import PersistenceStore
|
|
from memabra.replay import TrajectoryReplay
|
|
|
|
|
|
# Directory handed to TrajectoryReplay.summarize_directory by the first test.
# NOTE(review): this is a relative path, so it presumably resolves against the
# working directory the tests are launched from — confirm the expected cwd.
EXAMPLE_DIR = "docs/examples"
|
|
|
|
|
|
def test_replay_summary_counts_outcomes_and_actions():
    """Summarize the trajectories under ``EXAMPLE_DIR`` and check each counter.

    Calls ``TrajectoryReplay.summarize_directory`` on ``EXAMPLE_DIR`` and
    compares the outcome and action tallies exposed on the returned summary
    against the values expected for that directory.
    """
    report = TrajectoryReplay().summarize_directory(EXAMPLE_DIR)

    # Expected value per summary attribute; dict order is the check order, so
    # the first mismatch stops the test just like a sequence of asserts would.
    expected_counts = {
        "trajectories": 4,
        "success_count": 2,
        "partial_success_count": 1,
        "failure_count": 1,
        "direct_answer_count": 1,
        "memory_action_count": 1,
        "tool_action_count": 2,
        "skill_action_count": 0,
    }
    for attribute, expected in expected_counts.items():
        assert getattr(report, attribute) == expected, attribute
|
|
|
|
|
|
def test_replay_can_summarize_persisted_artifacts(tmp_path: Path):
    """Save two trajectories through a store, then summarize that store.

    A ``PersistenceStore`` rooted at ``tmp_path / "artifacts"`` receives one
    trajectory whose outcome status is "success" (decision type
    "direct_answer") and one whose status is "failure" (decision type
    "call_tool"). ``TrajectoryReplay.summarize_persistence_store`` is then
    expected to report two trajectories, one success, one failure and one
    tool action.
    """
    store = PersistenceStore(base_dir=tmp_path / "artifacts")

    # Trajectory 1: answered directly, finished successfully.
    direct_answer_success = {
        "trajectory_id": "traj-1",
        "task": {
            "task_id": "task-1",
            "input": "A",
            "channel": "local",
            "created_at": "2026-01-01T00:00:00Z",
            "user_id": None,
        },
        "context_snapshot": {
            "conversation_summary": "",
            "environment_summary": "",
            "recent_failures": [],
        },
        "candidate_sets": {"memory": [], "skill": [], "tool": []},
        "decisions": [
            {
                "step": 1,
                "decision_type": "direct_answer",
                "selected_ids": [],
                "rejected_ids": [],
                "rationale": "",
                "estimated_cost": 0,
            }
        ],
        "events": [],
        "outcome": {
            "status": "success",
            "steps": 1,
            "latency_ms": 10,
            "user_corrections": 0,
            "tool_errors": 0,
            "notes": None,
        },
        "reward": {
            "total": 1.0,
            "components": {
                "task_success": 1.0,
                "retrieval_hit": 0.0,
                "tool_error": 0.0,
                "user_correction": 0.0,
                "latency": 0.0,
                "context_cost": 0.0,
                "useful_reuse": 0.0,
            },
        },
    }

    # Trajectory 2: called a tool, ended in failure with one tool error.
    tool_call_failure = {
        "trajectory_id": "traj-2",
        "task": {
            "task_id": "task-2",
            "input": "B",
            "channel": "local",
            "created_at": "2026-01-01T00:00:00Z",
            "user_id": None,
        },
        "context_snapshot": {
            "conversation_summary": "",
            "environment_summary": "",
            "recent_failures": [],
        },
        "candidate_sets": {"memory": [], "skill": [], "tool": []},
        "decisions": [
            {
                "step": 1,
                "decision_type": "call_tool",
                "selected_ids": ["tool-1"],
                "rejected_ids": [],
                "rationale": "",
                "estimated_cost": 0.1,
            }
        ],
        "events": [],
        "outcome": {
            "status": "failure",
            "steps": 1,
            "latency_ms": 50,
            "user_corrections": 0,
            "tool_errors": 1,
            "notes": None,
        },
        "reward": {
            "total": -0.2,
            "components": {
                "task_success": 0.2,
                "retrieval_hit": 0.0,
                "tool_error": 0.3,
                "user_correction": 0.0,
                "latency": 0.05,
                "context_cost": 0.0,
                "useful_reuse": 0.0,
            },
        },
    }

    # Persist in the same order as the ids: traj-1 first, then traj-2.
    for payload in (direct_answer_success, tool_call_failure):
        store.save_trajectory(payload)

    report = TrajectoryReplay().summarize_persistence_store(store)

    assert report.trajectories == 2
    assert report.success_count == 1
    assert report.failure_count == 1
    assert report.tool_action_count == 1
|