Files
memabra/tests/test_replay.py
2026-04-15 11:06:05 +08:00

58 lines
2.8 KiB
Python

from pathlib import Path
from memabra.persistence import PersistenceStore
from memabra.replay import TrajectoryReplay
# Directory passed to TrajectoryReplay.summarize_directory by the
# example-directory summary test in this module.
# NOTE(review): this is a relative path — presumably resolved against the
# directory pytest is launched from; confirm before running elsewhere.
EXAMPLE_DIR = "docs/examples"
def test_replay_summary_counts_outcomes_and_actions():
    """Summarize ``EXAMPLE_DIR`` and check every counter on the summary.

    The expected numbers are pinned to whatever trajectories live in
    ``EXAMPLE_DIR``; the counters are checked one at a time, in the order
    listed, so the first mismatch is the one reported.
    """
    summary = TrajectoryReplay().summarize_directory(EXAMPLE_DIR)

    # Attribute name on the summary -> value the example set must produce.
    expected_counts = {
        "trajectories": 4,
        "success_count": 2,
        "partial_success_count": 1,
        "failure_count": 1,
        "direct_answer_count": 1,
        "memory_action_count": 1,
        "tool_action_count": 2,
        "skill_action_count": 0,
    }

    for attribute, expected in expected_counts.items():
        assert getattr(summary, attribute) == expected
def test_replay_can_summarize_persisted_artifacts(tmp_path: Path):
    """Save two hand-built trajectories and summarize them via the store.

    One trajectory records a successful ``direct_answer`` decision, the
    other a failed ``call_tool`` decision. Both are written through a
    ``PersistenceStore`` based at ``tmp_path / "artifacts"``, and the
    summary returned by ``summarize_persistence_store`` is checked for
    the matching totals.
    """
    store = PersistenceStore(base_dir=tmp_path / "artifacts")

    # Trajectory 1: answered directly, outcome "success".
    direct_answer_trajectory = {
        "trajectory_id": "traj-1",
        "task": {
            "task_id": "task-1",
            "input": "A",
            "channel": "local",
            "created_at": "2026-01-01T00:00:00Z",
            "user_id": None,
        },
        "context_snapshot": {
            "conversation_summary": "",
            "environment_summary": "",
            "recent_failures": [],
        },
        "candidate_sets": {"memory": [], "skill": [], "tool": []},
        "decisions": [
            {
                "step": 1,
                "decision_type": "direct_answer",
                "selected_ids": [],
                "rejected_ids": [],
                "rationale": "",
                "estimated_cost": 0,
            }
        ],
        "events": [],
        "outcome": {
            "status": "success",
            "steps": 1,
            "latency_ms": 10,
            "user_corrections": 0,
            "tool_errors": 0,
            "notes": None,
        },
        "reward": {
            "total": 1.0,
            "components": {
                "task_success": 1.0,
                "retrieval_hit": 0.0,
                "tool_error": 0.0,
                "user_correction": 0.0,
                "latency": 0.0,
                "context_cost": 0.0,
                "useful_reuse": 0.0,
            },
        },
    }

    # Trajectory 2: one tool call, outcome "failure" with a tool error.
    failed_tool_trajectory = {
        "trajectory_id": "traj-2",
        "task": {
            "task_id": "task-2",
            "input": "B",
            "channel": "local",
            "created_at": "2026-01-01T00:00:00Z",
            "user_id": None,
        },
        "context_snapshot": {
            "conversation_summary": "",
            "environment_summary": "",
            "recent_failures": [],
        },
        "candidate_sets": {"memory": [], "skill": [], "tool": []},
        "decisions": [
            {
                "step": 1,
                "decision_type": "call_tool",
                "selected_ids": ["tool-1"],
                "rejected_ids": [],
                "rationale": "",
                "estimated_cost": 0.1,
            }
        ],
        "events": [],
        "outcome": {
            "status": "failure",
            "steps": 1,
            "latency_ms": 50,
            "user_corrections": 0,
            "tool_errors": 1,
            "notes": None,
        },
        "reward": {
            "total": -0.2,
            "components": {
                "task_success": 0.2,
                "retrieval_hit": 0.0,
                "tool_error": 0.3,
                "user_correction": 0.0,
                "latency": 0.05,
                "context_cost": 0.0,
                "useful_reuse": 0.0,
            },
        },
    }

    # Persist in the same order the records are defined.
    for record in (direct_answer_trajectory, failed_tool_trajectory):
        store.save_trajectory(record)

    summary = TrajectoryReplay().summarize_persistence_store(store)

    # Attribute name on the summary -> value the two records must produce.
    expected_counts = {
        "trajectories": 2,
        "success_count": 1,
        "failure_count": 1,
        "tool_action_count": 1,
    }
    for attribute, expected in expected_counts.items():
        assert getattr(summary, attribute) == expected