# memabra/tests/test_replay.py

from pathlib import Path

from memabra.persistence import PersistenceStore
from memabra.replay import TrajectoryReplay


# Directory handed to TrajectoryReplay.summarize_directory by the
# directory-based test in this module.
# NOTE(review): this is a relative path — presumably resolved against the
# directory pytest is launched from; confirm the suite runs from the repo root.
EXAMPLE_DIR = "docs/examples"


def test_replay_summary_counts_outcomes_and_actions():
    """Check the outcome and action tallies reported for EXAMPLE_DIR."""
    summary = TrajectoryReplay().summarize_directory(EXAMPLE_DIR)

    # Expected value of each summary attribute; attributes are read in
    # this (insertion) order.
    expected = {
        "trajectories": 4,
        "success_count": 2,
        "partial_success_count": 1,
        "failure_count": 1,
        "direct_answer_count": 1,
        "memory_action_count": 1,
        "tool_action_count": 2,
        "skill_action_count": 0,
    }
    observed = {field: getattr(summary, field) for field in expected}

    assert observed == expected


def test_replay_can_summarize_persisted_artifacts(tmp_path: Path):
    """Save two trajectories to a PersistenceStore and check the replay summary.

    One record is a successful direct answer and the other a failed tool
    call; the summary is expected to report one of each.
    """
    store = PersistenceStore(base_dir=tmp_path / "artifacts")

    # Record 1: a direct answer that succeeded.
    direct_answer_record = {
        "trajectory_id": "traj-1",
        "task": {
            "task_id": "task-1",
            "input": "A",
            "channel": "local",
            "created_at": "2026-01-01T00:00:00Z",
            "user_id": None,
        },
        "context_snapshot": {
            "conversation_summary": "",
            "environment_summary": "",
            "recent_failures": [],
        },
        "candidate_sets": {"memory": [], "skill": [], "tool": []},
        "decisions": [
            {
                "step": 1,
                "decision_type": "direct_answer",
                "selected_ids": [],
                "rejected_ids": [],
                "rationale": "",
                "estimated_cost": 0,
            }
        ],
        "events": [],
        "outcome": {
            "status": "success",
            "steps": 1,
            "latency_ms": 10,
            "user_corrections": 0,
            "tool_errors": 0,
            "notes": None,
        },
        "reward": {
            "total": 1.0,
            "components": {
                "task_success": 1.0,
                "retrieval_hit": 0.0,
                "tool_error": 0.0,
                "user_correction": 0.0,
                "latency": 0.0,
                "context_cost": 0.0,
                "useful_reuse": 0.0,
            },
        },
    }

    # Record 2: a tool call that failed with one tool error.
    failed_tool_record = {
        "trajectory_id": "traj-2",
        "task": {
            "task_id": "task-2",
            "input": "B",
            "channel": "local",
            "created_at": "2026-01-01T00:00:00Z",
            "user_id": None,
        },
        "context_snapshot": {
            "conversation_summary": "",
            "environment_summary": "",
            "recent_failures": [],
        },
        "candidate_sets": {"memory": [], "skill": [], "tool": []},
        "decisions": [
            {
                "step": 1,
                "decision_type": "call_tool",
                "selected_ids": ["tool-1"],
                "rejected_ids": [],
                "rationale": "",
                "estimated_cost": 0.1,
            }
        ],
        "events": [],
        "outcome": {
            "status": "failure",
            "steps": 1,
            "latency_ms": 50,
            "user_corrections": 0,
            "tool_errors": 1,
            "notes": None,
        },
        "reward": {
            "total": -0.2,
            "components": {
                "task_success": 0.2,
                "retrieval_hit": 0.0,
                "tool_error": 0.3,
                "user_correction": 0.0,
                "latency": 0.05,
                "context_cost": 0.0,
                "useful_reuse": 0.0,
            },
        },
    }

    for record in (direct_answer_record, failed_tool_record):
        store.save_trajectory(record)

    summary = TrajectoryReplay().summarize_persistence_store(store)

    observed = (
        summary.trajectories,
        summary.success_count,
        summary.failure_count,
        summary.tool_action_count,
    )
    assert observed == (2, 1, 1, 1)