"""Tests for ``build_report`` and ``TrainingReportStore``."""

from __future__ import annotations

from datetime import datetime, timezone

from memabra.evaluator import EvaluationResult
from memabra.promotion import PromotionDecision, PromotionPolicy
from memabra.training_reports import TrainingReportStore, build_report


def test_build_report_includes_all_required_fields():
    """A built report exposes ids, sample count, timestamp, metrics and decision."""
    baseline = EvaluationResult(
        task_count=2, avg_reward=0.5, error_rate=0.1, avg_latency_ms=50.0
    )
    challenger = EvaluationResult(
        task_count=2, avg_reward=0.6, error_rate=0.05, avg_latency_ms=45.0
    )
    decision = PromotionDecision(
        accepted=True, reasons=[], metrics={"reward_delta": 0.1}
    )

    report = build_report(
        source_trajectory_ids=["t1", "t2"],
        baseline=baseline,
        challenger=challenger,
        decision=decision,
        promoted_version_id="v-2026",
    )

    assert report["source_trajectory_ids"] == ["t1", "t2"]
    assert report["sample_count"] == 2
    assert "timestamp" in report
    assert report["promoted_version_id"] == "v-2026"
    assert report["baseline_metrics"]["avg_reward"] == 0.5
    assert report["challenger_metrics"]["avg_reward"] == 0.6
    assert report["promotion_decision"]["accepted"] is True


def test_training_report_store_save_and_list(tmp_path):
    """A saved report is returned by ``list_reports`` with its id and decision."""
    store = TrainingReportStore(base_dir=tmp_path / "reports")
    report = build_report(
        source_trajectory_ids=["t1"],
        baseline=EvaluationResult(
            task_count=1, avg_reward=0.5, error_rate=0.0, avg_latency_ms=10.0
        ),
        challenger=EvaluationResult(
            task_count=1, avg_reward=0.6, error_rate=0.0, avg_latency_ms=10.0
        ),
        decision=PromotionDecision(
            accepted=False, reasons=["reward too low"], metrics={}
        ),
    )

    saved = store.save(report)
    reports = store.list_reports()

    assert len(reports) == 1
    assert reports[0]["report_id"] == saved["report_id"]
    assert reports[0]["promotion_decision"]["accepted"] is False


def test_training_report_store_get_report_returns_specific_report(tmp_path):
    """``get_report`` returns the saved report matching the requested id."""
    # Uses the module-level imports; the function-scope re-imports were redundant.
    store = TrainingReportStore(base_dir=tmp_path)
    report = build_report(
        source_trajectory_ids=["t1", "t2"],
        baseline=EvaluationResult(
            task_count=1,
            trajectories=[],
            avg_reward=0.5,
            error_rate=0.0,
            avg_latency_ms=10.0,
            decision_distribution={},
        ),
        challenger=EvaluationResult(
            task_count=1,
            trajectories=[],
            avg_reward=0.6,
            error_rate=0.0,
            avg_latency_ms=10.0,
            decision_distribution={},
        ),
        decision=PromotionDecision(accepted=True, reasons=[], metrics={}),
        promoted_version_id="v1",
    )
    store.save(report)

    fetched = store.get_report(report["report_id"])

    assert fetched is not None
    assert fetched["report_id"] == report["report_id"]
    assert fetched["promoted_version_id"] == "v1"


def test_training_report_store_get_report_missing_returns_none(tmp_path):
    """``get_report`` returns ``None`` for an id that was never saved."""
    store = TrainingReportStore(base_dir=tmp_path)

    assert store.get_report("nonexistent") is None