Files
memabra/tests/test_training_reports.py
2026-04-15 11:06:05 +08:00

75 lines
3.0 KiB
Python

from __future__ import annotations
from datetime import datetime, timezone
from memabra.evaluator import EvaluationResult
from memabra.promotion import PromotionDecision, PromotionPolicy
from memabra.training_reports import TrainingReportStore, build_report
def test_build_report_includes_all_required_fields():
    """Build a report from two evaluations and check the fields it exposes.

    The assertions cover the trajectory ids, sample count, timestamp key,
    promoted version id, both metric sections, and the promotion decision.
    """
    report = build_report(
        source_trajectory_ids=["t1", "t2"],
        baseline=EvaluationResult(
            task_count=2,
            avg_reward=0.5,
            error_rate=0.1,
            avg_latency_ms=50.0,
        ),
        challenger=EvaluationResult(
            task_count=2,
            avg_reward=0.6,
            error_rate=0.05,
            avg_latency_ms=45.0,
        ),
        decision=PromotionDecision(
            accepted=True,
            reasons=[],
            metrics={"reward_delta": 0.1},
        ),
        promoted_version_id="v-2026",
    )

    # Top-level bookkeeping fields.
    assert report["source_trajectory_ids"] == ["t1", "t2"]
    assert report["sample_count"] == 2
    assert "timestamp" in report
    assert report["promoted_version_id"] == "v-2026"

    # Nested sections carried over from the evaluation and decision inputs.
    baseline_metrics = report["baseline_metrics"]
    assert baseline_metrics["avg_reward"] == 0.5
    challenger_metrics = report["challenger_metrics"]
    assert challenger_metrics["avg_reward"] == 0.6
    promotion = report["promotion_decision"]
    assert promotion["accepted"] is True
def test_training_report_store_save_and_list(tmp_path):
    """Save one report and check that ``list_reports`` returns exactly it."""
    reports_dir = tmp_path / "reports"
    store = TrainingReportStore(base_dir=reports_dir)

    baseline_eval = EvaluationResult(
        task_count=1, avg_reward=0.5, error_rate=0.0, avg_latency_ms=10.0
    )
    challenger_eval = EvaluationResult(
        task_count=1, avg_reward=0.6, error_rate=0.0, avg_latency_ms=10.0
    )
    rejection = PromotionDecision(
        accepted=False, reasons=["reward too low"], metrics={}
    )
    new_report = build_report(
        source_trajectory_ids=["t1"],
        baseline=baseline_eval,
        challenger=challenger_eval,
        decision=rejection,
    )

    persisted = store.save(new_report)
    listed = store.list_reports()

    assert len(listed) == 1
    first = listed[0]
    # The listed entry must be the one that was just saved.
    assert first["report_id"] == persisted["report_id"]
    assert first["promotion_decision"]["accepted"] is False
def test_training_report_store_get_report_returns_specific_report(tmp_path):
    """Save a report, then fetch it by id via ``get_report``.

    Uses the module-level imports of ``TrainingReportStore``,
    ``build_report``, ``EvaluationResult`` and ``PromotionDecision``; the
    function-local re-imports were redundant and have been removed.
    """
    store = TrainingReportStore(base_dir=tmp_path)
    report = build_report(
        source_trajectory_ids=["t1", "t2"],
        baseline=EvaluationResult(
            task_count=1,
            trajectories=[],
            avg_reward=0.5,
            error_rate=0.0,
            avg_latency_ms=10.0,
            decision_distribution={},
        ),
        challenger=EvaluationResult(
            task_count=1,
            trajectories=[],
            avg_reward=0.6,
            error_rate=0.0,
            avg_latency_ms=10.0,
            decision_distribution={},
        ),
        decision=PromotionDecision(accepted=True, reasons=[], metrics={}),
        promoted_version_id="v1",
    )
    store.save(report)

    # Look the report up by the id carried on the report dict itself.
    fetched = store.get_report(report["report_id"])

    assert fetched is not None
    assert fetched["report_id"] == report["report_id"]
    assert fetched["promoted_version_id"] == "v1"
def test_training_report_store_get_report_missing_returns_none(tmp_path):
    """Check that ``get_report`` returns ``None`` for an id that was never saved.

    Relies on the module-level ``TrainingReportStore`` import; the redundant
    function-local import has been removed.
    """
    store = TrainingReportStore(base_dir=tmp_path)
    assert store.get_report("nonexistent") is None