"""Tests for memabra training-report construction and persistence."""
from __future__ import annotations
from datetime import datetime, timezone
from memabra.evaluator import EvaluationResult
from memabra.promotion import PromotionDecision, PromotionPolicy
from memabra.training_reports import TrainingReportStore, build_report
|


def test_build_report_includes_all_required_fields():
    """A freshly built report exposes every field downstream consumers rely on."""
    before = EvaluationResult(task_count=2, avg_reward=0.5, error_rate=0.1, avg_latency_ms=50.0)
    after = EvaluationResult(task_count=2, avg_reward=0.6, error_rate=0.05, avg_latency_ms=45.0)
    verdict = PromotionDecision(accepted=True, reasons=[], metrics={"reward_delta": 0.1})

    report = build_report(
        source_trajectory_ids=["t1", "t2"],
        baseline=before,
        challenger=after,
        decision=verdict,
        promoted_version_id="v-2026",
    )

    # Scalar fields copied straight from the inputs.
    expected = {
        "source_trajectory_ids": ["t1", "t2"],
        "sample_count": 2,
        "promoted_version_id": "v-2026",
    }
    for key, value in expected.items():
        assert report[key] == value

    # Timestamp is generated, so only its presence is checked here.
    assert "timestamp" in report
    assert report["baseline_metrics"]["avg_reward"] == 0.5
    assert report["challenger_metrics"]["avg_reward"] == 0.6
    assert report["promotion_decision"]["accepted"] is True
def test_training_report_store_save_and_list(tmp_path):
    """Saving a report makes it show up in list_reports with its assigned id."""
    store = TrainingReportStore(base_dir=tmp_path / "reports")

    rejected = build_report(
        source_trajectory_ids=["t1"],
        baseline=EvaluationResult(task_count=1, avg_reward=0.5, error_rate=0.0, avg_latency_ms=10.0),
        challenger=EvaluationResult(task_count=1, avg_reward=0.6, error_rate=0.0, avg_latency_ms=10.0),
        decision=PromotionDecision(accepted=False, reasons=["reward too low"], metrics={}),
    )
    saved = store.save(rejected)

    listed = store.list_reports()
    assert len(listed) == 1
    (only,) = listed
    assert only["report_id"] == saved["report_id"]
    assert only["promotion_decision"]["accepted"] is False
def test_training_report_store_get_report_returns_specific_report(tmp_path):
    """get_report retrieves a saved report by its id with all fields intact.

    Fix: dropped the function-local imports of TrainingReportStore,
    build_report, EvaluationResult, and PromotionDecision — they duplicated
    (and shadowed) the identical module-level imports at the top of the file.
    """
    store = TrainingReportStore(base_dir=tmp_path)
    # NOTE(review): this test passes trajectories=[] / decision_distribution={}
    # explicitly while the other tests omit them — presumably they default to
    # empty; kept as-is to avoid changing what the constructor receives.
    report = build_report(
        source_trajectory_ids=["t1", "t2"],
        baseline=EvaluationResult(task_count=1, trajectories=[], avg_reward=0.5, error_rate=0.0, avg_latency_ms=10.0, decision_distribution={}),
        challenger=EvaluationResult(task_count=1, trajectories=[], avg_reward=0.6, error_rate=0.0, avg_latency_ms=10.0, decision_distribution={}),
        decision=PromotionDecision(accepted=True, reasons=[], metrics={}),
        promoted_version_id="v1",
    )
    store.save(report)

    fetched = store.get_report(report["report_id"])
    assert fetched is not None
    assert fetched["report_id"] == report["report_id"]
    assert fetched["promoted_version_id"] == "v1"
def test_training_report_store_get_report_missing_returns_none(tmp_path):
    """Looking up an unknown report id returns None rather than raising.

    Fix: removed the function-local import of TrainingReportStore, which
    duplicated the module-level import at the top of the file.
    """
    store = TrainingReportStore(base_dir=tmp_path)
    assert store.get_report("nonexistent") is None
|