from __future__ import annotations

import pytest

from memabra.promotion import PromotionDecision, PromotionPolicy
from memabra.evaluator import EvaluationResult


def _policy(
    *,
    min_reward_delta: float = 0.01,
    max_error_rate_increase: float = 0.05,
    max_latency_increase_ms: float = 100.0,
    required_task_count: int = 2,
) -> PromotionPolicy:
    """Build a PromotionPolicy; each test overrides only the gate it exercises."""
    return PromotionPolicy(
        min_reward_delta=min_reward_delta,
        max_error_rate_increase=max_error_rate_increase,
        max_latency_increase_ms=max_latency_increase_ms,
        required_task_count=required_task_count,
    )


def _result(
    avg_reward: float,
    error_rate: float,
    avg_latency_ms: float,
    task_count: int = 2,
) -> EvaluationResult:
    """Build an EvaluationResult; every scenario in this module uses two tasks."""
    return EvaluationResult(
        task_count=task_count,
        avg_reward=avg_reward,
        error_rate=error_rate,
        avg_latency_ms=avg_latency_ms,
    )


def _baseline() -> EvaluationResult:
    """Return the baseline result shared by all scenarios in this module."""
    return _result(0.5, 0.1, 50.0)


def _mentions(decision: PromotionDecision, keyword: str) -> bool:
    """Return True if any rejection reason contains *keyword*, ignoring case."""
    return any(keyword in reason.lower() for reason in decision.reasons)


class TestPromotionPolicy:
    """Checks of PromotionPolicy.evaluate(baseline, challenger) gating."""

    def test_accepted_when_challenger_improves_on_all_metrics(self):
        """A challenger that is better on every metric is accepted with no reasons."""
        challenger = _result(0.6, 0.05, 45.0)

        decision = _policy().evaluate(_baseline(), challenger)

        assert isinstance(decision, PromotionDecision)
        assert decision.accepted is True
        assert decision.reasons == []

        # Deltas are reported as challenger minus baseline.
        metrics = decision.metrics
        assert metrics["reward_delta"] == pytest.approx(0.1, abs=0.001)
        assert metrics["error_rate_delta"] == pytest.approx(-0.05, abs=0.001)
        assert metrics["latency_delta_ms"] == pytest.approx(-5.0, abs=0.001)

    def test_rejected_when_reward_delta_below_minimum(self):
        """A reward gain of 0.05 does not satisfy a required minimum of 0.1."""
        challenger = _result(0.55, 0.1, 50.0)

        decision = _policy(min_reward_delta=0.1).evaluate(_baseline(), challenger)

        assert decision.accepted is False
        assert _mentions(decision, "reward")

    def test_rejected_when_error_rate_increase_exceeds_max(self):
        """An error-rate rise of 0.1 exceeds the allowed increase of 0.05."""
        challenger = _result(0.6, 0.2, 50.0)

        decision = _policy().evaluate(_baseline(), challenger)

        assert decision.accepted is False
        assert _mentions(decision, "error")

    def test_rejected_when_latency_increase_exceeds_max(self):
        """A latency rise of 15 ms exceeds the allowed increase of 10 ms."""
        challenger = _result(0.6, 0.1, 65.0)

        decision = _policy(max_latency_increase_ms=10.0).evaluate(
            _baseline(), challenger
        )

        assert decision.accepted is False
        assert _mentions(decision, "latency")

    def test_rejected_when_task_count_below_required(self):
        """Two evaluated tasks do not satisfy a requirement of five."""
        challenger = _result(0.6, 0.1, 50.0)

        decision = _policy(required_task_count=5).evaluate(_baseline(), challenger)

        assert decision.accepted is False
        assert _mentions(decision, "task count")

    def test_multiple_rejection_reasons_accumulate(self):
        """Several violated gates yield several reasons rather than just the first."""
        strict_policy = _policy(
            min_reward_delta=0.2,
            max_error_rate_increase=0.01,
            max_latency_increase_ms=10.0,
            required_task_count=10,
        )
        challenger = _result(0.55, 0.15, 70.0)

        decision = strict_policy.evaluate(_baseline(), challenger)

        assert decision.accepted is False
        assert len(decision.reasons) >= 3