Initial standalone memabra release
This commit is contained in:
112
tests/test_promotion.py
Normal file
112
tests/test_promotion.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from memabra.promotion import PromotionDecision, PromotionPolicy
|
||||
from memabra.evaluator import EvaluationResult
|
||||
|
||||
|
||||
class TestPromotionPolicy:
    """Exercises PromotionPolicy.evaluate() across accept/reject scenarios."""

    @staticmethod
    def _make_policy(
        *,
        min_reward_delta: float = 0.01,
        max_error_rate_increase: float = 0.05,
        max_latency_increase_ms: float = 100.0,
        required_task_count: int = 2,
    ) -> PromotionPolicy:
        # Centralizes policy construction; each test overrides only the
        # threshold it is probing.
        return PromotionPolicy(
            min_reward_delta=min_reward_delta,
            max_error_rate_increase=max_error_rate_increase,
            max_latency_increase_ms=max_latency_increase_ms,
            required_task_count=required_task_count,
        )

    @staticmethod
    def _result(
        task_count: int = 2,
        avg_reward: float = 0.5,
        error_rate: float = 0.1,
        avg_latency_ms: float = 50.0,
    ) -> EvaluationResult:
        # Defaults mirror the common baseline used throughout these tests.
        return EvaluationResult(
            task_count=task_count,
            avg_reward=avg_reward,
            error_rate=error_rate,
            avg_latency_ms=avg_latency_ms,
        )

    def test_accepted_when_challenger_improves_on_all_metrics(self):
        """A strict improvement on every metric yields an accepted decision."""
        policy = self._make_policy()
        old = self._result()
        new = self._result(avg_reward=0.6, error_rate=0.05, avg_latency_ms=45.0)

        decision = policy.evaluate(old, new)

        assert isinstance(decision, PromotionDecision)
        assert decision.accepted is True
        assert decision.reasons == []
        assert decision.metrics["reward_delta"] == pytest.approx(0.1, abs=0.001)
        assert decision.metrics["error_rate_delta"] == pytest.approx(-0.05, abs=0.001)
        assert decision.metrics["latency_delta_ms"] == pytest.approx(-5.0, abs=0.001)

    def test_rejected_when_reward_delta_below_minimum(self):
        """A reward gain smaller than min_reward_delta is rejected for reward."""
        policy = self._make_policy(min_reward_delta=0.1)
        old = self._result()
        new = self._result(avg_reward=0.55)

        decision = policy.evaluate(old, new)

        assert decision.accepted is False
        assert any("reward" in reason.lower() for reason in decision.reasons)

    def test_rejected_when_error_rate_increase_exceeds_max(self):
        """An error-rate jump beyond max_error_rate_increase blocks promotion."""
        policy = self._make_policy()
        old = self._result()
        new = self._result(avg_reward=0.6, error_rate=0.2)

        decision = policy.evaluate(old, new)

        assert decision.accepted is False
        assert any("error" in reason.lower() for reason in decision.reasons)

    def test_rejected_when_latency_increase_exceeds_max(self):
        """A latency regression beyond max_latency_increase_ms blocks promotion."""
        policy = self._make_policy(max_latency_increase_ms=10.0)
        old = self._result()
        new = self._result(avg_reward=0.6, avg_latency_ms=65.0)

        decision = policy.evaluate(old, new)

        assert decision.accepted is False
        assert any("latency" in reason.lower() for reason in decision.reasons)

    def test_rejected_when_task_count_below_required(self):
        """Too few evaluated tasks is rejected regardless of metric quality."""
        policy = self._make_policy(required_task_count=5)
        old = self._result()
        new = self._result(avg_reward=0.6)

        decision = policy.evaluate(old, new)

        assert decision.accepted is False
        assert any("task count" in reason.lower() for reason in decision.reasons)

    def test_multiple_rejection_reasons_accumulate(self):
        """When several gates fail at once, each contributes its own reason."""
        policy = self._make_policy(
            min_reward_delta=0.2,
            max_error_rate_increase=0.01,
            max_latency_increase_ms=10.0,
            required_task_count=10,
        )
        old = self._result()
        new = self._result(avg_reward=0.55, error_rate=0.15, avg_latency_ms=70.0)

        decision = policy.evaluate(old, new)

        assert decision.accepted is False
        assert len(decision.reasons) >= 3
|
||||
Reference in New Issue
Block a user