Files
memabra/tests/test_outcome_reward.py
2026-04-15 11:06:05 +08:00

127 lines
4.5 KiB
Python

from memabra.execution import ActionResult
from memabra.outcome import OutcomeEngine, RewardEngine
from memabra.retrieval import RetrievalResult
from memabra.router import RouteDecision, TaskContext
from memabra.telemetry import RewardBreakdown
def test_outcome_engine_success_for_memory_injection():
    """An executed memory injection produces a 'success' outcome with its metrics."""
    outcome_engine = OutcomeEngine()
    route = RouteDecision(decision_type="inject_memory", selected_ids=["mem-1"])
    action = ActionResult(decision_type="inject_memory", status="executed", details={"latency_ms": 50})

    outcome = outcome_engine.build_outcome(route, action)

    assert outcome.status == "success"
    assert outcome.steps == 1
    assert outcome.latency_ms == 50
    assert outcome.tool_errors == 0
def test_outcome_engine_failure_for_tool_error():
    """A tool call that errored maps to a 'failure' outcome with one tool error."""
    outcome_engine = OutcomeEngine()
    route = RouteDecision(decision_type="call_tool", selected_ids=["tool-1"])
    action = ActionResult(decision_type="call_tool", status="error", details={"latency_ms": 120})

    outcome = outcome_engine.build_outcome(route, action)

    assert outcome.status == "failure"
    assert outcome.latency_ms == 120
    assert outcome.tool_errors == 1
def test_outcome_engine_counts_multiple_tool_errors():
    """Each errored entry in the per-tool results list is counted individually."""
    outcome_engine = OutcomeEngine()
    route = RouteDecision(decision_type="call_tool", selected_ids=["tool-1", "tool-2"])
    per_tool_results = [
        {"tool_id": "tool-1", "status": "error"},
        {"tool_id": "tool-2", "status": "error"},
    ]
    action = ActionResult(
        decision_type="call_tool",
        status="error",
        details={"latency_ms": 200, "results": per_tool_results},
    )

    outcome = outcome_engine.build_outcome(route, action)

    assert outcome.status == "failure"
    assert outcome.tool_errors == 2
def test_outcome_engine_partial_success_for_mixed_tool_results():
    """A mix of per-tool success and error yields 'partial_success', not 'failure'."""
    outcome_engine = OutcomeEngine()
    route = RouteDecision(decision_type="call_tool", selected_ids=["tool-1", "tool-2"])
    per_tool_results = [
        {"tool_id": "tool-1", "status": "success"},
        {"tool_id": "tool-2", "status": "error"},
    ]
    action = ActionResult(
        decision_type="call_tool",
        status="error",
        details={"latency_ms": 200, "results": per_tool_results},
    )

    outcome = outcome_engine.build_outcome(route, action)

    assert outcome.status == "partial_success"
    assert outcome.tool_errors == 1
def test_reward_engine_penalizes_latency_by_tier():
    """A slower execution receives a strictly larger latency penalty than a fast one."""
    outcome_engine = OutcomeEngine()
    reward_engine = RewardEngine()
    route = RouteDecision(decision_type="call_tool")

    fast_outcome = outcome_engine.build_outcome(
        route,
        ActionResult(decision_type="call_tool", status="success", details={"latency_ms": 200}),
    )
    slow_outcome = outcome_engine.build_outcome(
        route,
        ActionResult(decision_type="call_tool", status="success", details={"latency_ms": 2500}),
    )

    fast_reward = reward_engine.compute(route, fast_outcome)
    slow_reward = reward_engine.compute(route, slow_outcome)

    assert fast_reward.latency < slow_reward.latency
    assert slow_reward.latency > 0.5
def test_reward_engine_context_cost_based_on_candidate_count():
    """Retrieved candidates contribute a positive context cost to the reward."""
    from memabra.candidate_types import CandidateObject

    outcome_engine = OutcomeEngine()
    reward_engine = RewardEngine()
    route = RouteDecision(decision_type="direct_answer")
    outcome = outcome_engine.build_outcome(
        route,
        ActionResult(decision_type="direct_answer", status="skipped", details={"latency_ms": 0}),
    )
    candidate = CandidateObject(id="c1", type="memory", title="t", summary="s", triggers=[])
    # 3 memory + 2 skill + 1 tool candidates, mirroring the original fixture counts.
    retrieval = RetrievalResult(
        memory=[candidate] * 3,
        skill=[candidate] * 2,
        tool=[candidate],
    )

    reward = reward_engine.compute(route, outcome, retrieval_result=retrieval)

    assert reward.context_cost > 0
def test_reward_engine_reduces_task_success_for_multiple_errors():
    """Multiple tool errors drag task_success below 0.5 and raise the error signal."""
    outcome_engine = OutcomeEngine()
    reward_engine = RewardEngine()
    route = RouteDecision(decision_type="call_tool")
    action = ActionResult(
        decision_type="call_tool",
        status="error",
        details={
            "latency_ms": 100,
            "results": [
                {"tool_id": "tool-1", "status": "error"},
                {"tool_id": "tool-2", "status": "error"},
            ],
        },
    )
    outcome = outcome_engine.build_outcome(route, action)

    reward = reward_engine.compute(route, outcome)

    assert reward.task_success < 0.5
    assert reward.tool_error >= 0.5