Initial standalone memabra release
This commit is contained in:
126
tests/test_outcome_reward.py
Normal file
126
tests/test_outcome_reward.py
Normal file
@@ -0,0 +1,126 @@
|
||||
from memabra.execution import ActionResult
|
||||
from memabra.outcome import OutcomeEngine, RewardEngine
|
||||
from memabra.retrieval import RetrievalResult
|
||||
from memabra.router import RouteDecision, TaskContext
|
||||
from memabra.telemetry import RewardBreakdown
|
||||
|
||||
|
||||
def test_outcome_engine_success_for_memory_injection():
    """An executed memory-injection action yields a clean one-step success outcome."""
    route = RouteDecision(decision_type="inject_memory", selected_ids=["mem-1"])
    action = ActionResult(decision_type="inject_memory", status="executed", details={"latency_ms": 50})

    outcome = OutcomeEngine().build_outcome(route, action)

    assert outcome.status == "success"
    assert outcome.steps == 1
    assert outcome.latency_ms == 50
    assert outcome.tool_errors == 0
|
||||
|
||||
|
||||
def test_outcome_engine_failure_for_tool_error():
    """A tool call that errors produces a failure outcome carrying one tool error."""
    route = RouteDecision(decision_type="call_tool", selected_ids=["tool-1"])
    action = ActionResult(decision_type="call_tool", status="error", details={"latency_ms": 120})

    outcome = OutcomeEngine().build_outcome(route, action)

    assert outcome.status == "failure"
    assert outcome.latency_ms == 120
    assert outcome.tool_errors == 1
|
||||
|
||||
|
||||
def test_outcome_engine_counts_multiple_tool_errors():
    """Each per-tool error entry in the details payload is counted individually."""
    route = RouteDecision(decision_type="call_tool", selected_ids=["tool-1", "tool-2"])
    details = {
        "latency_ms": 200,
        "results": [
            {"tool_id": "tool-1", "status": "error"},
            {"tool_id": "tool-2", "status": "error"},
        ],
    }
    action = ActionResult(decision_type="call_tool", status="error", details=details)

    outcome = OutcomeEngine().build_outcome(route, action)

    assert outcome.status == "failure"
    assert outcome.tool_errors == 2
|
||||
|
||||
|
||||
def test_outcome_engine_partial_success_for_mixed_tool_results():
    """A mix of successful and failed tool results is classified as partial success."""
    route = RouteDecision(decision_type="call_tool", selected_ids=["tool-1", "tool-2"])
    details = {
        "latency_ms": 200,
        "results": [
            {"tool_id": "tool-1", "status": "success"},
            {"tool_id": "tool-2", "status": "error"},
        ],
    }
    action = ActionResult(decision_type="call_tool", status="error", details=details)

    outcome = OutcomeEngine().build_outcome(route, action)

    assert outcome.status == "partial_success"
    assert outcome.tool_errors == 1
|
||||
|
||||
|
||||
def test_reward_engine_penalizes_latency_by_tier():
    """Slower executions accrue a larger latency penalty than fast ones."""
    outcomes = OutcomeEngine()
    rewards = RewardEngine()
    route = RouteDecision(decision_type="call_tool")

    def outcome_with_latency(ms):
        # Build a successful tool-call outcome at the given latency.
        action = ActionResult(decision_type="call_tool", status="success", details={"latency_ms": ms})
        return outcomes.build_outcome(route, action)

    reward_fast = rewards.compute(route, outcome_with_latency(200))
    reward_slow = rewards.compute(route, outcome_with_latency(2500))

    assert reward_fast.latency < reward_slow.latency
    assert reward_slow.latency > 0.5
|
||||
|
||||
|
||||
def test_reward_engine_context_cost_based_on_candidate_count():
    """Retrieved candidates translate into a positive context cost in the reward."""
    from memabra.candidate_types import CandidateObject

    route = RouteDecision(decision_type="direct_answer")
    outcome = OutcomeEngine().build_outcome(
        route,
        ActionResult(decision_type="direct_answer", status="skipped", details={"latency_ms": 0}),
    )
    stub = CandidateObject(id="c1", type="memory", title="t", summary="s", triggers=[])
    # Same stub repeated: only the candidate counts matter for context cost here.
    retrieval = RetrievalResult(memory=[stub] * 3, skill=[stub] * 2, tool=[stub])

    reward = RewardEngine().compute(route, outcome, retrieval_result=retrieval)

    assert reward.context_cost > 0
|
||||
|
||||
|
||||
def test_reward_engine_reduces_task_success_for_multiple_errors():
    """Multiple tool errors drag task success below 0.5 and raise the error penalty."""
    route = RouteDecision(decision_type="call_tool")
    action = ActionResult(
        decision_type="call_tool",
        status="error",
        details={
            "latency_ms": 100,
            "results": [
                {"tool_id": "tool-1", "status": "error"},
                {"tool_id": "tool-2", "status": "error"},
            ],
        },
    )
    outcome = OutcomeEngine().build_outcome(route, action)

    reward = RewardEngine().compute(route, outcome)

    assert reward.task_success < 0.5
    assert reward.tool_error >= 0.5
|
||||
Reference in New Issue
Block a user