# Tests for the memabra outcome and reward engines.
from memabra.execution import ActionResult
|
|
from memabra.outcome import OutcomeEngine, RewardEngine
|
|
from memabra.retrieval import RetrievalResult
|
|
from memabra.router import RouteDecision, TaskContext
|
|
from memabra.telemetry import RewardBreakdown
|
|
|
|
|
|
def test_outcome_engine_success_for_memory_injection():
    """A cleanly executed memory injection yields a successful one-step outcome."""
    route = RouteDecision(decision_type="inject_memory", selected_ids=["mem-1"])
    action = ActionResult(decision_type="inject_memory", status="executed", details={"latency_ms": 50})

    built = OutcomeEngine().build_outcome(route, action)

    assert built.status == "success"
    assert built.steps == 1
    assert built.latency_ms == 50
    assert built.tool_errors == 0
def test_outcome_engine_failure_for_tool_error():
    """An errored tool call is reported as a failure carrying one tool error."""
    route = RouteDecision(decision_type="call_tool", selected_ids=["tool-1"])
    action = ActionResult(decision_type="call_tool", status="error", details={"latency_ms": 120})

    built = OutcomeEngine().build_outcome(route, action)

    assert built.status == "failure"
    assert built.latency_ms == 120
    assert built.tool_errors == 1
def test_outcome_engine_counts_multiple_tool_errors():
    """Each errored per-tool result contributes to the tool_errors tally."""
    route = RouteDecision(decision_type="call_tool", selected_ids=["tool-1", "tool-2"])
    per_tool_results = [
        {"tool_id": "tool-1", "status": "error"},
        {"tool_id": "tool-2", "status": "error"},
    ]
    action = ActionResult(
        decision_type="call_tool",
        status="error",
        details={"latency_ms": 200, "results": per_tool_results},
    )

    built = OutcomeEngine().build_outcome(route, action)

    assert built.status == "failure"
    assert built.tool_errors == 2
def test_outcome_engine_partial_success_for_mixed_tool_results():
    """A mix of successful and errored tool results maps to partial_success."""
    route = RouteDecision(decision_type="call_tool", selected_ids=["tool-1", "tool-2"])
    per_tool_results = [
        {"tool_id": "tool-1", "status": "success"},
        {"tool_id": "tool-2", "status": "error"},
    ]
    action = ActionResult(
        decision_type="call_tool",
        status="error",
        details={"latency_ms": 200, "results": per_tool_results},
    )

    built = OutcomeEngine().build_outcome(route, action)

    assert built.status == "partial_success"
    assert built.tool_errors == 1
def test_reward_engine_penalizes_latency_by_tier():
    """Slower successful actions accrue a strictly larger latency penalty."""
    outcomes = OutcomeEngine()
    rewards = RewardEngine()
    route = RouteDecision(decision_type="call_tool")

    def reward_at(latency_ms):
        # Build a successful outcome at the given latency and score it.
        action = ActionResult(decision_type="call_tool", status="success", details={"latency_ms": latency_ms})
        return rewards.compute(route, outcomes.build_outcome(route, action))

    fast = reward_at(200)
    slow = reward_at(2500)

    assert fast.latency < slow.latency
    assert slow.latency > 0.5
def test_reward_engine_context_cost_based_on_candidate_count():
    """Retrieved candidates across every channel incur a positive context cost."""
    from memabra.candidate_types import CandidateObject

    route = RouteDecision(decision_type="direct_answer")
    outcome = OutcomeEngine().build_outcome(
        route,
        ActionResult(decision_type="direct_answer", status="skipped", details={"latency_ms": 0}),
    )
    # A single placeholder candidate is reused; only the counts matter here.
    filler = CandidateObject(id="c1", type="memory", title="t", summary="s", triggers=[])
    retrieval = RetrievalResult(memory=[filler] * 3, skill=[filler] * 2, tool=[filler])

    reward = RewardEngine().compute(route, outcome, retrieval_result=retrieval)

    assert reward.context_cost > 0
def test_reward_engine_reduces_task_success_for_multiple_errors():
    """Multiple tool errors depress task_success and raise the tool_error penalty."""
    route = RouteDecision(decision_type="call_tool")
    failing_action = ActionResult(
        decision_type="call_tool",
        status="error",
        details={
            "latency_ms": 100,
            "results": [
                {"tool_id": "tool-1", "status": "error"},
                {"tool_id": "tool-2", "status": "error"},
            ],
        },
    )
    outcome = OutcomeEngine().build_outcome(route, failing_action)

    reward = RewardEngine().compute(route, outcome)

    assert reward.task_success < 0.5
    assert reward.tool_error >= 0.5