59 lines
2.1 KiB
Python
59 lines
2.1 KiB
Python
from memabra.trajectory_summary import TrajectorySummarizer
|
|
|
|
|
|
def test_summarize_direct_answer_success():
    """Check the summary text for a single-decision, successful trajectory."""
    outcome = {"status": "success", "steps": 1, "tool_errors": 0, "user_corrections": 0}
    record = {
        "task": {"input": "What is 2+2?"},
        "decisions": [{"decision_type": "direct_answer"}],
        "outcome": outcome,
        "reward": {"total": 1.0},
    }

    text = TrajectorySummarizer().summarize(record)

    # Each fragment must appear verbatim somewhere in the summary.
    expected_fragments = (
        "Task: 'What is 2+2?'",
        "Actions: direct_answer",
        "Outcome: success (reward=1.0, steps=1)",
    )
    for fragment in expected_fragments:
        assert fragment in text
|
|
|
|
|
|
def test_summarize_multi_step_with_tool_errors():
    """Check the summary of a three-decision trajectory with one tool error and one correction."""
    # Decisions are listed in the order the summary is expected to chain them.
    decision_kinds = ("clarify", "call_tool", "direct_answer")
    record = {
        "task": {"input": "Run analysis"},
        "decisions": [{"decision_type": kind} for kind in decision_kinds],
        "outcome": {
            "status": "partial_success",
            "steps": 3,
            "tool_errors": 1,
            "user_corrections": 1,
        },
        "reward": {"total": 0.5},
    }

    text = TrajectorySummarizer().summarize(record)

    expected_fragments = (
        "Actions: clarify -> call_tool -> direct_answer",
        "Outcome: partial_success (reward=0.5, steps=3)",
        "Tool errors: 1",
        "User corrections: 1",
    )
    for fragment in expected_fragments:
        assert fragment in text
|
|
|
|
|
|
def test_summarize_truncates_long_input():
    """Check that a 100-character task input yields an ellipsized, bounded summary."""
    long_input = "a" * 100
    outcome = {"status": "success", "steps": 1, "tool_errors": 0, "user_corrections": 0}
    record = {
        "task": {"input": long_input},
        "decisions": [{"decision_type": "direct_answer"}],
        "outcome": outcome,
        "reward": {"total": 0.9},
    }

    text = TrajectorySummarizer().summarize(record)

    # The task line is present and carries an ellipsis marker.
    for fragment in ("Task: '", "..."):
        assert fragment in text
    # The whole summary stays under the 300-character bound.
    assert len(text) < 300
|
|
|
|
|
|
def test_summarize_handles_missing_fields_gracefully():
    """Check the fallback fragments produced when the trajectory is an empty dict."""
    text = TrajectorySummarizer().summarize({})

    # With no fields supplied, the summary is expected to contain these defaults.
    expected_fragments = (
        "Task: ''",
        "Actions: none",
        "Outcome: unknown (reward=0.0, steps=0)",
    )
    for fragment in expected_fragments:
        assert fragment in text
|