Files
memabra/tests/test_app.py
2026-04-15 11:06:05 +08:00

198 lines
7.2 KiB
Python

from pathlib import Path
from memabra.app import MemabraApp, build_app_with_skills, build_demo_app
def test_build_demo_app_runs_task_and_produces_summary(tmp_path: Path):
    """Run one task through the demo app and check what it leaves behind.

    Verifies the ``traj-`` prefix on the returned trajectory id, that the
    replay summary counts one trajectory, that a ``memory_injected`` event
    was recorded, and that exactly one JSON file exists in the
    ``trajectories`` folder under the app's base directory.
    """
    artifacts_dir = tmp_path / "demo-artifacts"
    demo = build_demo_app(base_dir=artifacts_dir)

    result = demo.run_task(
        "Use my telegram preference for this answer.",
        channel="telegram",
        user_id="oza",
    )
    replay = demo.replay_summary()

    assert result["trajectory_id"].startswith("traj-")
    assert replay.trajectories == 1
    assert any(evt["event_type"] == "memory_injected" for evt in result["events"])

    # One run_task call is expected to persist exactly one trajectory file.
    persisted = list((artifacts_dir / "trajectories").glob("*.json"))
    assert len(persisted) == 1
def test_app_can_run_tool_task_with_demo_backend(tmp_path: Path):
    """A status-check prompt on the demo app should produce a successful tool call.

    Checks that the first decision is ``call_tool``, that a ``tool_result``
    event is present, and that the outcome status is ``success``.
    """
    demo = build_demo_app(base_dir=tmp_path / "demo-artifacts")
    result = demo.run_task("Check the current system status.")

    first_decision = result["decisions"][0]
    assert first_decision["decision_type"] == "call_tool"
    assert any(evt["event_type"] == "tool_result" for evt in result["events"])

    outcome = result["outcome"]
    assert outcome["status"] == "success"
def test_build_app_with_skills_loads_real_skill_from_filesystem(tmp_path: Path):
    """Build an app against an on-disk skill directory and load that skill.

    A minimal ``SKILL.md`` with front matter is written under
    ``tmp_path / "skills" / "github-auth"``. The test then checks that a
    memory task still routes to ``inject_memory`` and that the skill backend
    wired into the app can load the skill by name and return its content.
    """
    skills_root = tmp_path / "skills"
    skill_dir = skills_root / "github-auth"
    skill_dir.mkdir(parents=True)

    skill_markdown = (
        "---\n"
        "name: github-auth\n"
        "description: Authenticate with GitHub.\n"
        "---\n\n"
        "# GitHub Auth\n\n"
        "Use git or gh.\n"
    )
    # Pin the encoding so the fixture does not depend on the platform locale.
    (skill_dir / "SKILL.md").write_text(skill_markdown, encoding="utf-8")

    app = build_app_with_skills(
        base_dir=tmp_path / "artifacts",
        skill_search_paths=[skills_root],
    )

    # github-auth is not in the candidate set by default, so the router won't
    # trigger it. Confirm the app builds and a memory task still works.
    trajectory = app.run_task(
        "Use my telegram preference for this answer.",
        channel="telegram",
        user_id="oza",
    )
    assert trajectory["decisions"][0]["decision_type"] == "inject_memory"

    # Verify the skill backend is actually wired by loading the skill directly.
    backend = app.runner.execution_engine.skill_executor.backend
    payload = backend.load_skill("github-auth")
    assert payload["name"] == "github-auth"
    assert "Use git or gh." in payload["content"]
def test_app_artifact_index_queries_persisted_trajectories(tmp_path: Path):
    """Query the artifact index by channel and by decision type after two runs.

    Two tasks are run on different channels. Each query is expected to return
    exactly the one matching trajectory, and slicing the dataset by the
    ``local`` channel is expected to yield a single entry.
    """
    memory_prompt = "Use my telegram preference for this answer."
    tool_prompt = "Check the current system status."

    demo = build_demo_app(base_dir=tmp_path / "demo-artifacts")
    demo.run_task(memory_prompt, channel="telegram", user_id="u1")
    demo.run_task(tool_prompt, channel="local", user_id="u2")

    artifacts = demo.artifact_index()
    by_channel = artifacts.query(channel="telegram")
    by_decision = artifacts.query(decision_type="call_tool")

    assert len(by_channel) == 1
    assert by_channel[0]["task"]["input"] == memory_prompt
    assert len(by_decision) == 1
    assert by_decision[0]["task"]["input"] == tool_prompt

    local_slice = artifacts.slice_dataset(channel="local")
    assert len(local_slice) == 1
def test_app_run_online_learning_cycle_returns_report(tmp_path: Path):
    """The online learning cycle should return a report-shaped mapping.

    After seeding ten trajectories, the cycle is run with very loose policy
    thresholds. The result must contain ``skipped`` and ``report_id``, and
    either contain ``promoted`` or have ``skipped`` set to ``True``.
    """
    from memabra.benchmarks import BenchmarkTask
    from memabra.promotion import PromotionPolicy

    demo = build_demo_app(base_dir=tmp_path / "demo-artifacts")

    # Seed trajectories for the cycle to work from.
    for idx in range(10):
        demo.run_task(f"Task {idx}")

    lenient_policy = PromotionPolicy(
        min_reward_delta=-1.0,
        max_error_rate_increase=1.0,
        max_latency_increase_ms=10000.0,
        required_task_count=1,
    )
    outcome = demo.run_online_learning_cycle(
        policy=lenient_policy,
        benchmark_tasks=[BenchmarkTask(user_input="Task 0")],
        min_new_trajectories=1,
    )

    assert "skipped" in outcome
    assert "promoted" in outcome or outcome["skipped"] is True
    assert "report_id" in outcome
def test_app_run_online_learning_cycle_uses_baseline_version(tmp_path: Path):
    """Run the learning cycle against a baseline router loaded from a version store.

    A hand-configured router is saved as ``v-baseline`` and the app's active
    router is replaced with a fresh one. The cycle is then invoked with the
    store directory and baseline id. It must not be skipped and must report
    both ``baseline_metrics`` and ``challenger_metrics``.
    """
    from memabra.benchmarks import BenchmarkTask
    from memabra.promotion import PromotionPolicy
    from memabra.router import SimpleLearningRouter
    from memabra.router_versioning import RouterVersionStore

    demo = build_demo_app(base_dir=tmp_path / "demo-artifacts")
    for idx in range(10):
        demo.run_task(f"Task {idx}")

    # Persist a baseline version with explicitly set weights and feature keys.
    baseline = SimpleLearningRouter()
    baseline._weights = {"call_tool": {"input_length": 0.99}}
    baseline._feature_keys = ["input_length"]
    versions_path = tmp_path / "versions"
    version_store = RouterVersionStore(base_dir=versions_path)
    version_store.save(baseline, version_id="v-baseline")

    # Replace the app's current router with a fresh one.
    demo.set_router(SimpleLearningRouter())

    lenient_policy = PromotionPolicy(
        min_reward_delta=-1.0,
        max_error_rate_increase=1.0,
        max_latency_increase_ms=10000.0,
        required_task_count=1,
    )
    outcome = demo.run_online_learning_cycle(
        policy=lenient_policy,
        benchmark_tasks=[BenchmarkTask(user_input="Task 0")],
        min_new_trajectories=1,
        version_store_base_dir=versions_path,
        baseline_version_id="v-baseline",
    )

    assert outcome["skipped"] is False
    assert "baseline_metrics" in outcome
    assert "challenger_metrics" in outcome
def test_app_run_online_learning_cycle_rebuilds_case_index(tmp_path: Path):
    """Passing ``case_index_path`` to the learning cycle should write a case index.

    After the cycle runs (and is not skipped), the file at the given path
    must exist, and loading it must yield a best entry for ``"Task 0"``.
    """
    from memabra.benchmarks import BenchmarkTask
    from memabra.promotion import PromotionPolicy

    demo = build_demo_app(base_dir=tmp_path / "demo-artifacts")
    for idx in range(10):
        demo.run_task(f"Task {idx}")

    index_file = tmp_path / "case-index.json"
    lenient_policy = PromotionPolicy(
        min_reward_delta=-1.0,
        max_error_rate_increase=1.0,
        max_latency_increase_ms=10000.0,
        required_task_count=1,
    )
    outcome = demo.run_online_learning_cycle(
        policy=lenient_policy,
        benchmark_tasks=[BenchmarkTask(user_input="Task 0")],
        min_new_trajectories=1,
        case_index_path=index_file,
    )

    assert outcome["skipped"] is False
    assert index_file.exists()

    # Imported only after the file is known to exist, as in the original test.
    from memabra.case_index import CaseIndex

    reloaded = CaseIndex.load(index_file)
    assert reloaded.best("Task 0") is not None
def test_app_build_case_index_from_trajectories(tmp_path: Path):
    """Building a case index after two identical-input runs yields a best entry."""
    prompt = "Hello world"
    demo = build_demo_app(base_dir=tmp_path / "demo-artifacts")

    # Same input, two different users.
    demo.run_task(prompt, channel="local", user_id="u1")
    demo.run_task(prompt, channel="local", user_id="u2")

    built_index = demo.build_case_index()
    assert built_index.best(prompt) is not None
def test_app_save_and_load_case_index(tmp_path: Path):
    """A case index saved by one app instance can be loaded by another.

    The first app runs a task, builds its case index, and saves it to disk.
    A second app built on the same base directory loads that file and must
    expose a non-``None`` ``case_index`` with a best entry for the input.
    """
    prompt = "Persist this case"
    artifacts_dir = tmp_path / "demo-artifacts"
    index_file = tmp_path / "case-index.json"

    writer = build_demo_app(base_dir=artifacts_dir)
    writer.run_task(prompt, channel="local", user_id="u1")
    writer.build_case_index()
    writer.save_case_index(index_file)

    reader = build_demo_app(base_dir=artifacts_dir)
    reader.load_case_index(index_file)

    assert reader.case_index is not None
    assert reader.case_index.best(prompt) is not None
def test_app_best_trajectory_for_input(tmp_path: Path):
    """``best_trajectory_for`` returns the id of the only trajectory for that input."""
    prompt = "Find the best trajectory"
    demo = build_demo_app(base_dir=tmp_path / "demo-artifacts")

    recorded = demo.run_task(prompt, channel="local", user_id="u1")
    demo.build_case_index()

    expected_id = recorded["trajectory_id"]
    assert demo.best_trajectory_for(prompt) == expected_id