from pathlib import Path

from memabra.app import MemabraApp, build_app_with_skills, build_demo_app

# Keyword arguments for the PromotionPolicy shared by the
# online-learning-cycle tests below.
_CYCLE_POLICY_KWARGS = {
    "min_reward_delta": -1.0,
    "max_error_rate_increase": 1.0,
    "max_latency_increase_ms": 10000.0,
    "required_task_count": 1,
}


def _demo_app(tmp_path: Path):
    """Build the demo app with its artifacts under ``tmp_path / "demo-artifacts"``."""
    return build_demo_app(base_dir=tmp_path / "demo-artifacts")


def _seed_tasks(app, count: int = 10) -> None:
    """Run ``count`` tasks named ``Task 0`` .. ``Task {count - 1}`` on ``app``."""
    for number in range(count):
        app.run_task(f"Task {number}")


def _has_event(trajectory, event_type: str) -> bool:
    """Return True when any event in ``trajectory`` has the given ``event_type``."""
    return any(entry["event_type"] == event_type for entry in trajectory["events"])


def test_build_demo_app_runs_task_and_produces_summary(tmp_path: Path):
    """A memory task yields a ``traj-`` trajectory, one summary entry and one JSON file."""
    app = _demo_app(tmp_path)

    trajectory = app.run_task(
        "Use my telegram preference for this answer.",
        channel="telegram",
        user_id="oza",
    )
    summary = app.replay_summary()

    assert trajectory["trajectory_id"].startswith("traj-")
    assert summary.trajectories == 1
    assert _has_event(trajectory, "memory_injected")
    trajectory_dir = tmp_path / "demo-artifacts" / "trajectories"
    persisted = list(trajectory_dir.glob("*.json"))
    assert len(persisted) == 1


def test_app_can_run_tool_task_with_demo_backend(tmp_path: Path):
    """A status-check task is decided as ``call_tool`` and ends in success."""
    app = _demo_app(tmp_path)

    trajectory = app.run_task("Check the current system status.")

    first_decision = trajectory["decisions"][0]
    assert first_decision["decision_type"] == "call_tool"
    assert _has_event(trajectory, "tool_result")
    assert trajectory["outcome"]["status"] == "success"


def test_build_app_with_skills_loads_real_skill_from_filesystem(tmp_path: Path):
    """A SKILL.md written to disk is loadable through the app's skill backend."""
    skills_root = tmp_path / "skills"
    skill_dir = skills_root / "github-auth"
    skill_dir.mkdir(parents=True)
    skill_markdown = (
        "---\n"
        "name: github-auth\n"
        "description: Authenticate with GitHub.\n"
        "---\n\n"
        "# GitHub Auth\n\n"
        "Use git or gh.\n"
    )
    (skill_dir / "SKILL.md").write_text(skill_markdown)

    app = build_app_with_skills(
        base_dir=tmp_path / "artifacts",
        skill_search_paths=[skills_root],
    )

    # github-auth is not in the candidate set by default, so the router won't
    # trigger it. We test that the app builds and a memory task still works.
    trajectory = app.run_task(
        "Use my telegram preference for this answer.",
        channel="telegram",
        user_id="oza",
    )
    assert trajectory["decisions"][0]["decision_type"] == "inject_memory"

    # Now verify the skill backend is actually wired by loading directly.
    backend = app.runner.execution_engine.skill_executor.backend
    payload = backend.load_skill("github-auth")
    assert payload["name"] == "github-auth"
    assert "Use git or gh." in payload["content"]


def test_app_artifact_index_queries_persisted_trajectories(tmp_path: Path):
    """The artifact index filters by channel and decision type and slices by channel."""
    memory_prompt = "Use my telegram preference for this answer."
    tool_prompt = "Check the current system status."
    app = _demo_app(tmp_path)
    app.run_task(memory_prompt, channel="telegram", user_id="u1")
    app.run_task(tool_prompt, channel="local", user_id="u2")

    index = app.artifact_index()
    telegram_trajs = index.query(channel="telegram")
    tool_trajs = index.query(decision_type="call_tool")

    assert len(telegram_trajs) == 1
    assert telegram_trajs[0]["task"]["input"] == memory_prompt
    assert len(tool_trajs) == 1
    assert tool_trajs[0]["task"]["input"] == tool_prompt

    slice_ids = index.slice_dataset(channel="local")
    assert len(slice_ids) == 1


def test_app_run_online_learning_cycle_returns_report(tmp_path: Path):
    """The cycle result carries ``skipped``, ``report_id`` and, unless skipped, ``promoted``."""
    from memabra.benchmarks import BenchmarkTask
    from memabra.promotion import PromotionPolicy

    app = _demo_app(tmp_path)
    # Seed trajectories
    _seed_tasks(app)

    policy = PromotionPolicy(**_CYCLE_POLICY_KWARGS)
    benchmark_tasks = [BenchmarkTask(user_input="Task 0")]
    result = app.run_online_learning_cycle(
        policy=policy,
        benchmark_tasks=benchmark_tasks,
        min_new_trajectories=1,
    )

    assert "skipped" in result
    assert ("promoted" in result) or (result["skipped"] is True)
    assert "report_id" in result


def test_app_run_online_learning_cycle_uses_baseline_version(tmp_path: Path):
    """With a saved baseline version id, the result reports both metric sets."""
    from memabra.benchmarks import BenchmarkTask
    from memabra.promotion import PromotionPolicy
    from memabra.router import SimpleLearningRouter
    from memabra.router_versioning import RouterVersionStore

    app = _demo_app(tmp_path)
    _seed_tasks(app)

    # Save a baseline version
    baseline_router = SimpleLearningRouter()
    baseline_router._weights = {"call_tool": {"input_length": 0.99}}
    baseline_router._feature_keys = ["input_length"]
    version_dir = tmp_path / "versions"
    store = RouterVersionStore(base_dir=version_dir)
    store.save(baseline_router, version_id="v-baseline")

    # Change current router
    app.set_router(SimpleLearningRouter())

    policy = PromotionPolicy(**_CYCLE_POLICY_KWARGS)
    benchmark_tasks = [BenchmarkTask(user_input="Task 0")]
    result = app.run_online_learning_cycle(
        policy=policy,
        benchmark_tasks=benchmark_tasks,
        min_new_trajectories=1,
        version_store_base_dir=version_dir,
        baseline_version_id="v-baseline",
    )

    assert result["skipped"] is False
    for metrics_key in ("baseline_metrics", "challenger_metrics"):
        assert metrics_key in result


def test_app_run_online_learning_cycle_rebuilds_case_index(tmp_path: Path):
    """Passing ``case_index_path`` leaves a loadable case index file behind."""
    from memabra.benchmarks import BenchmarkTask
    from memabra.promotion import PromotionPolicy

    app = _demo_app(tmp_path)
    _seed_tasks(app)
    case_index_path = tmp_path / "case-index.json"

    policy = PromotionPolicy(**_CYCLE_POLICY_KWARGS)
    benchmark_tasks = [BenchmarkTask(user_input="Task 0")]
    result = app.run_online_learning_cycle(
        policy=policy,
        benchmark_tasks=benchmark_tasks,
        min_new_trajectories=1,
        case_index_path=case_index_path,
    )

    assert result["skipped"] is False
    assert case_index_path.exists()

    from memabra.case_index import CaseIndex

    index = CaseIndex.load(case_index_path)
    assert index.best("Task 0") is not None


def test_app_build_case_index_from_trajectories(tmp_path: Path):
    """A case index built after two runs of the same input has a best entry for it."""
    app = _demo_app(tmp_path)
    for user in ("u1", "u2"):
        app.run_task("Hello world", channel="local", user_id=user)

    case_index = app.build_case_index()

    assert case_index.best("Hello world") is not None


def test_app_save_and_load_case_index(tmp_path: Path):
    """A case index saved by one app instance can be loaded by a fresh one."""
    prompt = "Persist this case"
    app = _demo_app(tmp_path)
    app.run_task(prompt, channel="local", user_id="u1")
    case_index_path = tmp_path / "case-index.json"
    app.build_case_index()
    app.save_case_index(case_index_path)

    loaded_app = _demo_app(tmp_path)
    loaded_app.load_case_index(case_index_path)

    assert loaded_app.case_index is not None
    assert loaded_app.case_index.best(prompt) is not None


def test_app_best_trajectory_for_input(tmp_path: Path):
    """``best_trajectory_for`` returns the id of the only trajectory for that input."""
    prompt = "Find the best trajectory"
    app = _demo_app(tmp_path)
    trajectory = app.run_task(prompt, channel="local", user_id="u1")
    app.build_case_index()

    best_id = app.best_trajectory_for(prompt)

    assert best_id == trajectory["trajectory_id"]