from __future__ import annotations

from memabra.benchmarks import (
    BenchmarkSuite,
    BenchmarkTask,
    save_benchmark_suite,
    load_benchmark_suite,
    default_benchmark_suite,
)


def test_benchmark_suite_roundtrip(tmp_path):
    """Save a two-task suite to JSON, load it back, and compare every field.

    The second task is built without a ``user_id`` so the test also pins
    that the field reads back as ``None`` after the round trip.
    """
    suite_file = tmp_path / "suite.json"

    task_with_user = BenchmarkTask(user_input="Hello", channel="local", user_id="u1")
    task_without_user = BenchmarkTask(user_input="World", channel="telegram")
    original = BenchmarkSuite(
        name="test-suite",
        tasks=[task_with_user, task_without_user],
    )

    save_benchmark_suite(original, suite_file)
    restored = load_benchmark_suite(suite_file)

    # Suite-level attributes.
    assert restored.name == "test-suite"
    assert len(restored.tasks) == 2

    # First task: all three fields were set explicitly.
    first_task = restored.tasks[0]
    assert first_task.user_input == "Hello"
    assert first_task.channel == "local"
    assert first_task.user_id == "u1"

    # Second task: user_id was never supplied.
    second_task = restored.tasks[1]
    assert second_task.user_input == "World"
    assert second_task.channel == "telegram"
    assert second_task.user_id is None


def test_default_benchmark_suite_covers_expected_categories():
    """Check the default suite's name, minimum size, and keyword coverage.

    For each keyword pair below, at least one task's lower-cased
    ``user_input`` must contain one of the two keywords.
    """
    default_suite = default_benchmark_suite()

    assert default_suite.name == "default"
    assert len(default_suite.tasks) >= 4

    lowered_inputs = [task.user_input.lower() for task in default_suite.tasks]

    # One pair per expected category; matching either keyword is enough.
    keyword_pairs = (
        ("memory", "preference"),
        ("skill", "deploy"),
        ("tool", "status"),
        ("composite", "multiple"),
    )
    for primary, alternate in keyword_pairs:
        assert any(
            primary in text or alternate in text for text in lowered_inputs
        )