Initial standalone memabra release
This commit is contained in:
38
tests/test_benchmarks.py
Normal file
38
tests/test_benchmarks.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from memabra.benchmarks import BenchmarkSuite, BenchmarkTask, save_benchmark_suite, load_benchmark_suite, default_benchmark_suite
|
||||
|
||||
|
||||
def test_benchmark_suite_roundtrip(tmp_path):
    """Save a two-task suite to ``suite.json`` and check it loads back intact."""
    first_task = BenchmarkTask(user_input="Hello", channel="local", user_id="u1")
    # No user_id is supplied here; the reloaded task must report None for it.
    second_task = BenchmarkTask(user_input="World", channel="telegram")
    original = BenchmarkSuite(name="test-suite", tasks=[first_task, second_task])
    suite_file = tmp_path / "suite.json"

    save_benchmark_suite(original, suite_file)
    restored = load_benchmark_suite(suite_file)

    # Suite-level fields survive the save/load cycle.
    assert restored.name == "test-suite"
    assert len(restored.tasks) == 2

    hello, world = restored.tasks[0], restored.tasks[1]

    # Task order and every per-task field are preserved.
    assert hello.user_input == "Hello"
    assert hello.channel == "local"
    assert hello.user_id == "u1"
    assert world.user_input == "World"
    assert world.channel == "telegram"
    assert world.user_id is None
|
||||
|
||||
|
||||
def test_default_benchmark_suite_covers_expected_categories():
    """Check the default suite's name, minimum size, and prompt keyword coverage."""
    default_suite = default_benchmark_suite()

    assert default_suite.name == "default"
    assert len(default_suite.tasks) >= 4

    # Matching is case-insensitive: prompts are lowercased once up front.
    prompts = [task.user_input.lower() for task in default_suite.tasks]

    def mentions(*keywords):
        """Return True if any prompt contains at least one of the keywords."""
        return any(word in prompt for prompt in prompts for word in keywords)

    # One assertion per keyword pair so a failure points at the missing pair.
    assert mentions("memory", "preference")
    assert mentions("skill", "deploy")
    assert mentions("tool", "status")
    assert mentions("composite", "multiple")
|
||||
Reference in New Issue
Block a user