# Change log (from commit message):
# - Removed dead scoring code (_score_candidate, _action_for, etc.) and aligned
#   action decisions directly with score_opportunity_signal metrics.
# - Reduced the action surface from trigger/setup/chase/skip to entry/watch/avoid.
# - Added a confidence field (0..100) mapped from edge_score.
# - Updated evaluate/optimize ground-truth mapping and tests.
# Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
"""Walk-forward evaluation for historical opportunity datasets."""
|
|
|
|
from __future__ import annotations

import json
from collections import defaultdict
from copy import deepcopy
from datetime import datetime, timezone
from pathlib import Path
from statistics import mean
from typing import Any

from .market_service import normalize_symbol
from .opportunity_service import _action_for_opportunity, _opportunity_thresholds
from .signal_service import (
    get_opportunity_model_weights,
    get_signal_interval,
    score_opportunity_signal,
)
# Model-weight keys eligible for coordinate search in optimize_opportunity_model.
# Each name corresponds to a feature weight consumed by score_opportunity_signal.
_OPTIMIZE_WEIGHT_KEYS = [
    "trend",
    "compression",
    "breakout_proximity",
    "higher_lows",
    "range_position",
    "fresh_breakout",
    "volume",
    "momentum",
    "setup",
    "trigger",
    "liquidity",
    "volatility_penalty",
    "extension_penalty",
]

# Scaling factors tried for each weight during every optimization pass.
# 1.0 (keep current value) is implicit, so it is not listed here.
_OPTIMIZE_MULTIPLIERS = [0.5, 0.75, 1.25, 1.5]
def _as_float(value: Any, default: float = 0.0) -> float:
|
|
try:
|
|
return float(value)
|
|
except (TypeError, ValueError):
|
|
return default
|
|
|
|
|
|
def _as_int(value: Any, default: int = 0) -> int:
|
|
try:
|
|
return int(value)
|
|
except (TypeError, ValueError):
|
|
return default
|
|
|
|
|
|
def _parse_dt(value: Any) -> datetime | None:
|
|
if not value:
|
|
return None
|
|
try:
|
|
return datetime.fromisoformat(str(value).replace("Z", "+00:00")).astimezone(timezone.utc)
|
|
except ValueError:
|
|
return None
|
|
|
|
|
|
def _iso_from_ms(value: int) -> str:
|
|
return datetime.fromtimestamp(value / 1000, tz=timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
|
|
|
|
|
|
def _close(row: list[Any]) -> float:
    """Close price of a kline row (column 4)."""
    raw_close = row[4]
    return _as_float(raw_close)
def _high(row: list[Any]) -> float:
    """High price of a kline row (column 2)."""
    raw_high = row[2]
    return _as_float(raw_high)
def _low(row: list[Any]) -> float:
    """Low price of a kline row (column 3)."""
    raw_low = row[3]
    return _as_float(raw_low)
def _volume(row: list[Any]) -> float:
    """Base-asset volume of a kline row (column 5)."""
    raw_volume = row[5]
    return _as_float(raw_volume)
def _quote_volume(row: list[Any]) -> float:
    """Quote-asset volume of a kline row.

    Uses column 7 when the row carries it; otherwise approximates it as
    close price times base volume.
    """
    if len(row) <= 7:
        return _close(row) * _volume(row)
    return _as_float(row[7])
def _open_ms(row: list[Any]) -> int:
|
|
return int(row[0])
|
|
|
|
|
|
def _ticker_from_window(symbol: str, rows: list[list[Any]]) -> dict[str, Any]:
    """Build a ticker-style summary dict from a window of kline rows.

    price_change_pct spans first close to last close (0.0 when the first
    close is zero); volume and high/low aggregate the whole window.
    """
    window_open = _close(rows[0])
    window_close = _close(rows[-1])
    change_pct = 0.0
    if window_open:
        change_pct = (window_close - window_open) / window_open * 100.0
    return {
        "symbol": symbol,
        "price_change_pct": change_pct,
        "quote_volume": sum(_quote_volume(row) for row in rows),
        "high_price": max(_high(row) for row in rows),
        "low_price": min(_low(row) for row in rows),
    }
def _window_series(rows: list[list[Any]]) -> tuple[list[float], list[float]]:
    """Split kline rows into parallel (closes, volumes) series."""
    closes: list[float] = []
    volumes: list[float] = []
    for row in rows:
        closes.append(_close(row))
        volumes.append(_volume(row))
    return closes, volumes
def _pct(new: float, old: float) -> float:
|
|
if old == 0:
|
|
return 0.0
|
|
return (new - old) / old
|
|
|
|
|
|
def _path_stats(entry: float, future_rows: list[list[Any]], take_profit: float, stop_loss: float) -> dict[str, Any]:
    """Simulate a long position from *entry* across *future_rows*.

    Walks the future candles in order and reports the first exit event:
    - "stop": a candle's low breached -stop_loss (checked before the target
      within the same candle — conservative "stop_first" policy),
    - "target": a candle's high reached +take_profit,
    - "horizon": neither level was hit within the window,
    - "missing": no future candles were available.

    Returns the exit return plus path statistics (final return, max upside,
    max drawdown, bar count) computed over the entire window.
    """
    if not future_rows:
        return {
            "event": "missing",
            "exit_return": 0.0,
            "final_return": 0.0,
            "max_upside": 0.0,
            "max_drawdown": 0.0,
            "bars": 0,
        }

    # Path statistics are identical no matter which exit fires, so compute
    # them once up front (the original duplicated this dict in three branches).
    stats = {
        "final_return": _pct(_close(future_rows[-1]), entry),
        "max_upside": max(_pct(_high(item), entry) for item in future_rows),
        "max_drawdown": min(_pct(_low(item), entry) for item in future_rows),
        "bars": len(future_rows),
    }

    for row in future_rows:
        # Stop is evaluated before target within the same candle ("stop_first").
        if _pct(_low(row), entry) <= -stop_loss:
            return {"event": "stop", "exit_return": -stop_loss, **stats}
        if _pct(_high(row), entry) >= take_profit:
            return {"event": "target", "exit_return": take_profit, **stats}

    return {"event": "horizon", "exit_return": stats["final_return"], **stats}
def _is_correct(action: str, trigger_path: dict[str, Any], setup_path: dict[str, Any]) -> bool:
|
|
if action == "entry":
|
|
return str(trigger_path["event"]) == "target"
|
|
if action == "watch":
|
|
return str(setup_path["event"]) == "target"
|
|
if action == "avoid":
|
|
return str(setup_path["event"]) != "target"
|
|
return False
|
|
|
|
|
|
def _round_float(value: Any, digits: int = 4) -> float:
    """Coerce *value* to float (0.0 on failure) and round to *digits* places."""
    coerced = _as_float(value)
    return round(coerced, digits)
def _finalize_bucket(bucket: dict[str, Any]) -> dict[str, Any]:
|
|
count = int(bucket["count"])
|
|
correct = int(bucket["correct"])
|
|
returns = bucket["forward_returns"]
|
|
trade_returns = bucket["trade_returns"]
|
|
return {
|
|
"count": count,
|
|
"correct": correct,
|
|
"incorrect": count - correct,
|
|
"accuracy": round(correct / count, 4) if count else 0.0,
|
|
"avg_forward_return": round(mean(returns), 4) if returns else 0.0,
|
|
"avg_trade_return": round(mean(trade_returns), 4) if trade_returns else 0.0,
|
|
}
|
|
|
|
|
|
def _bucket() -> dict[str, Any]:
|
|
return {"count": 0, "correct": 0, "forward_returns": [], "trade_returns": []}
|
|
|
|
|
|
def evaluate_opportunity_dataset(
    config: dict[str, Any],
    *,
    dataset_path: str,
    horizon_hours: float | None = None,
    take_profit: float | None = None,
    stop_loss: float | None = None,
    setup_target: float | None = None,
    lookback: int | None = None,
    top_n: int | None = None,
    max_examples: int = 20,
) -> dict[str, Any]:
    """Evaluate opportunity actions using only point-in-time historical candles.

    Loads a walk-forward dataset (JSON with metadata.plan and per-symbol
    klines), replays every candle open time inside the plan's simulation
    window, scores each symbol with score_opportunity_signal over a trailing
    lookback window, maps the score to an entry/watch/avoid action via
    _action_for_opportunity, and grades the top-N actions per decision time
    against the realized forward path (_path_stats / _is_correct).

    Keyword overrides (take_profit, stop_loss, setup_target) are fractional
    (e.g. 0.02); the corresponding config values are percentages and are
    divided by 100 here. horizon_hours falls back to plan.simulate_days*24,
    then to the config default.

    Returns a dict with "summary", "by_action", "trade_simulation", "rules",
    "examples", and "incorrect_examples" sections.

    Raises ValueError when the dataset lacks plan.simulation_start/_end.
    """
    dataset_file = Path(dataset_path).expanduser()
    dataset = json.loads(dataset_file.read_text(encoding="utf-8"))
    metadata = dataset.get("metadata", {})
    plan = metadata.get("plan", {})
    klines = dataset.get("klines", {})
    opportunity_config = config.get("opportunity", {})

    # Prefer the configured interval when the dataset captured it; otherwise
    # the plan's first interval, then "1h".
    intervals = list(plan.get("intervals") or [])
    configured_interval = get_signal_interval(config)
    primary_interval = configured_interval if configured_interval in intervals else (intervals[0] if intervals else "1h")
    simulation_start = _parse_dt(plan.get("simulation_start"))
    simulation_end = _parse_dt(plan.get("simulation_end"))
    if simulation_start is None or simulation_end is None:
        raise ValueError("dataset metadata must include plan.simulation_start and plan.simulation_end")

    # Horizon precedence: explicit arg > plan.simulate_days > config default (24h).
    horizon = _as_float(horizon_hours, 0.0)
    if horizon <= 0:
        horizon = _as_float(plan.get("simulate_days"), 0.0) * 24.0
    if horizon <= 0:
        horizon = _as_float(opportunity_config.get("evaluation_horizon_hours"), 24.0)

    # Config values are percentages; convert to fractions unless overridden.
    take_profit_value = take_profit if take_profit is not None else _as_float(opportunity_config.get("evaluation_take_profit_pct"), 2.0) / 100.0
    stop_loss_value = stop_loss if stop_loss is not None else _as_float(opportunity_config.get("evaluation_stop_loss_pct"), 1.5) / 100.0
    setup_target_value = setup_target if setup_target is not None else _as_float(opportunity_config.get("evaluation_setup_target_pct"), 1.0) / 100.0
    lookback_bars = lookback or _as_int(opportunity_config.get("evaluation_lookback"), 24)
    selected_top_n = top_n or _as_int(opportunity_config.get("top_n"), 10)
    thresholds = _opportunity_thresholds(config)
    horizon_ms = int(horizon * 60 * 60 * 1000)
    start_ms = int(simulation_start.timestamp() * 1000)
    end_ms = int(simulation_end.timestamp() * 1000)

    # Index rows per normalized symbol for O(1) lookup by open timestamp.
    rows_by_symbol: dict[str, list[list[Any]]] = {}
    index_by_symbol: dict[str, dict[int, int]] = {}
    for symbol, by_interval in klines.items():
        rows = by_interval.get(primary_interval, [])
        normalized = normalize_symbol(symbol)
        if rows:
            rows_by_symbol[normalized] = rows
            index_by_symbol[normalized] = {_open_ms(row): index for index, row in enumerate(rows)}

    # Union of candle open times across all symbols inside the window.
    decision_times = sorted(
        {
            _open_ms(row)
            for rows in rows_by_symbol.values()
            for row in rows
            if start_ms <= _open_ms(row) < end_ms
        }
    )

    judgments: list[dict[str, Any]] = []
    skipped_missing_future = 0
    skipped_warmup = 0
    for decision_time in decision_times:
        candidates: list[dict[str, Any]] = []
        for symbol, rows in rows_by_symbol.items():
            index = index_by_symbol[symbol].get(decision_time)
            if index is None:
                continue
            # Trailing window ending at the decision candle (inclusive).
            window = rows[max(0, index - lookback_bars + 1) : index + 1]
            if len(window) < lookback_bars:
                skipped_warmup += 1
                continue
            # Forward candles strictly after the decision candle, within horizon.
            future_rows = [row for row in rows[index + 1 :] if _open_ms(row) <= decision_time + horizon_ms]
            if not future_rows:
                skipped_missing_future += 1
                continue
            closes, volumes = _window_series(window)
            ticker = _ticker_from_window(symbol, window)
            opportunity_score, metrics = score_opportunity_signal(closes, volumes, ticker, opportunity_config)
            score = opportunity_score
            metrics["opportunity_score"] = round(opportunity_score, 4)
            # Dataset has no portfolio/research context; zero these inputs so
            # _action_for_opportunity decides from price-derived metrics only.
            metrics["position_weight"] = 0.0
            metrics["research_score"] = 0.0
            action, reasons, _confidence = _action_for_opportunity(score, metrics, thresholds)
            candidates.append(
                {
                    "symbol": symbol,
                    "time": decision_time,
                    "action": action,
                    "score": round(score, 4),
                    "metrics": metrics,
                    "reasons": reasons,
                    "entry_price": _close(window[-1]),
                    "future_rows": future_rows,
                }
            )

        # Grade only the top-N candidates by score at each decision time.
        for rank, candidate in enumerate(sorted(candidates, key=lambda item: item["score"], reverse=True)[:selected_top_n], start=1):
            trigger_path = _path_stats(candidate["entry_price"], candidate["future_rows"], take_profit_value, stop_loss_value)
            setup_path = _path_stats(candidate["entry_price"], candidate["future_rows"], setup_target_value, stop_loss_value)
            correct = _is_correct(candidate["action"], trigger_path, setup_path)
            judgments.append(
                {
                    "time": _iso_from_ms(candidate["time"]),
                    "rank": rank,
                    "symbol": candidate["symbol"],
                    "action": candidate["action"],
                    "score": candidate["score"],
                    "correct": correct,
                    "entry_price": round(candidate["entry_price"], 8),
                    "forward_return": _round_float(trigger_path["final_return"]),
                    "max_upside": _round_float(trigger_path["max_upside"]),
                    "max_drawdown": _round_float(trigger_path["max_drawdown"]),
                    # Only "entry" actions simulate an actual trade.
                    "trade_return": _round_float(trigger_path["exit_return"]) if candidate["action"] == "entry" else 0.0,
                    "trigger_event": trigger_path["event"],
                    "setup_event": setup_path["event"],
                    "metrics": candidate["metrics"],
                    "reason": candidate["reasons"][0] if candidate["reasons"] else "",
                }
            )

    # Aggregate judgments overall and per action bucket.
    overall = _bucket()
    by_action: dict[str, dict[str, Any]] = defaultdict(_bucket)
    trigger_returns: list[float] = []
    for judgment in judgments:
        action = judgment["action"]
        for bucket in (overall, by_action[action]):
            bucket["count"] += 1
            bucket["correct"] += 1 if judgment["correct"] else 0
            bucket["forward_returns"].append(judgment["forward_return"])
            if action == "entry":
                bucket["trade_returns"].append(judgment["trade_return"])
        if action == "entry":
            trigger_returns.append(judgment["trade_return"])

    by_action_result = {action: _finalize_bucket(bucket) for action, bucket in sorted(by_action.items())}
    incorrect_examples = [item for item in judgments if not item["correct"]][:max_examples]
    examples = judgments[:max_examples]
    trigger_count = by_action_result.get("entry", {}).get("count", 0)
    trigger_correct = by_action_result.get("entry", {}).get("correct", 0)
    return {
        "summary": {
            **_finalize_bucket(overall),
            "decision_times": len(decision_times),
            "symbols": sorted(rows_by_symbol),
            "interval": primary_interval,
            "top_n": selected_top_n,
            "skipped_warmup": skipped_warmup,
            "skipped_missing_future": skipped_missing_future,
        },
        "by_action": by_action_result,
        "trade_simulation": {
            "trigger_trades": trigger_count,
            "wins": trigger_correct,
            "losses": trigger_count - trigger_correct,
            "win_rate": round(trigger_correct / trigger_count, 4) if trigger_count else 0.0,
            "avg_trade_return": round(mean(trigger_returns), 4) if trigger_returns else 0.0,
        },
        "rules": {
            "dataset": str(dataset_file),
            "interval": primary_interval,
            "horizon_hours": round(horizon, 4),
            "lookback_bars": lookback_bars,
            "take_profit": round(take_profit_value, 4),
            "stop_loss": round(stop_loss_value, 4),
            "setup_target": round(setup_target_value, 4),
            "same_candle_policy": "stop_first",
            "research_mode": "disabled: dataset has no point-in-time research snapshots",
        },
        "examples": examples,
        "incorrect_examples": incorrect_examples,
    }
def _objective(result: dict[str, Any]) -> float:
    """Scalar score of an evaluation result, used by the weight search.

    Blend: 0.45*overall accuracy + 0.20*watch accuracy + 0.25*trigger win
    rate + 6*avg trade return (clamped to +/-3%) + 0.05*trigger coverage
    (saturating once triggers reach 8% of graded decisions). Rounded to 6
    decimal places.
    """
    summary = result.get("summary", {})
    by_action = result.get("by_action", {})
    trade = result.get("trade_simulation", {})
    total = _as_float(summary.get("count"))
    trades = _as_float(trade.get("trigger_trades"))
    trigger_rate = trades / total if total else 0.0
    clamped_return = min(max(_as_float(trade.get("avg_trade_return")), -0.03), 0.03)
    coverage = min(trigger_rate / 0.08, 1.0)
    score = (
        0.45 * _as_float(summary.get("accuracy"))
        + 0.20 * _as_float(by_action.get("watch", {}).get("accuracy"))
        + 0.25 * _as_float(trade.get("win_rate"))
        + 6.0 * clamped_return
        + 0.05 * coverage
    )
    return round(score, 6)
def _copy_config_with_weights(config: dict[str, Any], weights: dict[str, float]) -> dict[str, Any]:
|
|
candidate = deepcopy(config)
|
|
candidate.setdefault("opportunity", {})["model_weights"] = weights
|
|
return candidate
|
|
|
|
|
|
def _evaluation_snapshot(result: dict[str, Any], objective: float, weights: dict[str, float]) -> dict[str, Any]:
|
|
return {
|
|
"objective": objective,
|
|
"weights": {key: round(value, 4) for key, value in sorted(weights.items())},
|
|
"summary": result.get("summary", {}),
|
|
"by_action": result.get("by_action", {}),
|
|
"trade_simulation": result.get("trade_simulation", {}),
|
|
}
|
|
|
|
|
|
def optimize_opportunity_model(
    config: dict[str, Any],
    *,
    dataset_path: str,
    horizon_hours: float | None = None,
    take_profit: float | None = None,
    stop_loss: float | None = None,
    setup_target: float | None = None,
    lookback: int | None = None,
    top_n: int | None = None,
    passes: int = 2,
) -> dict[str, Any]:
    """Coordinate-search model weights against a walk-forward dataset.

    This intentionally optimizes model feature weights only. Entry/watch policy
    thresholds remain fixed so the search improves signal construction instead
    of fitting decision cutoffs to a sample.

    For up to *passes* sweeps, each weight in _OPTIMIZE_WEIGHT_KEYS is scaled
    by each factor in _OPTIMIZE_MULTIPLIERS; the best candidate per key is
    kept only when it improves _objective over the running best. Evaluation
    keyword arguments are forwarded to evaluate_opportunity_dataset unchanged.

    Returns baseline/best snapshots, improvement deltas, a flat
    "recommended_config" mapping, search metadata, and the last 20 history
    entries.
    """
    base_weights = get_opportunity_model_weights(config.get("opportunity", {}))

    def evaluate(weights: dict[str, float]) -> tuple[dict[str, Any], float]:
        # Run a full walk-forward evaluation with the candidate weights
        # installed on a deep copy of the config (examples suppressed).
        result = evaluate_opportunity_dataset(
            _copy_config_with_weights(config, weights),
            dataset_path=dataset_path,
            horizon_hours=horizon_hours,
            take_profit=take_profit,
            stop_loss=stop_loss,
            setup_target=setup_target,
            lookback=lookback,
            top_n=top_n,
            max_examples=0,
        )
        return result, _objective(result)

    baseline_result, baseline_objective = evaluate(base_weights)
    best_weights = dict(base_weights)
    best_result = baseline_result
    best_objective = baseline_objective
    evaluations = 1
    history: list[dict[str, Any]] = [
        {
            "pass": 0,
            "key": "baseline",
            "multiplier": 1.0,
            "objective": baseline_objective,
            "accuracy": baseline_result["summary"]["accuracy"],
            "trigger_win_rate": baseline_result["trade_simulation"]["win_rate"],
        }
    ]

    for pass_index in range(max(passes, 0)):
        improved = False
        for key in _OPTIMIZE_WEIGHT_KEYS:
            current_value = best_weights.get(key, 0.0)
            # Skip non-positive weights: scaling them is pointless and the
            # 0.01 floor below would silently enable a disabled feature.
            if current_value <= 0:
                continue
            local_best_weights = best_weights
            local_best_result = best_result
            local_best_objective = best_objective
            local_best_multiplier = 1.0
            for multiplier in _OPTIMIZE_MULTIPLIERS:
                candidate_weights = dict(best_weights)
                # Floor at 0.01 so a weight can never collapse to zero.
                candidate_weights[key] = round(max(current_value * multiplier, 0.01), 4)
                candidate_result, candidate_objective = evaluate(candidate_weights)
                evaluations += 1
                history.append(
                    {
                        "pass": pass_index + 1,
                        "key": key,
                        "multiplier": multiplier,
                        "objective": candidate_objective,
                        "accuracy": candidate_result["summary"]["accuracy"],
                        "trigger_win_rate": candidate_result["trade_simulation"]["win_rate"],
                    }
                )
                if candidate_objective > local_best_objective:
                    local_best_weights = candidate_weights
                    local_best_result = candidate_result
                    local_best_objective = candidate_objective
                    local_best_multiplier = multiplier
            if local_best_objective > best_objective:
                best_weights = local_best_weights
                best_result = local_best_result
                best_objective = local_best_objective
                improved = True
                # Record the accepted move with a "selected" marker.
                history.append(
                    {
                        "pass": pass_index + 1,
                        "key": key,
                        "multiplier": local_best_multiplier,
                        "objective": best_objective,
                        "accuracy": best_result["summary"]["accuracy"],
                        "trigger_win_rate": best_result["trade_simulation"]["win_rate"],
                        "selected": True,
                    }
                )
        # Stop early once a full pass yields no improvement.
        if not improved:
            break

    recommended_config = {
        f"opportunity.model_weights.{key}": round(value, 4)
        for key, value in sorted(best_weights.items())
    }
    return {
        "baseline": _evaluation_snapshot(baseline_result, baseline_objective, base_weights),
        "best": _evaluation_snapshot(best_result, best_objective, best_weights),
        "improvement": {
            "objective": round(best_objective - baseline_objective, 6),
            "accuracy": round(
                _as_float(best_result["summary"].get("accuracy")) - _as_float(baseline_result["summary"].get("accuracy")),
                4,
            ),
            "trigger_win_rate": round(
                _as_float(best_result["trade_simulation"].get("win_rate"))
                - _as_float(baseline_result["trade_simulation"].get("win_rate")),
                4,
            ),
            "avg_trade_return": round(
                _as_float(best_result["trade_simulation"].get("avg_trade_return"))
                - _as_float(baseline_result["trade_simulation"].get("avg_trade_return")),
                4,
            ),
        },
        "recommended_config": recommended_config,
        "search": {
            "passes": passes,
            "evaluations": evaluations,
            "optimized": "model_weights_only",
            "thresholds": "fixed",
            "objective": "0.45*accuracy + 0.20*setup_accuracy + 0.25*trigger_win_rate + 6*avg_trade_return + 0.05*trigger_coverage",
        },
        "history": history[-20:],
    }