"""Walk-forward evaluation for historical opportunity datasets.""" from __future__ import annotations import json from collections import defaultdict from copy import deepcopy from datetime import datetime, timezone from pathlib import Path from statistics import mean from typing import Any from .market_service import normalize_symbol from .opportunity_service import _action_for_opportunity, _opportunity_thresholds from .signal_service import ( get_opportunity_model_weights, get_signal_interval, score_opportunity_signal, ) _OPTIMIZE_WEIGHT_KEYS = [ "trend", "compression", "breakout_proximity", "higher_lows", "range_position", "fresh_breakout", "volume", "momentum", "setup", "trigger", "liquidity", "volatility_penalty", "extension_penalty", ] _OPTIMIZE_MULTIPLIERS = [0.5, 0.75, 1.25, 1.5] def _as_float(value: Any, default: float = 0.0) -> float: try: return float(value) except (TypeError, ValueError): return default def _as_int(value: Any, default: int = 0) -> int: try: return int(value) except (TypeError, ValueError): return default def _parse_dt(value: Any) -> datetime | None: if not value: return None try: return datetime.fromisoformat(str(value).replace("Z", "+00:00")).astimezone(timezone.utc) except ValueError: return None def _iso_from_ms(value: int) -> str: return datetime.fromtimestamp(value / 1000, tz=timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") def _close(row: list[Any]) -> float: return _as_float(row[4]) def _high(row: list[Any]) -> float: return _as_float(row[2]) def _low(row: list[Any]) -> float: return _as_float(row[3]) def _volume(row: list[Any]) -> float: return _as_float(row[5]) def _quote_volume(row: list[Any]) -> float: if len(row) > 7: return _as_float(row[7]) return _close(row) * _volume(row) def _open_ms(row: list[Any]) -> int: return int(row[0]) def _ticker_from_window(symbol: str, rows: list[list[Any]]) -> dict[str, Any]: first = _close(rows[0]) last = _close(rows[-1]) price_change_pct = ((last - first) / first * 100.0) if first else 0.0 return { "symbol": symbol, "price_change_pct": price_change_pct, "quote_volume": sum(_quote_volume(row) for row in rows), "high_price": max(_high(row) for row in rows), "low_price": min(_low(row) for row in rows), } def _window_series(rows: list[list[Any]]) -> tuple[list[float], list[float]]: return [_close(row) for row in rows], [_volume(row) for row in rows] def _pct(new: float, old: float) -> float: if old == 0: return 0.0 return (new - old) / old def _path_stats(entry: float, future_rows: list[list[Any]], take_profit: float, stop_loss: float) -> dict[str, Any]: if not future_rows: return { "event": "missing", "exit_return": 0.0, "final_return": 0.0, "max_upside": 0.0, "max_drawdown": 0.0, "bars": 0, } for row in future_rows: high_return = _pct(_high(row), entry) low_return = _pct(_low(row), entry) if low_return <= -stop_loss: return { "event": "stop", "exit_return": -stop_loss, "final_return": _pct(_close(future_rows[-1]), entry), "max_upside": max(_pct(_high(item), entry) for item in future_rows), "max_drawdown": min(_pct(_low(item), entry) for item in future_rows), "bars": len(future_rows), } if high_return >= take_profit: return { "event": "target", "exit_return": take_profit, "final_return": _pct(_close(future_rows[-1]), entry), "max_upside": max(_pct(_high(item), entry) for item in future_rows), "max_drawdown": min(_pct(_low(item), entry) for item in future_rows), "bars": len(future_rows), } return { "event": "horizon", "exit_return": _pct(_close(future_rows[-1]), entry), "final_return": _pct(_close(future_rows[-1]), entry), "max_upside": max(_pct(_high(item), entry) for item in future_rows), "max_drawdown": min(_pct(_low(item), entry) for item in future_rows), "bars": len(future_rows), } def _is_correct(action: str, trigger_path: dict[str, Any], setup_path: dict[str, Any]) -> bool: if action == "entry": return str(trigger_path["event"]) == "target" if action == "watch": return str(setup_path["event"]) == "target" if action == "avoid": return str(setup_path["event"]) != "target" return False def _round_float(value: Any, digits: int = 4) -> float: return round(_as_float(value), digits) def _finalize_bucket(bucket: dict[str, Any]) -> dict[str, Any]: count = int(bucket["count"]) correct = int(bucket["correct"]) returns = bucket["forward_returns"] trade_returns = bucket["trade_returns"] return { "count": count, "correct": correct, "incorrect": count - correct, "accuracy": round(correct / count, 4) if count else 0.0, "avg_forward_return": round(mean(returns), 4) if returns else 0.0, "avg_trade_return": round(mean(trade_returns), 4) if trade_returns else 0.0, } def _bucket() -> dict[str, Any]: return {"count": 0, "correct": 0, "forward_returns": [], "trade_returns": []} def evaluate_opportunity_dataset( config: dict[str, Any], *, dataset_path: str, horizon_hours: float | None = None, take_profit: float | None = None, stop_loss: float | None = None, setup_target: float | None = None, lookback: int | None = None, top_n: int | None = None, max_examples: int = 20, ) -> dict[str, Any]: """Evaluate opportunity actions using only point-in-time historical candles.""" dataset_file = Path(dataset_path).expanduser() dataset = json.loads(dataset_file.read_text(encoding="utf-8")) metadata = dataset.get("metadata", {}) plan = metadata.get("plan", {}) klines = dataset.get("klines", {}) opportunity_config = config.get("opportunity", {}) intervals = list(plan.get("intervals") or []) configured_interval = get_signal_interval(config) primary_interval = configured_interval if configured_interval in intervals else (intervals[0] if intervals else "1h") simulation_start = _parse_dt(plan.get("simulation_start")) simulation_end = _parse_dt(plan.get("simulation_end")) if simulation_start is None or simulation_end is None: raise ValueError("dataset metadata must include plan.simulation_start and plan.simulation_end") horizon = _as_float(horizon_hours, 0.0) if horizon <= 0: horizon = _as_float(plan.get("simulate_days"), 0.0) * 24.0 if horizon <= 0: horizon = _as_float(opportunity_config.get("evaluation_horizon_hours"), 24.0) take_profit_value = take_profit if take_profit is not None else _as_float(opportunity_config.get("evaluation_take_profit_pct"), 2.0) / 100.0 stop_loss_value = stop_loss if stop_loss is not None else _as_float(opportunity_config.get("evaluation_stop_loss_pct"), 1.5) / 100.0 setup_target_value = setup_target if setup_target is not None else _as_float(opportunity_config.get("evaluation_setup_target_pct"), 1.0) / 100.0 lookback_bars = lookback or _as_int(opportunity_config.get("evaluation_lookback"), 24) selected_top_n = top_n or _as_int(opportunity_config.get("top_n"), 10) thresholds = _opportunity_thresholds(config) horizon_ms = int(horizon * 60 * 60 * 1000) start_ms = int(simulation_start.timestamp() * 1000) end_ms = int(simulation_end.timestamp() * 1000) rows_by_symbol: dict[str, list[list[Any]]] = {} index_by_symbol: dict[str, dict[int, int]] = {} for symbol, by_interval in klines.items(): rows = by_interval.get(primary_interval, []) normalized = normalize_symbol(symbol) if rows: rows_by_symbol[normalized] = rows index_by_symbol[normalized] = {_open_ms(row): index for index, row in enumerate(rows)} decision_times = sorted( { _open_ms(row) for rows in rows_by_symbol.values() for row in rows if start_ms <= _open_ms(row) < end_ms } ) judgments: list[dict[str, Any]] = [] skipped_missing_future = 0 skipped_warmup = 0 for decision_time in decision_times: candidates: list[dict[str, Any]] = [] for symbol, rows in rows_by_symbol.items(): index = index_by_symbol[symbol].get(decision_time) if index is None: continue window = rows[max(0, index - lookback_bars + 1) : index + 1] if len(window) < lookback_bars: skipped_warmup += 1 continue future_rows = [row for row in rows[index + 1 :] if _open_ms(row) <= decision_time + horizon_ms] if not future_rows: skipped_missing_future += 1 continue closes, volumes = _window_series(window) ticker = _ticker_from_window(symbol, window) opportunity_score, metrics = score_opportunity_signal(closes, volumes, ticker, opportunity_config) score = opportunity_score metrics["opportunity_score"] = round(opportunity_score, 4) metrics["position_weight"] = 0.0 metrics["research_score"] = 0.0 action, reasons, _confidence = _action_for_opportunity(score, metrics, thresholds) candidates.append( { "symbol": symbol, "time": decision_time, "action": action, "score": round(score, 4), "metrics": metrics, "reasons": reasons, "entry_price": _close(window[-1]), "future_rows": future_rows, } ) for rank, candidate in enumerate(sorted(candidates, key=lambda item: item["score"], reverse=True)[:selected_top_n], start=1): trigger_path = _path_stats(candidate["entry_price"], candidate["future_rows"], take_profit_value, stop_loss_value) setup_path = _path_stats(candidate["entry_price"], candidate["future_rows"], setup_target_value, stop_loss_value) correct = _is_correct(candidate["action"], trigger_path, setup_path) judgments.append( { "time": _iso_from_ms(candidate["time"]), "rank": rank, "symbol": candidate["symbol"], "action": candidate["action"], "score": candidate["score"], "correct": correct, "entry_price": round(candidate["entry_price"], 8), "forward_return": _round_float(trigger_path["final_return"]), "max_upside": _round_float(trigger_path["max_upside"]), "max_drawdown": _round_float(trigger_path["max_drawdown"]), "trade_return": _round_float(trigger_path["exit_return"]) if candidate["action"] == "entry" else 0.0, "trigger_event": trigger_path["event"], "setup_event": setup_path["event"], "metrics": candidate["metrics"], "reason": candidate["reasons"][0] if candidate["reasons"] else "", } ) overall = _bucket() by_action: dict[str, dict[str, Any]] = defaultdict(_bucket) trigger_returns: list[float] = [] for judgment in judgments: action = judgment["action"] for bucket in (overall, by_action[action]): bucket["count"] += 1 bucket["correct"] += 1 if judgment["correct"] else 0 bucket["forward_returns"].append(judgment["forward_return"]) if action == "entry": bucket["trade_returns"].append(judgment["trade_return"]) if action == "entry": trigger_returns.append(judgment["trade_return"]) by_action_result = {action: _finalize_bucket(bucket) for action, bucket in sorted(by_action.items())} incorrect_examples = [item for item in judgments if not item["correct"]][:max_examples] examples = judgments[:max_examples] trigger_count = by_action_result.get("entry", {}).get("count", 0) trigger_correct = by_action_result.get("entry", {}).get("correct", 0) return { "summary": { **_finalize_bucket(overall), "decision_times": len(decision_times), "symbols": sorted(rows_by_symbol), "interval": primary_interval, "top_n": selected_top_n, "skipped_warmup": skipped_warmup, "skipped_missing_future": skipped_missing_future, }, "by_action": by_action_result, "trade_simulation": { "trigger_trades": trigger_count, "wins": trigger_correct, "losses": trigger_count - trigger_correct, "win_rate": round(trigger_correct / trigger_count, 4) if trigger_count else 0.0, "avg_trade_return": round(mean(trigger_returns), 4) if trigger_returns else 0.0, }, "rules": { "dataset": str(dataset_file), "interval": primary_interval, "horizon_hours": round(horizon, 4), "lookback_bars": lookback_bars, "take_profit": round(take_profit_value, 4), "stop_loss": round(stop_loss_value, 4), "setup_target": round(setup_target_value, 4), "same_candle_policy": "stop_first", "research_mode": "disabled: dataset has no point-in-time research snapshots", }, "examples": examples, "incorrect_examples": incorrect_examples, } def _objective(result: dict[str, Any]) -> float: summary = result.get("summary", {}) by_action = result.get("by_action", {}) trade = result.get("trade_simulation", {}) count = _as_float(summary.get("count")) trigger_trades = _as_float(trade.get("trigger_trades")) trigger_rate = trigger_trades / count if count else 0.0 avg_trade_return = _as_float(trade.get("avg_trade_return")) bounded_trade_return = max(min(avg_trade_return, 0.03), -0.03) trigger_coverage = min(trigger_rate / 0.08, 1.0) return round( 0.45 * _as_float(summary.get("accuracy")) + 0.20 * _as_float(by_action.get("watch", {}).get("accuracy")) + 0.25 * _as_float(trade.get("win_rate")) + 6.0 * bounded_trade_return + 0.05 * trigger_coverage, 6, ) def _copy_config_with_weights(config: dict[str, Any], weights: dict[str, float]) -> dict[str, Any]: candidate = deepcopy(config) candidate.setdefault("opportunity", {})["model_weights"] = weights return candidate def _evaluation_snapshot(result: dict[str, Any], objective: float, weights: dict[str, float]) -> dict[str, Any]: return { "objective": objective, "weights": {key: round(value, 4) for key, value in sorted(weights.items())}, "summary": result.get("summary", {}), "by_action": result.get("by_action", {}), "trade_simulation": result.get("trade_simulation", {}), } def optimize_opportunity_model( config: dict[str, Any], *, dataset_path: str, horizon_hours: float | None = None, take_profit: float | None = None, stop_loss: float | None = None, setup_target: float | None = None, lookback: int | None = None, top_n: int | None = None, passes: int = 2, ) -> dict[str, Any]: """Coordinate-search model weights against a walk-forward dataset. This intentionally optimizes model feature weights only. Entry/watch policy thresholds remain fixed so the search improves signal construction instead of fitting decision cutoffs to a sample. """ base_weights = get_opportunity_model_weights(config.get("opportunity", {})) def evaluate(weights: dict[str, float]) -> tuple[dict[str, Any], float]: result = evaluate_opportunity_dataset( _copy_config_with_weights(config, weights), dataset_path=dataset_path, horizon_hours=horizon_hours, take_profit=take_profit, stop_loss=stop_loss, setup_target=setup_target, lookback=lookback, top_n=top_n, max_examples=0, ) return result, _objective(result) baseline_result, baseline_objective = evaluate(base_weights) best_weights = dict(base_weights) best_result = baseline_result best_objective = baseline_objective evaluations = 1 history: list[dict[str, Any]] = [ { "pass": 0, "key": "baseline", "multiplier": 1.0, "objective": baseline_objective, "accuracy": baseline_result["summary"]["accuracy"], "trigger_win_rate": baseline_result["trade_simulation"]["win_rate"], } ] for pass_index in range(max(passes, 0)): improved = False for key in _OPTIMIZE_WEIGHT_KEYS: current_value = best_weights.get(key, 0.0) if current_value <= 0: continue local_best_weights = best_weights local_best_result = best_result local_best_objective = best_objective local_best_multiplier = 1.0 for multiplier in _OPTIMIZE_MULTIPLIERS: candidate_weights = dict(best_weights) candidate_weights[key] = round(max(current_value * multiplier, 0.01), 4) candidate_result, candidate_objective = evaluate(candidate_weights) evaluations += 1 history.append( { "pass": pass_index + 1, "key": key, "multiplier": multiplier, "objective": candidate_objective, "accuracy": candidate_result["summary"]["accuracy"], "trigger_win_rate": candidate_result["trade_simulation"]["win_rate"], } ) if candidate_objective > local_best_objective: local_best_weights = candidate_weights local_best_result = candidate_result local_best_objective = candidate_objective local_best_multiplier = multiplier if local_best_objective > best_objective: best_weights = local_best_weights best_result = local_best_result best_objective = local_best_objective improved = True history.append( { "pass": pass_index + 1, "key": key, "multiplier": local_best_multiplier, "objective": best_objective, "accuracy": best_result["summary"]["accuracy"], "trigger_win_rate": best_result["trade_simulation"]["win_rate"], "selected": True, } ) if not improved: break recommended_config = { f"opportunity.model_weights.{key}": round(value, 4) for key, value in sorted(best_weights.items()) } return { "baseline": _evaluation_snapshot(baseline_result, baseline_objective, base_weights), "best": _evaluation_snapshot(best_result, best_objective, best_weights), "improvement": { "objective": round(best_objective - baseline_objective, 6), "accuracy": round( _as_float(best_result["summary"].get("accuracy")) - _as_float(baseline_result["summary"].get("accuracy")), 4, ), "trigger_win_rate": round( _as_float(best_result["trade_simulation"].get("win_rate")) - _as_float(baseline_result["trade_simulation"].get("win_rate")), 4, ), "avg_trade_return": round( _as_float(best_result["trade_simulation"].get("avg_trade_return")) - _as_float(baseline_result["trade_simulation"].get("avg_trade_return")), 4, ), }, "recommended_config": recommended_config, "search": { "passes": passes, "evaluations": evaluations, "optimized": "model_weights_only", "thresholds": "fixed", "objective": "0.45*accuracy + 0.20*setup_accuracy + 0.25*trigger_win_rate + 6*avg_trade_return + 0.05*trigger_coverage", }, "history": history[-20:], }