Files
coinhunter-cli/src/coinhunter/services/opportunity_evaluation_service.py
TacitLab 003212de99 refactor: simplify opportunity actions to entry/watch/avoid with confidence
- Remove dead scoring code (_score_candidate, _action_for, etc.) and
  align action decisions directly with score_opportunity_signal metrics.
- Reduce action surface from trigger/setup/chase/skip to entry/watch/avoid.
- Add confidence field (0..100) mapped from edge_score.
- Update evaluate/optimize ground-truth mapping and tests.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-22 01:08:34 +08:00

537 lines
21 KiB
Python

"""Walk-forward evaluation for historical opportunity datasets."""
from __future__ import annotations
import json
from collections import defaultdict
from copy import deepcopy
from datetime import datetime, timezone
from pathlib import Path
from statistics import mean
from typing import Any
from .market_service import normalize_symbol
from .opportunity_service import _action_for_opportunity, _opportunity_thresholds
from .signal_service import (
get_opportunity_model_weights,
get_signal_interval,
score_opportunity_signal,
)
# Feature-weight names that optimize_opportunity_model perturbs during its
# coordinate search. Each key is looked up in the weight dict returned by
# get_opportunity_model_weights; keys absent or <= 0 there are skipped.
_OPTIMIZE_WEIGHT_KEYS = [
    "trend",
    "compression",
    "breakout_proximity",
    "higher_lows",
    "range_position",
    "fresh_breakout",
    "volume",
    "momentum",
    "setup",
    "trigger",
    "liquidity",
    "volatility_penalty",
    "extension_penalty",
]
# Multiplicative perturbations tried for each weight on every search pass
# (1.0 is implicit: the current value is the incumbent being compared against).
_OPTIMIZE_MULTIPLIERS = [0.5, 0.75, 1.25, 1.5]
def _as_float(value: Any, default: float = 0.0) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default
def _as_int(value: Any, default: int = 0) -> int:
try:
return int(value)
except (TypeError, ValueError):
return default
def _parse_dt(value: Any) -> datetime | None:
if not value:
return None
try:
return datetime.fromisoformat(str(value).replace("Z", "+00:00")).astimezone(timezone.utc)
except ValueError:
return None
def _iso_from_ms(value: int) -> str:
return datetime.fromtimestamp(value / 1000, tz=timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def _close(row: list[Any]) -> float:
    """Close price stored in column 4 of a kline row."""
    close_cell = row[4]
    return _as_float(close_cell)
def _high(row: list[Any]) -> float:
    """High price stored in column 2 of a kline row."""
    high_cell = row[2]
    return _as_float(high_cell)
def _low(row: list[Any]) -> float:
    """Low price stored in column 3 of a kline row."""
    low_cell = row[3]
    return _as_float(low_cell)
def _volume(row: list[Any]) -> float:
    """Base-asset volume stored in column 5 of a kline row."""
    volume_cell = row[5]
    return _as_float(volume_cell)
def _quote_volume(row: list[Any]) -> float:
    """Quote-asset volume (column 7); approximated as close * base volume when absent."""
    if len(row) <= 7:
        return _close(row) * _volume(row)
    return _as_float(row[7])
def _open_ms(row: list[Any]) -> int:
return int(row[0])
def _ticker_from_window(symbol: str, rows: list[list[Any]]) -> dict[str, Any]:
    """Build a ticker-style summary dict from a window of kline rows."""
    first_close = _close(rows[0])
    last_close = _close(rows[-1])
    change_pct = 0.0
    if first_close:
        change_pct = (last_close - first_close) / first_close * 100.0
    highs = [_high(item) for item in rows]
    lows = [_low(item) for item in rows]
    return {
        "symbol": symbol,
        "price_change_pct": change_pct,
        "quote_volume": sum(_quote_volume(item) for item in rows),
        "high_price": max(highs),
        "low_price": min(lows),
    }
def _window_series(rows: list[list[Any]]) -> tuple[list[float], list[float]]:
    """Split kline rows into parallel close-price and volume series."""
    closes: list[float] = []
    volumes: list[float] = []
    for row in rows:
        closes.append(_close(row))
        volumes.append(_volume(row))
    return closes, volumes
def _pct(new: float, old: float) -> float:
if old == 0:
return 0.0
return (new - old) / old
def _path_stats(entry: float, future_rows: list[list[Any]], take_profit: float, stop_loss: float) -> dict[str, Any]:
    """Classify the forward price path from *entry* against exit thresholds.

    Scans candles in order; within a candle the stop is checked before the
    target ("stop_first" policy). Returns the exit event ("stop", "target",
    "horizon" or "missing"), the trade's exit return, plus path statistics
    computed over the full forward window.
    """
    if not future_rows:
        # No forward candles available: nothing can be judged.
        return {
            "event": "missing",
            "exit_return": 0.0,
            "final_return": 0.0,
            "max_upside": 0.0,
            "max_drawdown": 0.0,
            "bars": 0,
        }
    # Default to holding through the whole horizon.
    event = "horizon"
    exit_return = _pct(_close(future_rows[-1]), entry)
    for row in future_rows:
        if _pct(_low(row), entry) <= -stop_loss:
            event = "stop"
            exit_return = -stop_loss
            break
        if _pct(_high(row), entry) >= take_profit:
            event = "target"
            exit_return = take_profit
            break
    return {
        "event": event,
        "exit_return": exit_return,
        "final_return": _pct(_close(future_rows[-1]), entry),
        "max_upside": max(_pct(_high(item), entry) for item in future_rows),
        "max_drawdown": min(_pct(_low(item), entry) for item in future_rows),
        "bars": len(future_rows),
    }
def _is_correct(action: str, trigger_path: dict[str, Any], setup_path: dict[str, Any]) -> bool:
if action == "entry":
return str(trigger_path["event"]) == "target"
if action == "watch":
return str(setup_path["event"]) == "target"
if action == "avoid":
return str(setup_path["event"]) != "target"
return False
def _round_float(value: Any, digits: int = 4) -> float:
    """Round *value* (coerced to float) to *digits* decimal places."""
    coerced = _as_float(value)
    return round(coerced, digits)
def _finalize_bucket(bucket: dict[str, Any]) -> dict[str, Any]:
count = int(bucket["count"])
correct = int(bucket["correct"])
returns = bucket["forward_returns"]
trade_returns = bucket["trade_returns"]
return {
"count": count,
"correct": correct,
"incorrect": count - correct,
"accuracy": round(correct / count, 4) if count else 0.0,
"avg_forward_return": round(mean(returns), 4) if returns else 0.0,
"avg_trade_return": round(mean(trade_returns), 4) if trade_returns else 0.0,
}
def _bucket() -> dict[str, Any]:
return {"count": 0, "correct": 0, "forward_returns": [], "trade_returns": []}
def evaluate_opportunity_dataset(
    config: dict[str, Any],
    *,
    dataset_path: str,
    horizon_hours: float | None = None,
    take_profit: float | None = None,
    stop_loss: float | None = None,
    setup_target: float | None = None,
    lookback: int | None = None,
    top_n: int | None = None,
    max_examples: int = 20,
) -> dict[str, Any]:
    """Evaluate opportunity actions using only point-in-time historical candles.

    Walk-forward loop: for every candle open time inside the plan's simulation
    window, score each symbol using only the trailing ``lookback`` bars, take
    the top-N candidates by score, and judge each action against the forward
    price path within the horizon.

    Args:
        config: Application config; the ``opportunity`` section supplies defaults.
        dataset_path: Path to a JSON dataset with ``metadata.plan`` and ``klines``.
        horizon_hours: Forward window per decision; falls back to
            ``plan.simulate_days`` * 24, then config (default 24.0).
        take_profit: Fractional take-profit; defaults from config percentage / 100.
        stop_loss: Fractional stop-loss; defaults from config percentage / 100.
        setup_target: Fractional target used to judge watch/avoid actions.
        lookback: Bars of history required before a symbol is scored.
        top_n: Number of candidates judged per decision time.
        max_examples: Cap on example judgments included in the result.

    Returns:
        Dict with ``summary``, ``by_action``, ``trade_simulation``, ``rules``,
        ``examples`` and ``incorrect_examples``.

    Raises:
        ValueError: If the dataset lacks plan.simulation_start/simulation_end.
    """
    dataset_file = Path(dataset_path).expanduser()
    dataset = json.loads(dataset_file.read_text(encoding="utf-8"))
    metadata = dataset.get("metadata", {})
    plan = metadata.get("plan", {})
    klines = dataset.get("klines", {})
    opportunity_config = config.get("opportunity", {})
    intervals = list(plan.get("intervals") or [])
    configured_interval = get_signal_interval(config)
    # Use the configured interval only when the dataset actually contains it.
    primary_interval = configured_interval if configured_interval in intervals else (intervals[0] if intervals else "1h")
    simulation_start = _parse_dt(plan.get("simulation_start"))
    simulation_end = _parse_dt(plan.get("simulation_end"))
    if simulation_start is None or simulation_end is None:
        raise ValueError("dataset metadata must include plan.simulation_start and plan.simulation_end")
    # Horizon precedence: explicit argument > plan.simulate_days > config default.
    horizon = _as_float(horizon_hours, 0.0)
    if horizon <= 0:
        horizon = _as_float(plan.get("simulate_days"), 0.0) * 24.0
    if horizon <= 0:
        horizon = _as_float(opportunity_config.get("evaluation_horizon_hours"), 24.0)
    # Config stores thresholds as percentages; convert to fractions here.
    take_profit_value = take_profit if take_profit is not None else _as_float(opportunity_config.get("evaluation_take_profit_pct"), 2.0) / 100.0
    stop_loss_value = stop_loss if stop_loss is not None else _as_float(opportunity_config.get("evaluation_stop_loss_pct"), 1.5) / 100.0
    setup_target_value = setup_target if setup_target is not None else _as_float(opportunity_config.get("evaluation_setup_target_pct"), 1.0) / 100.0
    lookback_bars = lookback or _as_int(opportunity_config.get("evaluation_lookback"), 24)
    selected_top_n = top_n or _as_int(opportunity_config.get("top_n"), 10)
    thresholds = _opportunity_thresholds(config)
    horizon_ms = int(horizon * 60 * 60 * 1000)
    start_ms = int(simulation_start.timestamp() * 1000)
    end_ms = int(simulation_end.timestamp() * 1000)
    # Index rows by open time per symbol for O(1) lookup at each decision time.
    rows_by_symbol: dict[str, list[list[Any]]] = {}
    index_by_symbol: dict[str, dict[int, int]] = {}
    for symbol, by_interval in klines.items():
        rows = by_interval.get(primary_interval, [])
        normalized = normalize_symbol(symbol)
        if rows:
            rows_by_symbol[normalized] = rows
            index_by_symbol[normalized] = {_open_ms(row): index for index, row in enumerate(rows)}
    # Union of all candle open times that fall inside the simulation window.
    decision_times = sorted(
        {
            _open_ms(row)
            for rows in rows_by_symbol.values()
            for row in rows
            if start_ms <= _open_ms(row) < end_ms
        }
    )
    judgments: list[dict[str, Any]] = []
    skipped_missing_future = 0
    skipped_warmup = 0
    for decision_time in decision_times:
        candidates: list[dict[str, Any]] = []
        for symbol, rows in rows_by_symbol.items():
            index = index_by_symbol[symbol].get(decision_time)
            if index is None:
                continue
            # Trailing window ending at (and including) the decision candle.
            window = rows[max(0, index - lookback_bars + 1) : index + 1]
            if len(window) < lookback_bars:
                skipped_warmup += 1
                continue
            # Forward candles limited to the evaluation horizon.
            future_rows = [row for row in rows[index + 1 :] if _open_ms(row) <= decision_time + horizon_ms]
            if not future_rows:
                skipped_missing_future += 1
                continue
            closes, volumes = _window_series(window)
            ticker = _ticker_from_window(symbol, window)
            opportunity_score, metrics = score_opportunity_signal(closes, volumes, ticker, opportunity_config)
            score = opportunity_score
            metrics["opportunity_score"] = round(opportunity_score, 4)
            # The dataset has no portfolio/research context, so zero these out.
            metrics["position_weight"] = 0.0
            metrics["research_score"] = 0.0
            action, reasons, _confidence = _action_for_opportunity(score, metrics, thresholds)
            candidates.append(
                {
                    "symbol": symbol,
                    "time": decision_time,
                    "action": action,
                    "score": round(score, 4),
                    "metrics": metrics,
                    "reasons": reasons,
                    "entry_price": _close(window[-1]),
                    "future_rows": future_rows,
                }
            )
        # Judge only the top-N candidates by score, mirroring live selection.
        for rank, candidate in enumerate(sorted(candidates, key=lambda item: item["score"], reverse=True)[:selected_top_n], start=1):
            trigger_path = _path_stats(candidate["entry_price"], candidate["future_rows"], take_profit_value, stop_loss_value)
            setup_path = _path_stats(candidate["entry_price"], candidate["future_rows"], setup_target_value, stop_loss_value)
            correct = _is_correct(candidate["action"], trigger_path, setup_path)
            judgments.append(
                {
                    "time": _iso_from_ms(candidate["time"]),
                    "rank": rank,
                    "symbol": candidate["symbol"],
                    "action": candidate["action"],
                    "score": candidate["score"],
                    "correct": correct,
                    "entry_price": round(candidate["entry_price"], 8),
                    "forward_return": _round_float(trigger_path["final_return"]),
                    "max_upside": _round_float(trigger_path["max_upside"]),
                    "max_drawdown": _round_float(trigger_path["max_drawdown"]),
                    # Only "entry" actions simulate a trade; others carry 0.0.
                    "trade_return": _round_float(trigger_path["exit_return"]) if candidate["action"] == "entry" else 0.0,
                    "trigger_event": trigger_path["event"],
                    "setup_event": setup_path["event"],
                    "metrics": candidate["metrics"],
                    "reason": candidate["reasons"][0] if candidate["reasons"] else "",
                }
            )
    # Aggregate judgments into an overall bucket and per-action buckets.
    overall = _bucket()
    by_action: dict[str, dict[str, Any]] = defaultdict(_bucket)
    trigger_returns: list[float] = []
    for judgment in judgments:
        action = judgment["action"]
        for bucket in (overall, by_action[action]):
            bucket["count"] += 1
            bucket["correct"] += 1 if judgment["correct"] else 0
            bucket["forward_returns"].append(judgment["forward_return"])
            if action == "entry":
                bucket["trade_returns"].append(judgment["trade_return"])
        if action == "entry":
            trigger_returns.append(judgment["trade_return"])
    by_action_result = {action: _finalize_bucket(bucket) for action, bucket in sorted(by_action.items())}
    incorrect_examples = [item for item in judgments if not item["correct"]][:max_examples]
    examples = judgments[:max_examples]
    trigger_count = by_action_result.get("entry", {}).get("count", 0)
    trigger_correct = by_action_result.get("entry", {}).get("correct", 0)
    return {
        "summary": {
            **_finalize_bucket(overall),
            "decision_times": len(decision_times),
            "symbols": sorted(rows_by_symbol),
            "interval": primary_interval,
            "top_n": selected_top_n,
            "skipped_warmup": skipped_warmup,
            "skipped_missing_future": skipped_missing_future,
        },
        "by_action": by_action_result,
        "trade_simulation": {
            "trigger_trades": trigger_count,
            "wins": trigger_correct,
            "losses": trigger_count - trigger_correct,
            "win_rate": round(trigger_correct / trigger_count, 4) if trigger_count else 0.0,
            "avg_trade_return": round(mean(trigger_returns), 4) if trigger_returns else 0.0,
        },
        "rules": {
            "dataset": str(dataset_file),
            "interval": primary_interval,
            "horizon_hours": round(horizon, 4),
            "lookback_bars": lookback_bars,
            "take_profit": round(take_profit_value, 4),
            "stop_loss": round(stop_loss_value, 4),
            "setup_target": round(setup_target_value, 4),
            "same_candle_policy": "stop_first",
            "research_mode": "disabled: dataset has no point-in-time research snapshots",
        },
        "examples": examples,
        "incorrect_examples": incorrect_examples,
    }
def _objective(result: dict[str, Any]) -> float:
    """Scalar search objective for one evaluation result (higher is better)."""
    summary = result.get("summary", {})
    by_action = result.get("by_action", {})
    trade = result.get("trade_simulation", {})
    total = _as_float(summary.get("count"))
    # Fraction of judged candidates that became entry trades.
    entry_rate = _as_float(trade.get("trigger_trades")) / total if total else 0.0
    # Clamp the return term so a few outlier trades cannot dominate the score.
    clamped_return = max(min(_as_float(trade.get("avg_trade_return")), 0.03), -0.03)
    coverage = min(entry_rate / 0.08, 1.0)
    score = (
        0.45 * _as_float(summary.get("accuracy"))
        + 0.20 * _as_float(by_action.get("watch", {}).get("accuracy"))
        + 0.25 * _as_float(trade.get("win_rate"))
        + 6.0 * clamped_return
        + 0.05 * coverage
    )
    return round(score, 6)
def _copy_config_with_weights(config: dict[str, Any], weights: dict[str, float]) -> dict[str, Any]:
candidate = deepcopy(config)
candidate.setdefault("opportunity", {})["model_weights"] = weights
return candidate
def _evaluation_snapshot(result: dict[str, Any], objective: float, weights: dict[str, float]) -> dict[str, Any]:
return {
"objective": objective,
"weights": {key: round(value, 4) for key, value in sorted(weights.items())},
"summary": result.get("summary", {}),
"by_action": result.get("by_action", {}),
"trade_simulation": result.get("trade_simulation", {}),
}
def optimize_opportunity_model(
    config: dict[str, Any],
    *,
    dataset_path: str,
    horizon_hours: float | None = None,
    take_profit: float | None = None,
    stop_loss: float | None = None,
    setup_target: float | None = None,
    lookback: int | None = None,
    top_n: int | None = None,
    passes: int = 2,
) -> dict[str, Any]:
    """Coordinate-search model weights against a walk-forward dataset.

    This intentionally optimizes model feature weights only. Entry/watch policy
    thresholds remain fixed so the search improves signal construction instead
    of fitting decision cutoffs to a sample.

    For up to ``passes`` sweeps, each key in _OPTIMIZE_WEIGHT_KEYS is perturbed
    by the multipliers in _OPTIMIZE_MULTIPLIERS; the best-scoring perturbation
    (by _objective) is adopted greedily. Stops early once a full pass yields no
    improvement. Evaluation arguments are forwarded verbatim to
    evaluate_opportunity_dataset.

    Returns:
        Dict with ``baseline``, ``best``, ``improvement``, ``recommended_config``,
        ``search`` and the last 20 ``history`` entries.
    """
    base_weights = get_opportunity_model_weights(config.get("opportunity", {}))

    def evaluate(weights: dict[str, float]) -> tuple[dict[str, Any], float]:
        # Score one weight vector: run the full walk-forward evaluation and
        # collapse the result into the scalar objective.
        result = evaluate_opportunity_dataset(
            _copy_config_with_weights(config, weights),
            dataset_path=dataset_path,
            horizon_hours=horizon_hours,
            take_profit=take_profit,
            stop_loss=stop_loss,
            setup_target=setup_target,
            lookback=lookback,
            top_n=top_n,
            max_examples=0,
        )
        return result, _objective(result)

    baseline_result, baseline_objective = evaluate(base_weights)
    best_weights = dict(base_weights)
    best_result = baseline_result
    best_objective = baseline_objective
    evaluations = 1
    history: list[dict[str, Any]] = [
        {
            "pass": 0,
            "key": "baseline",
            "multiplier": 1.0,
            "objective": baseline_objective,
            "accuracy": baseline_result["summary"]["accuracy"],
            "trigger_win_rate": baseline_result["trade_simulation"]["win_rate"],
        }
    ]
    for pass_index in range(max(passes, 0)):
        improved = False
        for key in _OPTIMIZE_WEIGHT_KEYS:
            current_value = best_weights.get(key, 0.0)
            if current_value <= 0:
                # Missing/non-positive weights are skipped, not searched.
                continue
            # Track the best candidate for this key only; the incumbent wins ties.
            local_best_weights = best_weights
            local_best_result = best_result
            local_best_objective = best_objective
            local_best_multiplier = 1.0
            for multiplier in _OPTIMIZE_MULTIPLIERS:
                candidate_weights = dict(best_weights)
                # Keep weights strictly positive (floor 0.01) and stable (4 dp).
                candidate_weights[key] = round(max(current_value * multiplier, 0.01), 4)
                candidate_result, candidate_objective = evaluate(candidate_weights)
                evaluations += 1
                history.append(
                    {
                        "pass": pass_index + 1,
                        "key": key,
                        "multiplier": multiplier,
                        "objective": candidate_objective,
                        "accuracy": candidate_result["summary"]["accuracy"],
                        "trigger_win_rate": candidate_result["trade_simulation"]["win_rate"],
                    }
                )
                if candidate_objective > local_best_objective:
                    local_best_weights = candidate_weights
                    local_best_result = candidate_result
                    local_best_objective = candidate_objective
                    local_best_multiplier = multiplier
            if local_best_objective > best_objective:
                # Adopt the winning perturbation greedily before the next key.
                best_weights = local_best_weights
                best_result = local_best_result
                best_objective = local_best_objective
                improved = True
                history.append(
                    {
                        "pass": pass_index + 1,
                        "key": key,
                        "multiplier": local_best_multiplier,
                        "objective": best_objective,
                        "accuracy": best_result["summary"]["accuracy"],
                        "trigger_win_rate": best_result["trade_simulation"]["win_rate"],
                        "selected": True,
                    }
                )
        if not improved:
            # Converged: a full pass produced no improvement.
            break
    recommended_config = {
        f"opportunity.model_weights.{key}": round(value, 4)
        for key, value in sorted(best_weights.items())
    }
    return {
        "baseline": _evaluation_snapshot(baseline_result, baseline_objective, base_weights),
        "best": _evaluation_snapshot(best_result, best_objective, best_weights),
        "improvement": {
            "objective": round(best_objective - baseline_objective, 6),
            "accuracy": round(
                _as_float(best_result["summary"].get("accuracy")) - _as_float(baseline_result["summary"].get("accuracy")),
                4,
            ),
            "trigger_win_rate": round(
                _as_float(best_result["trade_simulation"].get("win_rate"))
                - _as_float(baseline_result["trade_simulation"].get("win_rate")),
                4,
            ),
            "avg_trade_return": round(
                _as_float(best_result["trade_simulation"].get("avg_trade_return"))
                - _as_float(baseline_result["trade_simulation"].get("avg_trade_return")),
                4,
            ),
        },
        "recommended_config": recommended_config,
        "search": {
            "passes": passes,
            "evaluations": evaluations,
            "optimized": "model_weights_only",
            "thresholds": "fixed",
            "objective": "0.45*accuracy + 0.20*setup_accuracy + 0.25*trigger_win_rate + 6*avg_trade_return + 0.05*trigger_coverage",
        },
        "history": history[-20:],
    }