Add opportunity dataset collection

This commit is contained in:
Carlos Ouyang
2026-04-21 19:41:48 +08:00
parent 50402e4aa7
commit 436bef4814
10 changed files with 1295 additions and 32 deletions

View File

@@ -0,0 +1,353 @@
"""Historical dataset collection for opportunity evaluation."""
from __future__ import annotations
import json
from collections.abc import Callable
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.parse import parse_qs, urlencode, urlparse
from urllib.request import Request, urlopen
from ..runtime import get_runtime_paths
from .market_service import normalize_symbol, normalize_symbols
HttpGet = Callable[[str, dict[str, str], float], Any]
# Binance-style kline interval token -> bar duration in seconds.
# Used both to compute warm-up windows and to advance the kline pagination cursor.
_INTERVAL_SECONDS = {
    "1m": 60,
    "3m": 180,
    "5m": 300,
    "15m": 900,
    "30m": 1800,
    "1h": 3600,
    "2h": 7200,
    "4h": 14400,
    "6h": 21600,
    "8h": 28800,
    "12h": 43200,
    "1d": 86400,
    "3d": 259200,
    "1w": 604800,
}
@dataclass(frozen=True)
class DatasetPlan:
    """Immutable description of the historical window to collect.

    Timeline: start -> simulation_start -> simulation_end -> end, where the
    leading reference segment supplies indicator warm-up history.
    """

    intervals: list[str]  # kline intervals to download, e.g. ["1h", "4h", "1d"]
    kline_limit: int  # candles needed per interval for indicator warm-up
    reference_days: float  # warm-up history preceding the simulation window
    simulate_days: float  # requested simulation span in days
    run_days: float  # requested live-run span in days
    total_days: float  # reference_days + simulate_days + run_days
    start: datetime  # beginning of the collected window (UTC)
    simulation_start: datetime  # start + reference_days
    simulation_end: datetime  # simulation_start + run_days
    end: datetime  # end of the collected window (UTC, second resolution)
def _as_float(value: Any, default: float = 0.0) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default
def _as_int(value: Any, default: int = 0) -> int:
try:
return int(value)
except (TypeError, ValueError):
return default
def _public_http_get(url: str, headers: dict[str, str], timeout: float) -> Any:
    """GET *url* with *headers* and parse the response body as JSON."""
    request = Request(url, headers=headers)
    with urlopen(request, timeout=timeout) as response:  # noqa: S310 - market data endpoints are user-configurable
        body = response.read()
    return json.loads(body.decode("utf-8"))
def _public_http_status(url: str, headers: dict[str, str], timeout: float) -> tuple[int, str]:
    """GET *url* and return (HTTP status, body text).

    HTTP error responses are captured and returned, not raised, so callers can
    classify failures from the status code and body.
    """
    request = Request(url, headers=headers)
    try:
        response = urlopen(request, timeout=timeout)  # noqa: S310 - market data endpoints are user-configurable
    except HTTPError as exc:
        return exc.code, exc.read().decode("utf-8")
    with response:
        return response.status, response.read().decode("utf-8")
def _build_url(base_url: str, path: str, params: dict[str, str]) -> str:
return f"{base_url.rstrip('/')}{path}?{urlencode(params)}"
def _iso(dt: datetime) -> str:
return dt.astimezone(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def _ms(dt: datetime) -> int:
return int(dt.timestamp() * 1000)
def _default_intervals(config: dict[str, Any]) -> list[str]:
configured = config.get("opportunity", {}).get("lookback_intervals", ["1h", "4h", "1d"])
intervals = [str(item).strip() for item in configured if str(item).strip()]
return intervals or ["1h"]
def reference_days_for(config: dict[str, Any]) -> float:
    """Days of history needed to fill kline_limit candles on the slowest interval.

    Unknown interval tokens contribute zero seconds.
    """
    opportunity_config = config.get("opportunity", {})
    limit = _as_int(opportunity_config.get("kline_limit"), 48)
    longest_seconds = 0
    for interval in _default_intervals(config):
        span = (_INTERVAL_SECONDS.get(interval) or 0) * limit
        longest_seconds = max(longest_seconds, span)
    return round(longest_seconds / 86400, 4)
def build_dataset_plan(
    config: dict[str, Any],
    *,
    simulate_days: float | None = None,
    run_days: float | None = None,
    now: datetime | None = None,
) -> DatasetPlan:
    """Derive the collection window (reference + simulate + run) ending at *now*.

    Explicit ``simulate_days``/``run_days`` arguments override the values in
    ``config["opportunity"]``; both default to 7 days when unset or invalid.
    """
    opportunity_config = config.get("opportunity", {})
    simulate_source = simulate_days if simulate_days is not None else opportunity_config.get("simulate_days")
    run_source = run_days if run_days is not None else opportunity_config.get("run_days")
    simulate = _as_float(simulate_source, 7.0)
    run = _as_float(run_source, 7.0)
    reference_days = reference_days_for(config)
    # Normalize the anchor to UTC at second resolution.
    window_end = (now or datetime.now(timezone.utc)).astimezone(timezone.utc).replace(microsecond=0)
    total = reference_days + simulate + run
    window_start = window_end - timedelta(days=total)
    sim_start = window_start + timedelta(days=reference_days)
    sim_end = sim_start + timedelta(days=run)
    return DatasetPlan(
        intervals=_default_intervals(config),
        kline_limit=_as_int(opportunity_config.get("kline_limit"), 48),
        reference_days=reference_days,
        simulate_days=simulate,
        run_days=run,
        total_days=round(total, 4),
        start=window_start,
        simulation_start=sim_start,
        simulation_end=sim_end,
        end=window_end,
    )
def _binance_base_url(config: dict[str, Any]) -> str:
return str(config.get("binance", {}).get("spot_base_url", "https://api.binance.com"))
def _select_universe(
    config: dict[str, Any],
    *,
    symbols: list[str] | None,
    http_get: HttpGet,
    timeout: float,
) -> list[str]:
    """Pick the symbols to collect klines for.

    Explicit *symbols* short-circuit discovery. Otherwise the Binance 24h
    tickers are filtered (quote suffix, allow/deny lists, TRADING status,
    minimum quote volume) and the top *scan_limit* by quote volume are kept.
    """
    if symbols:
        return normalize_symbols(symbols)
    market_config = config.get("market", {})
    opportunity_config = config.get("opportunity", {})
    quote = str(market_config.get("default_quote", "USDT")).upper()
    allowlist = set(normalize_symbols(market_config.get("universe_allowlist", [])))
    denylist = set(normalize_symbols(market_config.get("universe_denylist", [])))
    scan_limit = _as_int(opportunity_config.get("scan_limit"), 50)
    min_quote_volume = _as_float(opportunity_config.get("min_quote_volume"), 0.0)
    base_url = _binance_base_url(config)
    headers = {"accept": "application/json", "user-agent": "coinhunter/2"}
    # exchangeInfo provides the tradability status of every listed symbol.
    exchange_info = http_get(_build_url(base_url, "/api/v3/exchangeInfo", {}), headers, timeout)
    status_map = {normalize_symbol(item["symbol"]): item.get("status", "") for item in exchange_info.get("symbols", [])}
    rows = http_get(_build_url(base_url, "/api/v3/ticker/24hr", {}), headers, timeout)
    universe: list[tuple[str, float]] = []
    # Tolerate a non-list payload (error object) by iterating nothing.
    for ticker in rows if isinstance(rows, list) else []:
        symbol = normalize_symbol(ticker.get("symbol", ""))
        if not symbol.endswith(quote):
            continue
        # A non-empty allowlist is exclusive.
        if allowlist and symbol not in allowlist:
            continue
        if symbol in denylist:
            continue
        if status_map.get(symbol) != "TRADING":
            continue
        quote_volume = _as_float(ticker.get("quoteVolume"))
        if quote_volume < min_quote_volume:
            continue
        universe.append((symbol, quote_volume))
    # Most liquid first, truncated to the configured scan size.
    universe.sort(key=lambda item: item[1], reverse=True)
    return [symbol for symbol, _ in universe[:scan_limit]]
def _fetch_klines(
    config: dict[str, Any],
    *,
    symbol: str,
    interval: str,
    start: datetime,
    end: datetime,
    http_get: HttpGet,
    timeout: float,
) -> list[list[Any]]:
    """Page through /api/v3/klines for [start, end], up to 1000 candles per request.

    The cursor advances one interval past the last returned open time; a
    non-advancing cursor, an empty page, or a short page ends the loop.
    """
    base_url = _binance_base_url(config)
    headers = {"accept": "application/json", "user-agent": "coinhunter/2"}
    # Fall back to 1-minute bars if the interval token is unknown.
    interval_ms = (_INTERVAL_SECONDS.get(interval) or 60) * 1000
    cursor = _ms(start)
    end_ms = _ms(end)
    rows: list[list[Any]] = []
    while cursor <= end_ms:
        url = _build_url(
            base_url,
            "/api/v3/klines",
            {
                "symbol": symbol,
                "interval": interval,
                "startTime": str(cursor),
                "endTime": str(end_ms),
                "limit": "1000",
            },
        )
        chunk = http_get(url, headers, timeout)
        if not chunk:
            break
        rows.extend(chunk)
        # Column 0 of a kline row is the candle open time in epoch ms.
        last_open = int(chunk[-1][0])
        next_cursor = last_open + interval_ms
        if next_cursor <= cursor:
            # Defensive: a stuck cursor would otherwise loop forever.
            break
        cursor = next_cursor
        if len(chunk) < 1000:
            # A short page means the requested range is exhausted.
            break
    return rows
def _probe_external_history(
    config: dict[str, Any],
    *,
    plan: DatasetPlan,
    timeout: float,
    http_status: Callable[[str, dict[str, str], float], tuple[int, str]] | None = None,
) -> dict[str, Any]:
    """Check whether the configured research provider can serve historical data.

    Issues one sample /coins/bitcoin/history request at the simulation start
    date and classifies the response as available / limited / rate_limited /
    unauthorized / failed. Never raises; the result is informational only.
    """
    opportunity_config = config.get("opportunity", {})
    provider = str(opportunity_config.get("research_provider", "coingecko")).lower().strip()
    if not bool(opportunity_config.get("auto_research", True)) or provider in {"", "off", "none", "disabled"}:
        return {"provider": provider or "disabled", "status": "disabled"}
    if provider != "coingecko":
        return {"provider": provider, "status": "unsupported"}
    coingecko_config = config.get("coingecko", {})
    base_url = str(coingecko_config.get("base_url", "https://api.coingecko.com/api/v3"))
    api_key = str(coingecko_config.get("api_key", "")).strip()
    headers = {"accept": "application/json", "user-agent": "coinhunter/2"}
    if api_key:
        headers["x-cg-demo-api-key"] = api_key
    # CoinGecko's /history endpoint expects dd-mm-yyyy.
    sample_date = plan.simulation_start.strftime("%d-%m-%Y")
    url = _build_url(base_url, "/coins/bitcoin/history", {"date": sample_date})
    http_status = http_status or _public_http_status
    try:
        status, body = http_status(url, headers, timeout)
    except (TimeoutError, URLError, OSError) as exc:
        return {"provider": "coingecko", "status": "failed", "sample_date": sample_date, "error": str(exc)}
    if status == 200:
        return {"provider": "coingecko", "status": "available", "sample_date": sample_date}
    lowered = body.lower()
    # NOTE(review): these phrases match CoinGecko's free-tier "history older
    # than 365 days" rejection message — confirm if the provider wording changes.
    if "allowed time range" in lowered or "365 days" in lowered:
        result_status = "limited"
    elif status == 429:
        result_status = "rate_limited"
    elif status in {401, 403}:
        result_status = "unauthorized"
    else:
        result_status = "failed"
    return {
        "provider": "coingecko",
        "status": result_status,
        "sample_date": sample_date,
        "http_status": status,
        "message": body[:240],  # truncated for log friendliness
    }
def _default_output_path(plan: DatasetPlan) -> Path:
    """Default dataset file under the runtime datasets/ dir, stamped with the plan end time."""
    dataset_dir = get_runtime_paths().root / "datasets"
    # Ensure the directory exists so the caller's write succeeds.
    dataset_dir.mkdir(parents=True, exist_ok=True)
    stamp = plan.end.strftime("%Y%m%dT%H%M%SZ")
    return dataset_dir / f"opportunity_dataset_{stamp}.json"
def collect_opportunity_dataset(
    config: dict[str, Any],
    *,
    symbols: list[str] | None = None,
    simulate_days: float | None = None,
    run_days: float | None = None,
    output_path: str | None = None,
    http_get: HttpGet | None = None,
    http_status: Callable[[str, dict[str, str], float], tuple[int, str]] | None = None,
    now: datetime | None = None,
) -> dict[str, Any]:
    """Download a historical kline dataset and write it to disk as JSON.

    Builds the collection plan, resolves the symbol universe, fetches klines
    for every (symbol, interval) pair, probes the external research provider,
    then writes ``{"metadata": ..., "klines": ...}`` to *output_path* (or a
    stamped default under the runtime datasets directory).

    *http_get* and *http_status* exist for dependency injection in tests.

    Returns a summary dict: file path, selected symbols, per-interval row
    counts, the serialized plan, and the external-history probe result.
    """
    opportunity_config = config.get("opportunity", {})
    timeout = _as_float(opportunity_config.get("dataset_timeout_seconds"), 15.0)
    plan = build_dataset_plan(config, simulate_days=simulate_days, run_days=run_days, now=now)
    http_get = http_get or _public_http_get
    selected_symbols = _select_universe(config, symbols=symbols, http_get=http_get, timeout=timeout)
    klines: dict[str, dict[str, list[list[Any]]]] = {}
    counts: dict[str, dict[str, int]] = {}
    for symbol in selected_symbols:
        klines[symbol] = {}
        counts[symbol] = {}
        for interval in plan.intervals:
            rows = _fetch_klines(
                config,
                symbol=symbol,
                interval=interval,
                start=plan.start,
                end=plan.end,
                http_get=http_get,
                timeout=timeout,
            )
            klines[symbol][interval] = rows
            counts[symbol][interval] = len(rows)
    external_history = _probe_external_history(config, plan=plan, timeout=timeout, http_status=http_status)
    path = Path(output_path).expanduser() if output_path else _default_output_path(plan)
    path.parent.mkdir(parents=True, exist_ok=True)
    metadata = {
        "created_at": _iso(datetime.now(timezone.utc)),
        "quote": str(config.get("market", {}).get("default_quote", "USDT")).upper(),
        "symbols": selected_symbols,
        "plan": {
            # Copy scalar plan fields as-is; serialize datetimes as ISO strings.
            **{
                key: value
                for key, value in asdict(plan).items()
                if key not in {"start", "simulation_start", "simulation_end", "end"}
            },
            "start": _iso(plan.start),
            "simulation_start": _iso(plan.simulation_start),
            "simulation_end": _iso(plan.simulation_end),
            "end": _iso(plan.end),
        },
        "external_history": external_history,
    }
    dataset = {"metadata": metadata, "klines": klines}
    path.write_text(json.dumps(dataset, ensure_ascii=False, indent=2), encoding="utf-8")
    return {
        "path": str(path),
        "symbols": selected_symbols,
        "counts": counts,
        "plan": metadata["plan"],
        "external_history": external_history,
    }
def parse_query(url: str) -> dict[str, str]:
    """Test helper for fake HTTP clients: last value wins for repeated keys."""
    query = parse_qs(urlparse(url).query)
    return {name: values[-1] for name, values in query.items()}

View File

@@ -3,11 +3,14 @@
from __future__ import annotations
from dataclasses import asdict, dataclass
from math import log10
from statistics import mean
from typing import Any
from ..audit import audit_event
from .account_service import get_positions
from .market_service import base_asset, get_scan_universe, normalize_symbol
from .research_service import get_external_research
from .signal_service import get_signal_interval, score_opportunity_signal
@@ -29,6 +32,197 @@ def _opportunity_thresholds(config: dict[str, Any]) -> dict[str, float]:
}
def _safe_pct(new: float, old: float) -> float:
if old == 0:
return 0.0
return (new - old) / old
def _clamp(value: float, low: float, high: float) -> float:
return min(max(value, low), high)
def _as_float(value: Any, default: float = 0.0) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default
def _empty_metrics(concentration: float) -> dict[str, float]:
return {
"trend": 0.0,
"momentum": 0.0,
"breakout": 0.0,
"pullback": 0.0,
"volume_confirmation": 1.0,
"liquidity": 0.0,
"trend_alignment": 0.0,
"volatility": 0.0,
"overextension": 0.0,
"downside_risk": 0.0,
"fundamental": 0.0,
"tokenomics": 0.0,
"catalyst": 0.0,
"adoption": 0.0,
"smart_money": 0.0,
"unlock_risk": 0.0,
"regulatory_risk": 0.0,
"research_confidence": 0.0,
"quality": 0.0,
"concentration": round(concentration, 4),
}
def _series_from_klines(klines: list[list[Any]]) -> tuple[list[float], list[float]]:
return [float(item[4]) for item in klines], [float(item[5]) for item in klines]
def _trend_signal(closes: list[float]) -> float:
if len(closes) < 2:
return 0.0
current = closes[-1]
sma_short = mean(closes[-5:]) if len(closes) >= 5 else current
sma_long = mean(closes[-20:]) if len(closes) >= 20 else mean(closes)
if current >= sma_short >= sma_long:
return 1.0
if current < sma_short < sma_long:
return -1.0
return 0.0
def _trend_alignment(interval_closes: dict[str, list[float]] | None) -> float:
    """Average per-interval trend signal across timeframes; 0.0 when unavailable."""
    if not interval_closes:
        return 0.0
    signals: list[float] = []
    for closes in interval_closes.values():
        if len(closes) >= 2:
            signals.append(_trend_signal(closes))
    if not signals:
        return 0.0
    return mean(signals)
def _range_position(current: float, low: float, high: float) -> float:
    """Where *current* sits in [low, high] as 0..1; 0.5 for a degenerate range."""
    if high <= low:
        return 0.5
    span = high - low
    return _clamp((current - low) / span, 0.0, 1.0)
def _normalized_research_score(value: Any) -> float:
    """Normalize provider research inputs to 0..1.

    Provider values can be expressed as either 0..1 or 0..100; anything above
    1.0 is treated as a percentage.
    """
    raw = _as_float(value)
    scaled = raw / 100.0 if raw > 1.0 else raw
    return _clamp(scaled, 0.0, 1.0)
def _research_signals(research: dict[str, Any] | None) -> dict[str, float]:
    """Extract the eight normalized research factors from a provider payload."""
    payload = research or {}
    factor_names = (
        "fundamental",
        "tokenomics",
        "catalyst",
        "adoption",
        "smart_money",
        "unlock_risk",
        "regulatory_risk",
        "research_confidence",
    )
    return {name: _normalized_research_score(payload.get(name)) for name in factor_names}
def _score_candidate(
    closes: list[float],
    volumes: list[float],
    ticker: dict[str, Any],
    weights: dict[str, float],
    concentration: float,
    interval_closes: dict[str, list[float]] | None = None,
    research: dict[str, Any] | None = None,
) -> tuple[float, dict[str, float]]:
    """Score one candidate from price/volume series, ticker stats, and research.

    Returns ``(score, metrics)``: the weighted composite score plus the
    rounded component metrics that produced it. With fewer than two closes or
    no volumes the candidate cannot be scored and neutral metrics are returned.
    """
    if len(closes) < 2 or not volumes:
        return 0.0, _empty_metrics(concentration)
    current = closes[-1]
    sma_short = mean(closes[-5:]) if len(closes) >= 5 else current
    trend = _trend_signal(closes)
    # Momentum blends last-bar, 5-bar, and 24h-ticker percentage change.
    momentum = (
        _safe_pct(closes[-1], closes[-2]) * 0.5
        + (_safe_pct(closes[-1], closes[-5]) * 0.3 if len(closes) >= 5 else 0.0)
        + _as_float(ticker.get("price_change_pct")) / 100.0 * 0.2
    )
    recent_high = max(closes[-20:]) if len(closes) >= 20 else max(closes)
    # 1.0 at the recent high, decaying with distance below it.
    breakout = 1.0 - max((recent_high - current) / recent_high, 0.0)
    avg_volume = mean(volumes[:-1]) if len(volumes) > 1 else volumes[-1]
    volume_confirmation = volumes[-1] / avg_volume if avg_volume else 1.0
    volume_score = _clamp(volume_confirmation - 1.0, -1.0, 2.0)
    volatility = (max(closes[-10:]) - min(closes[-10:])) / current if len(closes) >= 10 and current else 0.0
    quote_volume = _as_float(ticker.get("quote_volume"))
    # Log-scaled liquidity: ~1e6 quote volume scores 0, ~1e9 scores 1.
    liquidity = _clamp((log10(quote_volume) - 6.0) / 3.0, 0.0, 1.0) if quote_volume > 0 else 0.0
    high_price = _as_float(ticker.get("high_price"), recent_high)
    low_price = _as_float(ticker.get("low_price"), min(closes))
    range_position = _range_position(current, low_price, high_price)
    # Pullback score peaks when price sits ~62% up its daily range.
    pullback = 1.0 - abs(range_position - 0.62) / 0.62
    pullback = _clamp(pullback, 0.0, 1.0)
    # Penalize stretch beyond 8% over the short SMA or a >12% daily move.
    overextension = max(_safe_pct(current, sma_short) - 0.08, 0.0) + max(
        _as_float(ticker.get("price_change_pct")) / 100.0 - 0.12, 0.0
    )
    downside_risk = max(0.35 - range_position, 0.0) + max(volatility - 0.18, 0.0)
    trend_alignment = _trend_alignment(interval_closes)
    research_signals = _research_signals(research)
    # Composite research quality: mean of the five positive factors.
    quality = mean(
        [
            research_signals["fundamental"],
            research_signals["tokenomics"],
            research_signals["catalyst"],
            research_signals["adoption"],
            research_signals["smart_money"],
        ]
    )
    # Weighted blend of technical and research factors minus risk penalties;
    # every weight has a default so partial weight configs still work.
    score = (
        weights.get("trend", 1.0) * trend
        + weights.get("momentum", 1.0) * momentum
        + weights.get("breakout", 0.8) * breakout
        + weights.get("pullback", 0.4) * pullback
        + weights.get("volume", 0.7) * volume_score
        + weights.get("liquidity", 0.3) * liquidity
        + weights.get("trend_alignment", 0.8) * trend_alignment
        + weights.get("fundamental", 0.8) * research_signals["fundamental"]
        + weights.get("tokenomics", 0.7) * research_signals["tokenomics"]
        + weights.get("catalyst", 0.5) * research_signals["catalyst"]
        + weights.get("adoption", 0.4) * research_signals["adoption"]
        + weights.get("smart_money", 0.3) * research_signals["smart_money"]
        - weights.get("volatility_penalty", 0.5) * volatility
        - weights.get("overextension_penalty", 0.7) * overextension
        - weights.get("downside_penalty", 0.5) * downside_risk
        - weights.get("unlock_penalty", 0.8) * research_signals["unlock_risk"]
        - weights.get("regulatory_penalty", 0.4) * research_signals["regulatory_risk"]
        - weights.get("position_concentration_penalty", 0.6) * concentration
    )
    metrics = {
        "trend": round(trend, 4),
        "momentum": round(momentum, 4),
        "breakout": round(breakout, 4),
        "pullback": round(pullback, 4),
        "volume_confirmation": round(volume_confirmation, 4),
        "liquidity": round(liquidity, 4),
        "trend_alignment": round(trend_alignment, 4),
        "volatility": round(volatility, 4),
        "overextension": round(overextension, 4),
        "downside_risk": round(downside_risk, 4),
        "fundamental": round(research_signals["fundamental"], 4),
        "tokenomics": round(research_signals["tokenomics"], 4),
        "catalyst": round(research_signals["catalyst"], 4),
        "adoption": round(research_signals["adoption"], 4),
        "smart_money": round(research_signals["smart_money"], 4),
        "unlock_risk": round(research_signals["unlock_risk"], 4),
        "regulatory_risk": round(research_signals["regulatory_risk"], 4),
        "research_confidence": round(research_signals["research_confidence"], 4),
        "quality": round(quality, 4),
        "concentration": round(concentration, 4),
    }
    return score, metrics
def _action_for_opportunity(score: float, metrics: dict[str, float], thresholds: dict[str, float]) -> tuple[str, list[str]]:
reasons: list[str] = []
if metrics["extension_penalty"] >= 1.0 and (metrics["recent_runup"] >= 0.10 or metrics["breakout_pct"] >= 0.03):
@@ -44,6 +238,115 @@ def _action_for_opportunity(score: float, metrics: dict[str, float], thresholds:
return "skip", reasons
def _action_for(
score: float,
concentration: float,
metrics: dict[str, float] | None = None,
risk_limits: dict[str, float] | None = None,
) -> tuple[str, list[str]]:
metrics = metrics or {}
risk_limits = risk_limits or {}
reasons: list[str] = []
if concentration >= 0.5 and score < 0.4:
reasons.append("position concentration is high")
return "trim", reasons
if metrics.get("liquidity", 0.0) < risk_limits.get("min_liquidity", 0.0):
reasons.append("liquidity is below the configured institutional threshold")
return "observe", reasons
if metrics.get("unlock_risk", 0.0) > risk_limits.get("max_unlock_risk", 1.0):
reasons.append("token unlock or dilution risk is too high")
return "observe", reasons
if metrics.get("regulatory_risk", 0.0) > risk_limits.get("max_regulatory_risk", 1.0):
reasons.append("regulatory or listing risk is too high")
return "observe", reasons
if metrics.get("overextension", 0.0) >= risk_limits.get("max_overextension", 0.08):
reasons.append("move looks extended; wait for a cleaner entry")
return "observe", reasons
if metrics.get("downside_risk", 0.0) >= risk_limits.get("max_downside_risk", 0.3) and score < 1.0:
reasons.append("price is weak inside its recent range")
return "observe", reasons
if score >= 1.8 and metrics.get("quality", 0.0) >= risk_limits.get("min_quality_for_add", 0.0):
reasons.append("trend, liquidity, and research signals are aligned")
return "add", reasons
if score >= 0.6:
reasons.append("trend remains constructive")
return "hold", reasons
if score <= -0.2:
reasons.append("momentum and structure have weakened")
return "exit", reasons
reasons.append("signal is mixed and needs confirmation")
return "observe", reasons
def _lookback_intervals(config: dict[str, Any]) -> list[str]:
configured = config.get("opportunity", {}).get("lookback_intervals", ["1h"])
intervals = [str(item) for item in configured if str(item).strip()]
return intervals or ["1h"]
def _risk_limits(config: dict[str, Any]) -> dict[str, float]:
    """Float-coerced risk-limit overrides from config (possibly empty)."""
    raw = config.get("opportunity", {}).get("risk_limits", {})
    return {str(name): _as_float(setting) for name, setting in raw.items()}
def _ticker_metrics(ticker: dict[str, Any]) -> dict[str, float]:
    """Normalize a 24h ticker payload (camelCase or snake_case keys) to floats."""

    def pick(camel: str, snake: str) -> float:
        # camelCase (raw exchange payload) takes precedence over snake_case.
        return _as_float(ticker.get(camel) or ticker.get(snake))

    return {
        "price_change_pct": pick("priceChangePercent", "price_change_pct"),
        "quote_volume": pick("quoteVolume", "quote_volume"),
        "high_price": pick("highPrice", "high_price"),
        "low_price": pick("lowPrice", "low_price"),
    }
def _candidate_series(
    spot_client: Any,
    symbol: str,
    intervals: list[str],
    limit: int,
) -> tuple[list[float], list[float], dict[str, list[float]]]:
    """Fetch close/volume series per interval; the first interval is the primary one."""
    interval_closes: dict[str, list[float]] = {}
    primary: tuple[list[float], list[float]] = ([], [])
    for position, interval in enumerate(intervals):
        rows = spot_client.klines(symbol=symbol, interval=interval, limit=limit)
        closes, volumes = _series_from_klines(rows)
        interval_closes[interval] = closes
        if position == 0:
            primary = (closes, volumes)
    return primary[0], primary[1], interval_closes
def _add_research_metrics(metrics: dict[str, float], research: dict[str, Any] | None) -> None:
    """Merge rounded research factors into *metrics* plus a composite "quality" (in place)."""
    signals = _research_signals(research)
    for name, signal in signals.items():
        metrics[name] = round(signal, 4)
    quality_inputs = [
        signals["fundamental"],
        signals["tokenomics"],
        signals["catalyst"],
        signals["adoption"],
        signals["smart_money"],
    ]
    metrics["quality"] = round(mean(quality_inputs), 4)
def _research_score(research: dict[str, Any] | None, weights: dict[str, float]) -> float:
    """Weighted sum of positive research factors minus unlock/regulatory penalties."""
    signals = _research_signals(research)
    positives = (
        ("fundamental", "fundamental", 0.8),
        ("tokenomics", "tokenomics", 0.7),
        ("catalyst", "catalyst", 0.5),
        ("adoption", "adoption", 0.4),
        ("smart_money", "smart_money", 0.3),
    )
    penalties = (
        ("unlock_penalty", "unlock_risk", 0.8),
        ("regulatory_penalty", "regulatory_risk", 0.4),
    )
    # Accumulate in the same left-to-right order as the original expression
    # to keep floating-point results bit-identical.
    total = 0.0
    for weight_key, signal_key, default in positives:
        total += weights.get(weight_key, default) * signals[signal_key]
    for weight_key, signal_key, default in penalties:
        total -= weights.get(weight_key, default) * signals[signal_key]
    return total
def scan_opportunities(
config: dict[str, Any],
*,
@@ -51,6 +354,7 @@ def scan_opportunities(
symbols: list[str] | None = None,
) -> dict[str, Any]:
opportunity_config = config.get("opportunity", {})
weights = opportunity_config.get("weights", {})
ignore_dust = bool(opportunity_config.get("ignore_dust", True))
interval = get_signal_interval(config)
thresholds = _opportunity_thresholds(config)
@@ -62,18 +366,27 @@ def scan_opportunities(
total_held = sum(concentration_map.values()) or 1.0
universe = get_scan_universe(config, spot_client=spot_client, symbols=symbols)[:scan_limit]
external_research = get_external_research(
config,
symbols=[normalize_symbol(ticker["symbol"]) for ticker in universe],
quote=quote,
)
recommendations = []
for ticker in universe:
symbol = normalize_symbol(ticker["symbol"])
klines = spot_client.klines(symbol=symbol, interval=interval, limit=24)
closes = [float(item[4]) for item in klines]
volumes = [float(item[5]) for item in klines]
closes, volumes = _series_from_klines(klines)
concentration = concentration_map.get(symbol, 0.0) / total_held
opportunity_score, metrics = score_opportunity_signal(closes, volumes, ticker, opportunity_config)
score = opportunity_score - thresholds["overlap_penalty"] * concentration
action, reasons = _action_for_opportunity(score, metrics, thresholds)
metrics["opportunity_score"] = round(opportunity_score, 4)
metrics["position_weight"] = round(concentration, 4)
research = external_research.get(symbol, {})
research_score = _research_score(research, weights)
score += research_score
metrics["research_score"] = round(research_score, 4)
_add_research_metrics(metrics, research)
action, reasons = _action_for_opportunity(score, metrics, thresholds)
if symbol.endswith(quote):
reasons.append(f"base asset {base_asset(symbol, quote)} passed liquidity and tradability filters")
if concentration > 0:

View File

@@ -8,7 +8,11 @@ from typing import Any
from ..audit import audit_event
from .account_service import get_positions
from .market_service import normalize_symbol
from .signal_service import get_signal_interval, get_signal_weights, score_portfolio_signal
from .signal_service import (
get_signal_interval,
get_signal_weights,
score_portfolio_signal,
)
@dataclass

View File

@@ -0,0 +1,214 @@
"""External research signal providers for opportunity scoring."""
from __future__ import annotations
import json
from collections.abc import Callable
from math import log10
from typing import Any
from urllib.parse import urlencode
from urllib.request import Request, urlopen
from .market_service import base_asset, normalize_symbol
HttpGet = Callable[[str, dict[str, str], float], Any]
def _clamp(value: float, low: float = 0.0, high: float = 1.0) -> float:
return min(max(value, low), high)
def _as_float(value: Any, default: float = 0.0) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default
def _safe_ratio(numerator: float, denominator: float) -> float:
if denominator <= 0:
return 0.0
return numerator / denominator
def _log_score(value: float, *, floor: float, span: float) -> float:
    """Log10-scale *value* into 0..1: 0 at 10**floor, 1 at 10**(floor + span)."""
    if value <= 0:
        return 0.0
    scaled = (log10(value) - floor) / span
    return _clamp(scaled)
def _pct_score(value: float, *, low: float, high: float) -> float:
    """Linearly map *value* from [low, high] onto 0..1; 0.0 for a degenerate range."""
    if high <= low:
        return 0.0
    fraction = (value - low) / (high - low)
    return _clamp(fraction)
def _public_http_get(url: str, headers: dict[str, str], timeout: float) -> Any:
    """GET *url* with *headers* and parse the body as JSON."""
    request = Request(url, headers=headers)
    with urlopen(request, timeout=timeout) as response:  # noqa: S310 - user-configured market data endpoint
        payload = response.read()
    return json.loads(payload.decode("utf-8"))
def _build_url(base_url: str, path: str, params: dict[str, str]) -> str:
return f"{base_url.rstrip('/')}{path}?{urlencode(params)}"
def _chunked(items: list[str], size: int) -> list[list[str]]:
return [items[index : index + size] for index in range(0, len(items), size)]
def _coingecko_market_to_signals(row: dict[str, Any], *, is_trending: bool = False) -> dict[str, float]:
    """Convert one CoinGecko /coins/markets row into normalized research factors.

    All outputs are rounded to 4 decimals in 0..1. Heuristic blends: rank and
    size feed "fundamental"/"adoption", supply and FDV feed "tokenomics" and
    "unlock_risk", momentum windows feed "catalyst".
    """
    market_cap = _as_float(row.get("market_cap"))
    fdv = _as_float(row.get("fully_diluted_valuation"))
    volume = _as_float(row.get("total_volume"))
    rank = _as_float(row.get("market_cap_rank"), 9999.0)
    circulating = _as_float(row.get("circulating_supply"))
    total_supply = _as_float(row.get("total_supply"))
    max_supply = _as_float(row.get("max_supply"))
    supply_cap = max_supply or total_supply
    # Rank 1 scores 1.0; the score decays one quarter per decade of rank.
    rank_score = _clamp(1.0 - (log10(max(rank, 1.0)) / 4.0))
    size_score = _log_score(market_cap, floor=7.0, span=5.0)
    volume_to_mcap = _safe_ratio(volume, market_cap)
    # 10% daily volume/market-cap turnover counts as fully liquid.
    liquidity_quality = _clamp(volume_to_mcap / 0.10)
    fdv_ratio = _safe_ratio(fdv, market_cap) if fdv and market_cap else 1.0
    fdv_dilution_risk = _clamp((fdv_ratio - 1.0) / 4.0)
    supply_unlocked = _clamp(_safe_ratio(circulating, supply_cap)) if supply_cap else max(0.0, 1.0 - fdv_dilution_risk)
    supply_dilution_risk = 1.0 - supply_unlocked
    unlock_risk = max(fdv_dilution_risk, supply_dilution_risk * 0.8)
    pct_7d = _as_float(row.get("price_change_percentage_7d_in_currency"))
    pct_30d = _as_float(row.get("price_change_percentage_30d_in_currency"))
    pct_200d = _as_float(row.get("price_change_percentage_200d_in_currency"))
    medium_momentum = _pct_score(pct_30d, low=-15.0, high=60.0)
    long_momentum = _pct_score(pct_200d, low=-40.0, high=150.0)
    trend_catalyst = _pct_score(pct_7d, low=-5.0, high=25.0)
    trend_bonus = 1.0 if is_trending else 0.0
    tokenomics = _clamp(0.65 * supply_unlocked + 0.35 * (1.0 - fdv_dilution_risk))
    fundamental = _clamp(0.40 * rank_score + 0.35 * size_score + 0.25 * liquidity_quality)
    catalyst = _clamp(0.45 * trend_catalyst + 0.40 * medium_momentum + 0.15 * trend_bonus)
    adoption = _clamp(0.45 * rank_score + 0.35 * liquidity_quality + 0.20 * long_momentum)
    smart_money = _clamp(0.35 * rank_score + 0.35 * liquidity_quality + 0.30 * (1.0 - unlock_risk))
    # Coarse proxy: large caps assumed lower regulatory/listing risk.
    regulatory_risk = 0.10 if rank <= 100 else 0.20 if rank <= 500 else 0.35
    # Confidence = fraction of source fields that were present (truthy).
    # NOTE(review): a missing market_cap_rank defaults to 9999.0, which is
    # truthy and therefore still counts as populated — confirm intended.
    populated_fields = sum(
        1
        for value in (market_cap, fdv, volume, rank, circulating, supply_cap, pct_7d, pct_30d, pct_200d)
        if value
    )
    confidence = _clamp(populated_fields / 9.0)
    return {
        "fundamental": round(fundamental, 4),
        "tokenomics": round(tokenomics, 4),
        "catalyst": round(catalyst, 4),
        "adoption": round(adoption, 4),
        "smart_money": round(smart_money, 4),
        "unlock_risk": round(unlock_risk, 4),
        "regulatory_risk": round(regulatory_risk, 4),
        "research_confidence": round(confidence, 4),
    }
def _coingecko_headers(config: dict[str, Any]) -> dict[str, str]:
coingecko_config = config.get("coingecko", {})
headers = {"accept": "application/json", "user-agent": "coinhunter/2"}
api_key = str(coingecko_config.get("api_key", "")).strip()
if api_key:
headers["x-cg-demo-api-key"] = api_key
return headers
def _fetch_coingecko_research(
    config: dict[str, Any],
    *,
    symbols: list[str],
    quote: str,
    http_get: HttpGet | None = None,
) -> dict[str, dict[str, float]]:
    """Fetch CoinGecko market rows for the symbols' base assets and convert to signals.

    Best-effort by design: the trending lookup and each markets page swallow
    failures so a flaky endpoint degrades to fewer signals, not an exception.
    Returns a map of normalized exchange symbol -> research-factor dict.
    """
    if not symbols:
        return {}
    opportunity_config = config.get("opportunity", {})
    coingecko_config = config.get("coingecko", {})
    base_url = str(coingecko_config.get("base_url", "https://api.coingecko.com/api/v3"))
    timeout = _as_float(opportunity_config.get("research_timeout_seconds"), 4.0)
    headers = _coingecko_headers(config)
    http_get = http_get or _public_http_get
    # Map lowercase base asset (CoinGecko's symbol field) back to the exchange symbol.
    base_to_symbol = {
        base_asset(normalize_symbol(symbol), quote).lower(): normalize_symbol(symbol)
        for symbol in symbols
        if normalize_symbol(symbol)
    }
    bases = sorted(base_to_symbol)
    if not bases:
        return {}
    # Optional: ids of currently trending coins, used for the catalyst bonus.
    trending_ids: set[str] = set()
    try:
        trending_url = _build_url(base_url, "/search/trending", {})
        trending_payload = http_get(trending_url, headers, timeout)
        for item in trending_payload.get("coins", []):
            coin = item.get("item", {})
            coin_id = str(coin.get("id", "")).strip()
            if coin_id:
                trending_ids.add(coin_id)
    except Exception:
        # Deliberate broad catch: trending is a nice-to-have only.
        trending_ids = set()
    research: dict[str, dict[str, float]] = {}
    # /coins/markets caps symbol lists; request in chunks of 50.
    for chunk in _chunked(bases, 50):
        params = {
            "vs_currency": "usd",
            "symbols": ",".join(chunk),
            "include_tokens": "top",
            "order": "market_cap_desc",
            "per_page": "250",
            "page": "1",
            "sparkline": "false",
            "price_change_percentage": "7d,30d,200d",
        }
        try:
            markets_url = _build_url(base_url, "/coins/markets", params)
            rows = http_get(markets_url, headers, timeout)
        except Exception:
            # Deliberate broad catch: skip a failed page, keep the rest.
            continue
        seen_bases: set[str] = set()
        # Rows are market-cap ordered; the first row per base wins.
        for row in rows if isinstance(rows, list) else []:
            symbol = str(row.get("symbol", "")).lower()
            if symbol in seen_bases or symbol not in base_to_symbol:
                continue
            seen_bases.add(symbol)
            normalized = base_to_symbol[symbol]
            research[normalized] = _coingecko_market_to_signals(
                row,
                is_trending=str(row.get("id", "")) in trending_ids,
            )
    return research
def get_external_research(
    config: dict[str, Any],
    *,
    symbols: list[str],
    quote: str,
    http_get: HttpGet | None = None,
) -> dict[str, dict[str, float]]:
    """Fetch automated research signals for symbols.

    Returns an empty map when disabled or when the configured provider is unavailable.
    Opportunity scans should continue rather than fail because a research endpoint timed out.
    """
    opportunity_config = config.get("opportunity", {})
    if not bool(opportunity_config.get("auto_research", True)):
        return {}
    provider = str(opportunity_config.get("research_provider", "coingecko")).strip().lower()
    # Only CoinGecko is implemented; "", "off", "none", "disabled", or any
    # other provider name all behave as disabled.
    if provider != "coingecko":
        return {}
    return _fetch_coingecko_research(config, symbols=symbols, quote=quote, http_get=http_get)