Add opportunity dataset collection

This commit is contained in:
Carlos Ouyang
2026-04-21 19:41:48 +08:00
parent 50402e4aa7
commit 436bef4814
10 changed files with 1295 additions and 32 deletions

View File

@@ -0,0 +1,353 @@
"""Historical dataset collection for opportunity evaluation."""
from __future__ import annotations
import json
from collections.abc import Callable
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.parse import parse_qs, urlencode, urlparse
from urllib.request import Request, urlopen
from ..runtime import get_runtime_paths
from .market_service import normalize_symbol, normalize_symbols
HttpGet = Callable[[str, dict[str, str], float], Any]
# Binance-style kline interval token -> bar duration in seconds.
# Used both to compute warm-up windows and to advance the kline pagination cursor.
_INTERVAL_SECONDS = {
    "1m": 60,
    "3m": 180,
    "5m": 300,
    "15m": 900,
    "30m": 1800,
    "1h": 3600,
    "2h": 7200,
    "4h": 14400,
    "6h": 21600,
    "8h": 28800,
    "12h": 43200,
    "1d": 86400,
    "3d": 259200,
    "1w": 604800,
}
@dataclass(frozen=True)
class DatasetPlan:
    """Immutable description of the historical window to collect.

    Timeline: start -> simulation_start -> simulation_end -> end, where the
    leading reference segment supplies indicator warm-up history.
    """

    intervals: list[str]  # kline intervals to download, e.g. ["1h", "4h", "1d"]
    kline_limit: int  # candles needed per interval for indicator warm-up
    reference_days: float  # warm-up history preceding the simulation window
    simulate_days: float  # requested simulation span in days
    run_days: float  # requested live-run span in days
    total_days: float  # reference_days + simulate_days + run_days
    start: datetime  # beginning of the collected window (UTC)
    simulation_start: datetime  # start + reference_days
    simulation_end: datetime  # simulation_start + run_days
    end: datetime  # end of the collected window (UTC, second resolution)
def _as_float(value: Any, default: float = 0.0) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default
def _as_int(value: Any, default: int = 0) -> int:
try:
return int(value)
except (TypeError, ValueError):
return default
def _public_http_get(url: str, headers: dict[str, str], timeout: float) -> Any:
    """GET *url* with *headers* and parse the response body as JSON."""
    request = Request(url, headers=headers)
    with urlopen(request, timeout=timeout) as response:  # noqa: S310 - market data endpoints are user-configurable
        body = response.read()
    return json.loads(body.decode("utf-8"))
def _public_http_status(url: str, headers: dict[str, str], timeout: float) -> tuple[int, str]:
    """GET *url* and return (HTTP status, body text).

    HTTP error responses are captured and returned, not raised, so callers can
    classify failures from the status code and body.
    """
    request = Request(url, headers=headers)
    try:
        response = urlopen(request, timeout=timeout)  # noqa: S310 - market data endpoints are user-configurable
    except HTTPError as exc:
        return exc.code, exc.read().decode("utf-8")
    with response:
        return response.status, response.read().decode("utf-8")
def _build_url(base_url: str, path: str, params: dict[str, str]) -> str:
return f"{base_url.rstrip('/')}{path}?{urlencode(params)}"
def _iso(dt: datetime) -> str:
return dt.astimezone(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def _ms(dt: datetime) -> int:
return int(dt.timestamp() * 1000)
def _default_intervals(config: dict[str, Any]) -> list[str]:
configured = config.get("opportunity", {}).get("lookback_intervals", ["1h", "4h", "1d"])
intervals = [str(item).strip() for item in configured if str(item).strip()]
return intervals or ["1h"]
def reference_days_for(config: dict[str, Any]) -> float:
    """Days of history needed to fill kline_limit candles on the slowest interval.

    Unknown interval tokens contribute zero seconds.
    """
    opportunity_config = config.get("opportunity", {})
    limit = _as_int(opportunity_config.get("kline_limit"), 48)
    longest_seconds = 0
    for interval in _default_intervals(config):
        span = (_INTERVAL_SECONDS.get(interval) or 0) * limit
        longest_seconds = max(longest_seconds, span)
    return round(longest_seconds / 86400, 4)
def build_dataset_plan(
    config: dict[str, Any],
    *,
    simulate_days: float | None = None,
    run_days: float | None = None,
    now: datetime | None = None,
) -> DatasetPlan:
    """Derive the collection window (reference + simulate + run) ending at *now*.

    Explicit ``simulate_days``/``run_days`` arguments override the values in
    ``config["opportunity"]``; both default to 7 days when unset or invalid.
    """
    opportunity_config = config.get("opportunity", {})
    simulate_source = simulate_days if simulate_days is not None else opportunity_config.get("simulate_days")
    run_source = run_days if run_days is not None else opportunity_config.get("run_days")
    simulate = _as_float(simulate_source, 7.0)
    run = _as_float(run_source, 7.0)
    reference_days = reference_days_for(config)
    # Normalize the anchor to UTC at second resolution.
    window_end = (now or datetime.now(timezone.utc)).astimezone(timezone.utc).replace(microsecond=0)
    total = reference_days + simulate + run
    window_start = window_end - timedelta(days=total)
    sim_start = window_start + timedelta(days=reference_days)
    sim_end = sim_start + timedelta(days=run)
    return DatasetPlan(
        intervals=_default_intervals(config),
        kline_limit=_as_int(opportunity_config.get("kline_limit"), 48),
        reference_days=reference_days,
        simulate_days=simulate,
        run_days=run,
        total_days=round(total, 4),
        start=window_start,
        simulation_start=sim_start,
        simulation_end=sim_end,
        end=window_end,
    )
def _binance_base_url(config: dict[str, Any]) -> str:
return str(config.get("binance", {}).get("spot_base_url", "https://api.binance.com"))
def _select_universe(
    config: dict[str, Any],
    *,
    symbols: list[str] | None,
    http_get: HttpGet,
    timeout: float,
) -> list[str]:
    """Pick the symbols to collect klines for.

    Explicit *symbols* short-circuit discovery. Otherwise the Binance 24h
    tickers are filtered (quote suffix, allow/deny lists, TRADING status,
    minimum quote volume) and the top *scan_limit* by quote volume are kept.
    """
    if symbols:
        return normalize_symbols(symbols)
    market_config = config.get("market", {})
    opportunity_config = config.get("opportunity", {})
    quote = str(market_config.get("default_quote", "USDT")).upper()
    allowlist = set(normalize_symbols(market_config.get("universe_allowlist", [])))
    denylist = set(normalize_symbols(market_config.get("universe_denylist", [])))
    scan_limit = _as_int(opportunity_config.get("scan_limit"), 50)
    min_quote_volume = _as_float(opportunity_config.get("min_quote_volume"), 0.0)
    base_url = _binance_base_url(config)
    headers = {"accept": "application/json", "user-agent": "coinhunter/2"}
    # exchangeInfo provides the tradability status of every listed symbol.
    exchange_info = http_get(_build_url(base_url, "/api/v3/exchangeInfo", {}), headers, timeout)
    status_map = {normalize_symbol(item["symbol"]): item.get("status", "") for item in exchange_info.get("symbols", [])}
    rows = http_get(_build_url(base_url, "/api/v3/ticker/24hr", {}), headers, timeout)
    universe: list[tuple[str, float]] = []
    # Tolerate a non-list payload (error object) by iterating nothing.
    for ticker in rows if isinstance(rows, list) else []:
        symbol = normalize_symbol(ticker.get("symbol", ""))
        if not symbol.endswith(quote):
            continue
        # A non-empty allowlist is exclusive.
        if allowlist and symbol not in allowlist:
            continue
        if symbol in denylist:
            continue
        if status_map.get(symbol) != "TRADING":
            continue
        quote_volume = _as_float(ticker.get("quoteVolume"))
        if quote_volume < min_quote_volume:
            continue
        universe.append((symbol, quote_volume))
    # Most liquid first, truncated to the configured scan size.
    universe.sort(key=lambda item: item[1], reverse=True)
    return [symbol for symbol, _ in universe[:scan_limit]]
def _fetch_klines(
    config: dict[str, Any],
    *,
    symbol: str,
    interval: str,
    start: datetime,
    end: datetime,
    http_get: HttpGet,
    timeout: float,
) -> list[list[Any]]:
    """Page through /api/v3/klines for [start, end], up to 1000 candles per request.

    The cursor advances one interval past the last returned open time; a
    non-advancing cursor, an empty page, or a short page ends the loop.
    """
    base_url = _binance_base_url(config)
    headers = {"accept": "application/json", "user-agent": "coinhunter/2"}
    # Fall back to 1-minute bars if the interval token is unknown.
    interval_ms = (_INTERVAL_SECONDS.get(interval) or 60) * 1000
    cursor = _ms(start)
    end_ms = _ms(end)
    rows: list[list[Any]] = []
    while cursor <= end_ms:
        url = _build_url(
            base_url,
            "/api/v3/klines",
            {
                "symbol": symbol,
                "interval": interval,
                "startTime": str(cursor),
                "endTime": str(end_ms),
                "limit": "1000",
            },
        )
        chunk = http_get(url, headers, timeout)
        if not chunk:
            break
        rows.extend(chunk)
        # Column 0 of a kline row is the candle open time in epoch ms.
        last_open = int(chunk[-1][0])
        next_cursor = last_open + interval_ms
        if next_cursor <= cursor:
            # Defensive: a stuck cursor would otherwise loop forever.
            break
        cursor = next_cursor
        if len(chunk) < 1000:
            # A short page means the requested range is exhausted.
            break
    return rows
def _probe_external_history(
    config: dict[str, Any],
    *,
    plan: DatasetPlan,
    timeout: float,
    http_status: Callable[[str, dict[str, str], float], tuple[int, str]] | None = None,
) -> dict[str, Any]:
    """Check whether the configured research provider can serve historical data.

    Issues one sample /coins/bitcoin/history request at the simulation start
    date and classifies the response as available / limited / rate_limited /
    unauthorized / failed. Never raises; the result is informational only.
    """
    opportunity_config = config.get("opportunity", {})
    provider = str(opportunity_config.get("research_provider", "coingecko")).lower().strip()
    if not bool(opportunity_config.get("auto_research", True)) or provider in {"", "off", "none", "disabled"}:
        return {"provider": provider or "disabled", "status": "disabled"}
    if provider != "coingecko":
        return {"provider": provider, "status": "unsupported"}
    coingecko_config = config.get("coingecko", {})
    base_url = str(coingecko_config.get("base_url", "https://api.coingecko.com/api/v3"))
    api_key = str(coingecko_config.get("api_key", "")).strip()
    headers = {"accept": "application/json", "user-agent": "coinhunter/2"}
    if api_key:
        headers["x-cg-demo-api-key"] = api_key
    # CoinGecko's /history endpoint expects dd-mm-yyyy.
    sample_date = plan.simulation_start.strftime("%d-%m-%Y")
    url = _build_url(base_url, "/coins/bitcoin/history", {"date": sample_date})
    http_status = http_status or _public_http_status
    try:
        status, body = http_status(url, headers, timeout)
    except (TimeoutError, URLError, OSError) as exc:
        return {"provider": "coingecko", "status": "failed", "sample_date": sample_date, "error": str(exc)}
    if status == 200:
        return {"provider": "coingecko", "status": "available", "sample_date": sample_date}
    lowered = body.lower()
    # NOTE(review): these phrases match CoinGecko's free-tier "history older
    # than 365 days" rejection message — confirm if the provider wording changes.
    if "allowed time range" in lowered or "365 days" in lowered:
        result_status = "limited"
    elif status == 429:
        result_status = "rate_limited"
    elif status in {401, 403}:
        result_status = "unauthorized"
    else:
        result_status = "failed"
    return {
        "provider": "coingecko",
        "status": result_status,
        "sample_date": sample_date,
        "http_status": status,
        "message": body[:240],  # truncated for log friendliness
    }
def _default_output_path(plan: DatasetPlan) -> Path:
    """Default dataset file under the runtime datasets/ dir, stamped with the plan end time."""
    dataset_dir = get_runtime_paths().root / "datasets"
    # Ensure the directory exists so the caller's write succeeds.
    dataset_dir.mkdir(parents=True, exist_ok=True)
    stamp = plan.end.strftime("%Y%m%dT%H%M%SZ")
    return dataset_dir / f"opportunity_dataset_{stamp}.json"
def collect_opportunity_dataset(
    config: dict[str, Any],
    *,
    symbols: list[str] | None = None,
    simulate_days: float | None = None,
    run_days: float | None = None,
    output_path: str | None = None,
    http_get: HttpGet | None = None,
    http_status: Callable[[str, dict[str, str], float], tuple[int, str]] | None = None,
    now: datetime | None = None,
) -> dict[str, Any]:
    """Download a historical kline dataset and write it to disk as JSON.

    Builds the collection plan, resolves the symbol universe, fetches klines
    for every (symbol, interval) pair, probes the external research provider,
    then writes ``{"metadata": ..., "klines": ...}`` to *output_path* (or a
    stamped default under the runtime datasets directory).

    *http_get* and *http_status* exist for dependency injection in tests.

    Returns a summary dict: file path, selected symbols, per-interval row
    counts, the serialized plan, and the external-history probe result.
    """
    opportunity_config = config.get("opportunity", {})
    timeout = _as_float(opportunity_config.get("dataset_timeout_seconds"), 15.0)
    plan = build_dataset_plan(config, simulate_days=simulate_days, run_days=run_days, now=now)
    http_get = http_get or _public_http_get
    selected_symbols = _select_universe(config, symbols=symbols, http_get=http_get, timeout=timeout)
    klines: dict[str, dict[str, list[list[Any]]]] = {}
    counts: dict[str, dict[str, int]] = {}
    for symbol in selected_symbols:
        klines[symbol] = {}
        counts[symbol] = {}
        for interval in plan.intervals:
            rows = _fetch_klines(
                config,
                symbol=symbol,
                interval=interval,
                start=plan.start,
                end=plan.end,
                http_get=http_get,
                timeout=timeout,
            )
            klines[symbol][interval] = rows
            counts[symbol][interval] = len(rows)
    external_history = _probe_external_history(config, plan=plan, timeout=timeout, http_status=http_status)
    path = Path(output_path).expanduser() if output_path else _default_output_path(plan)
    path.parent.mkdir(parents=True, exist_ok=True)
    metadata = {
        "created_at": _iso(datetime.now(timezone.utc)),
        "quote": str(config.get("market", {}).get("default_quote", "USDT")).upper(),
        "symbols": selected_symbols,
        "plan": {
            # Copy scalar plan fields as-is; serialize datetimes as ISO strings.
            **{
                key: value
                for key, value in asdict(plan).items()
                if key not in {"start", "simulation_start", "simulation_end", "end"}
            },
            "start": _iso(plan.start),
            "simulation_start": _iso(plan.simulation_start),
            "simulation_end": _iso(plan.simulation_end),
            "end": _iso(plan.end),
        },
        "external_history": external_history,
    }
    dataset = {"metadata": metadata, "klines": klines}
    path.write_text(json.dumps(dataset, ensure_ascii=False, indent=2), encoding="utf-8")
    return {
        "path": str(path),
        "symbols": selected_symbols,
        "counts": counts,
        "plan": metadata["plan"],
        "external_history": external_history,
    }
def parse_query(url: str) -> dict[str, str]:
    """Test helper for fake HTTP clients: last value wins for repeated keys."""
    query = parse_qs(urlparse(url).query)
    return {name: values[-1] for name, values in query.items()}

View File

@@ -3,11 +3,14 @@
from __future__ import annotations
from dataclasses import asdict, dataclass
from math import log10
from statistics import mean
from typing import Any
from ..audit import audit_event
from .account_service import get_positions
from .market_service import base_asset, get_scan_universe, normalize_symbol
from .research_service import get_external_research
from .signal_service import get_signal_interval, score_opportunity_signal
@@ -29,6 +32,197 @@ def _opportunity_thresholds(config: dict[str, Any]) -> dict[str, float]:
}
def _safe_pct(new: float, old: float) -> float:
if old == 0:
return 0.0
return (new - old) / old
def _clamp(value: float, low: float, high: float) -> float:
return min(max(value, low), high)
def _as_float(value: Any, default: float = 0.0) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default
def _empty_metrics(concentration: float) -> dict[str, float]:
return {
"trend": 0.0,
"momentum": 0.0,
"breakout": 0.0,
"pullback": 0.0,
"volume_confirmation": 1.0,
"liquidity": 0.0,
"trend_alignment": 0.0,
"volatility": 0.0,
"overextension": 0.0,
"downside_risk": 0.0,
"fundamental": 0.0,
"tokenomics": 0.0,
"catalyst": 0.0,
"adoption": 0.0,
"smart_money": 0.0,
"unlock_risk": 0.0,
"regulatory_risk": 0.0,
"research_confidence": 0.0,
"quality": 0.0,
"concentration": round(concentration, 4),
}
def _series_from_klines(klines: list[list[Any]]) -> tuple[list[float], list[float]]:
return [float(item[4]) for item in klines], [float(item[5]) for item in klines]
def _trend_signal(closes: list[float]) -> float:
if len(closes) < 2:
return 0.0
current = closes[-1]
sma_short = mean(closes[-5:]) if len(closes) >= 5 else current
sma_long = mean(closes[-20:]) if len(closes) >= 20 else mean(closes)
if current >= sma_short >= sma_long:
return 1.0
if current < sma_short < sma_long:
return -1.0
return 0.0
def _trend_alignment(interval_closes: dict[str, list[float]] | None) -> float:
    """Average per-interval trend signal across timeframes; 0.0 when unavailable."""
    if not interval_closes:
        return 0.0
    signals: list[float] = []
    for closes in interval_closes.values():
        if len(closes) >= 2:
            signals.append(_trend_signal(closes))
    if not signals:
        return 0.0
    return mean(signals)
def _range_position(current: float, low: float, high: float) -> float:
    """Where *current* sits in [low, high] as 0..1; 0.5 for a degenerate range."""
    if high <= low:
        return 0.5
    span = high - low
    return _clamp((current - low) / span, 0.0, 1.0)
def _normalized_research_score(value: Any) -> float:
    """Normalize provider research inputs to 0..1.

    Provider values can be expressed as either 0..1 or 0..100; anything above
    1.0 is treated as a percentage.
    """
    raw = _as_float(value)
    scaled = raw / 100.0 if raw > 1.0 else raw
    return _clamp(scaled, 0.0, 1.0)
def _research_signals(research: dict[str, Any] | None) -> dict[str, float]:
    """Extract the eight normalized research factors from a provider payload."""
    payload = research or {}
    factor_names = (
        "fundamental",
        "tokenomics",
        "catalyst",
        "adoption",
        "smart_money",
        "unlock_risk",
        "regulatory_risk",
        "research_confidence",
    )
    return {name: _normalized_research_score(payload.get(name)) for name in factor_names}
def _score_candidate(
    closes: list[float],
    volumes: list[float],
    ticker: dict[str, Any],
    weights: dict[str, float],
    concentration: float,
    interval_closes: dict[str, list[float]] | None = None,
    research: dict[str, Any] | None = None,
) -> tuple[float, dict[str, float]]:
    """Score one candidate from price/volume series, ticker stats, and research.

    Returns ``(score, metrics)``: the weighted composite score plus the
    rounded component metrics that produced it. With fewer than two closes or
    no volumes the candidate cannot be scored and neutral metrics are returned.
    """
    if len(closes) < 2 or not volumes:
        return 0.0, _empty_metrics(concentration)
    current = closes[-1]
    sma_short = mean(closes[-5:]) if len(closes) >= 5 else current
    trend = _trend_signal(closes)
    # Momentum blends last-bar, 5-bar, and 24h-ticker percentage change.
    momentum = (
        _safe_pct(closes[-1], closes[-2]) * 0.5
        + (_safe_pct(closes[-1], closes[-5]) * 0.3 if len(closes) >= 5 else 0.0)
        + _as_float(ticker.get("price_change_pct")) / 100.0 * 0.2
    )
    recent_high = max(closes[-20:]) if len(closes) >= 20 else max(closes)
    # 1.0 at the recent high, decaying with distance below it.
    breakout = 1.0 - max((recent_high - current) / recent_high, 0.0)
    avg_volume = mean(volumes[:-1]) if len(volumes) > 1 else volumes[-1]
    volume_confirmation = volumes[-1] / avg_volume if avg_volume else 1.0
    volume_score = _clamp(volume_confirmation - 1.0, -1.0, 2.0)
    volatility = (max(closes[-10:]) - min(closes[-10:])) / current if len(closes) >= 10 and current else 0.0
    quote_volume = _as_float(ticker.get("quote_volume"))
    # Log-scaled liquidity: ~1e6 quote volume scores 0, ~1e9 scores 1.
    liquidity = _clamp((log10(quote_volume) - 6.0) / 3.0, 0.0, 1.0) if quote_volume > 0 else 0.0
    high_price = _as_float(ticker.get("high_price"), recent_high)
    low_price = _as_float(ticker.get("low_price"), min(closes))
    range_position = _range_position(current, low_price, high_price)
    # Pullback score peaks when price sits ~62% up its daily range.
    pullback = 1.0 - abs(range_position - 0.62) / 0.62
    pullback = _clamp(pullback, 0.0, 1.0)
    # Penalize stretch beyond 8% over the short SMA or a >12% daily move.
    overextension = max(_safe_pct(current, sma_short) - 0.08, 0.0) + max(
        _as_float(ticker.get("price_change_pct")) / 100.0 - 0.12, 0.0
    )
    downside_risk = max(0.35 - range_position, 0.0) + max(volatility - 0.18, 0.0)
    trend_alignment = _trend_alignment(interval_closes)
    research_signals = _research_signals(research)
    # Composite research quality: mean of the five positive factors.
    quality = mean(
        [
            research_signals["fundamental"],
            research_signals["tokenomics"],
            research_signals["catalyst"],
            research_signals["adoption"],
            research_signals["smart_money"],
        ]
    )
    # Weighted blend of technical and research factors minus risk penalties;
    # every weight has a default so partial weight configs still work.
    score = (
        weights.get("trend", 1.0) * trend
        + weights.get("momentum", 1.0) * momentum
        + weights.get("breakout", 0.8) * breakout
        + weights.get("pullback", 0.4) * pullback
        + weights.get("volume", 0.7) * volume_score
        + weights.get("liquidity", 0.3) * liquidity
        + weights.get("trend_alignment", 0.8) * trend_alignment
        + weights.get("fundamental", 0.8) * research_signals["fundamental"]
        + weights.get("tokenomics", 0.7) * research_signals["tokenomics"]
        + weights.get("catalyst", 0.5) * research_signals["catalyst"]
        + weights.get("adoption", 0.4) * research_signals["adoption"]
        + weights.get("smart_money", 0.3) * research_signals["smart_money"]
        - weights.get("volatility_penalty", 0.5) * volatility
        - weights.get("overextension_penalty", 0.7) * overextension
        - weights.get("downside_penalty", 0.5) * downside_risk
        - weights.get("unlock_penalty", 0.8) * research_signals["unlock_risk"]
        - weights.get("regulatory_penalty", 0.4) * research_signals["regulatory_risk"]
        - weights.get("position_concentration_penalty", 0.6) * concentration
    )
    metrics = {
        "trend": round(trend, 4),
        "momentum": round(momentum, 4),
        "breakout": round(breakout, 4),
        "pullback": round(pullback, 4),
        "volume_confirmation": round(volume_confirmation, 4),
        "liquidity": round(liquidity, 4),
        "trend_alignment": round(trend_alignment, 4),
        "volatility": round(volatility, 4),
        "overextension": round(overextension, 4),
        "downside_risk": round(downside_risk, 4),
        "fundamental": round(research_signals["fundamental"], 4),
        "tokenomics": round(research_signals["tokenomics"], 4),
        "catalyst": round(research_signals["catalyst"], 4),
        "adoption": round(research_signals["adoption"], 4),
        "smart_money": round(research_signals["smart_money"], 4),
        "unlock_risk": round(research_signals["unlock_risk"], 4),
        "regulatory_risk": round(research_signals["regulatory_risk"], 4),
        "research_confidence": round(research_signals["research_confidence"], 4),
        "quality": round(quality, 4),
        "concentration": round(concentration, 4),
    }
    return score, metrics
def _action_for_opportunity(score: float, metrics: dict[str, float], thresholds: dict[str, float]) -> tuple[str, list[str]]:
reasons: list[str] = []
if metrics["extension_penalty"] >= 1.0 and (metrics["recent_runup"] >= 0.10 or metrics["breakout_pct"] >= 0.03):
@@ -44,6 +238,115 @@ def _action_for_opportunity(score: float, metrics: dict[str, float], thresholds:
return "skip", reasons
def _action_for(
score: float,
concentration: float,
metrics: dict[str, float] | None = None,
risk_limits: dict[str, float] | None = None,
) -> tuple[str, list[str]]:
metrics = metrics or {}
risk_limits = risk_limits or {}
reasons: list[str] = []
if concentration >= 0.5 and score < 0.4:
reasons.append("position concentration is high")
return "trim", reasons
if metrics.get("liquidity", 0.0) < risk_limits.get("min_liquidity", 0.0):
reasons.append("liquidity is below the configured institutional threshold")
return "observe", reasons
if metrics.get("unlock_risk", 0.0) > risk_limits.get("max_unlock_risk", 1.0):
reasons.append("token unlock or dilution risk is too high")
return "observe", reasons
if metrics.get("regulatory_risk", 0.0) > risk_limits.get("max_regulatory_risk", 1.0):
reasons.append("regulatory or listing risk is too high")
return "observe", reasons
if metrics.get("overextension", 0.0) >= risk_limits.get("max_overextension", 0.08):
reasons.append("move looks extended; wait for a cleaner entry")
return "observe", reasons
if metrics.get("downside_risk", 0.0) >= risk_limits.get("max_downside_risk", 0.3) and score < 1.0:
reasons.append("price is weak inside its recent range")
return "observe", reasons
if score >= 1.8 and metrics.get("quality", 0.0) >= risk_limits.get("min_quality_for_add", 0.0):
reasons.append("trend, liquidity, and research signals are aligned")
return "add", reasons
if score >= 0.6:
reasons.append("trend remains constructive")
return "hold", reasons
if score <= -0.2:
reasons.append("momentum and structure have weakened")
return "exit", reasons
reasons.append("signal is mixed and needs confirmation")
return "observe", reasons
def _lookback_intervals(config: dict[str, Any]) -> list[str]:
configured = config.get("opportunity", {}).get("lookback_intervals", ["1h"])
intervals = [str(item) for item in configured if str(item).strip()]
return intervals or ["1h"]
def _risk_limits(config: dict[str, Any]) -> dict[str, float]:
    """Float-coerced risk-limit overrides from config (possibly empty)."""
    raw = config.get("opportunity", {}).get("risk_limits", {})
    return {str(name): _as_float(setting) for name, setting in raw.items()}
def _ticker_metrics(ticker: dict[str, Any]) -> dict[str, float]:
    """Normalize a 24h ticker payload (camelCase or snake_case keys) to floats."""

    def pick(camel: str, snake: str) -> float:
        # camelCase (raw exchange payload) takes precedence over snake_case.
        return _as_float(ticker.get(camel) or ticker.get(snake))

    return {
        "price_change_pct": pick("priceChangePercent", "price_change_pct"),
        "quote_volume": pick("quoteVolume", "quote_volume"),
        "high_price": pick("highPrice", "high_price"),
        "low_price": pick("lowPrice", "low_price"),
    }
def _candidate_series(
    spot_client: Any,
    symbol: str,
    intervals: list[str],
    limit: int,
) -> tuple[list[float], list[float], dict[str, list[float]]]:
    """Fetch close/volume series per interval; the first interval is the primary one."""
    interval_closes: dict[str, list[float]] = {}
    primary: tuple[list[float], list[float]] = ([], [])
    for position, interval in enumerate(intervals):
        rows = spot_client.klines(symbol=symbol, interval=interval, limit=limit)
        closes, volumes = _series_from_klines(rows)
        interval_closes[interval] = closes
        if position == 0:
            primary = (closes, volumes)
    return primary[0], primary[1], interval_closes
def _add_research_metrics(metrics: dict[str, float], research: dict[str, Any] | None) -> None:
    """Merge rounded research factors into *metrics* plus a composite "quality" (in place)."""
    signals = _research_signals(research)
    for name, signal in signals.items():
        metrics[name] = round(signal, 4)
    quality_inputs = [
        signals["fundamental"],
        signals["tokenomics"],
        signals["catalyst"],
        signals["adoption"],
        signals["smart_money"],
    ]
    metrics["quality"] = round(mean(quality_inputs), 4)
def _research_score(research: dict[str, Any] | None, weights: dict[str, float]) -> float:
    """Weighted sum of positive research factors minus unlock/regulatory penalties."""
    signals = _research_signals(research)
    positives = (
        ("fundamental", "fundamental", 0.8),
        ("tokenomics", "tokenomics", 0.7),
        ("catalyst", "catalyst", 0.5),
        ("adoption", "adoption", 0.4),
        ("smart_money", "smart_money", 0.3),
    )
    penalties = (
        ("unlock_penalty", "unlock_risk", 0.8),
        ("regulatory_penalty", "regulatory_risk", 0.4),
    )
    # Accumulate in the same left-to-right order as the original expression
    # to keep floating-point results bit-identical.
    total = 0.0
    for weight_key, signal_key, default in positives:
        total += weights.get(weight_key, default) * signals[signal_key]
    for weight_key, signal_key, default in penalties:
        total -= weights.get(weight_key, default) * signals[signal_key]
    return total
def scan_opportunities(
config: dict[str, Any],
*,
@@ -51,6 +354,7 @@ def scan_opportunities(
symbols: list[str] | None = None,
) -> dict[str, Any]:
opportunity_config = config.get("opportunity", {})
weights = opportunity_config.get("weights", {})
ignore_dust = bool(opportunity_config.get("ignore_dust", True))
interval = get_signal_interval(config)
thresholds = _opportunity_thresholds(config)
@@ -62,18 +366,27 @@ def scan_opportunities(
total_held = sum(concentration_map.values()) or 1.0
universe = get_scan_universe(config, spot_client=spot_client, symbols=symbols)[:scan_limit]
external_research = get_external_research(
config,
symbols=[normalize_symbol(ticker["symbol"]) for ticker in universe],
quote=quote,
)
recommendations = []
for ticker in universe:
symbol = normalize_symbol(ticker["symbol"])
klines = spot_client.klines(symbol=symbol, interval=interval, limit=24)
closes = [float(item[4]) for item in klines]
volumes = [float(item[5]) for item in klines]
closes, volumes = _series_from_klines(klines)
concentration = concentration_map.get(symbol, 0.0) / total_held
opportunity_score, metrics = score_opportunity_signal(closes, volumes, ticker, opportunity_config)
score = opportunity_score - thresholds["overlap_penalty"] * concentration
action, reasons = _action_for_opportunity(score, metrics, thresholds)
metrics["opportunity_score"] = round(opportunity_score, 4)
metrics["position_weight"] = round(concentration, 4)
research = external_research.get(symbol, {})
research_score = _research_score(research, weights)
score += research_score
metrics["research_score"] = round(research_score, 4)
_add_research_metrics(metrics, research)
action, reasons = _action_for_opportunity(score, metrics, thresholds)
if symbol.endswith(quote):
reasons.append(f"base asset {base_asset(symbol, quote)} passed liquidity and tradability filters")
if concentration > 0:

View File

@@ -8,7 +8,11 @@ from typing import Any
from ..audit import audit_event
from .account_service import get_positions
from .market_service import normalize_symbol
from .signal_service import get_signal_interval, get_signal_weights, score_portfolio_signal
from .signal_service import (
get_signal_interval,
get_signal_weights,
score_portfolio_signal,
)
@dataclass

View File

@@ -0,0 +1,214 @@
"""External research signal providers for opportunity scoring."""
from __future__ import annotations
import json
from collections.abc import Callable
from math import log10
from typing import Any
from urllib.parse import urlencode
from urllib.request import Request, urlopen
from .market_service import base_asset, normalize_symbol
HttpGet = Callable[[str, dict[str, str], float], Any]
def _clamp(value: float, low: float = 0.0, high: float = 1.0) -> float:
return min(max(value, low), high)
def _as_float(value: Any, default: float = 0.0) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default
def _safe_ratio(numerator: float, denominator: float) -> float:
if denominator <= 0:
return 0.0
return numerator / denominator
def _log_score(value: float, *, floor: float, span: float) -> float:
    """Log10-scale *value* into 0..1: 0 at 10**floor, 1 at 10**(floor + span)."""
    if value <= 0:
        return 0.0
    scaled = (log10(value) - floor) / span
    return _clamp(scaled)
def _pct_score(value: float, *, low: float, high: float) -> float:
    """Linearly map *value* from [low, high] onto 0..1; 0.0 for a degenerate range."""
    if high <= low:
        return 0.0
    fraction = (value - low) / (high - low)
    return _clamp(fraction)
def _public_http_get(url: str, headers: dict[str, str], timeout: float) -> Any:
    """GET *url* with *headers* and parse the body as JSON."""
    request = Request(url, headers=headers)
    with urlopen(request, timeout=timeout) as response:  # noqa: S310 - user-configured market data endpoint
        payload = response.read()
    return json.loads(payload.decode("utf-8"))
def _build_url(base_url: str, path: str, params: dict[str, str]) -> str:
return f"{base_url.rstrip('/')}{path}?{urlencode(params)}"
def _chunked(items: list[str], size: int) -> list[list[str]]:
return [items[index : index + size] for index in range(0, len(items), size)]
def _coingecko_market_to_signals(row: dict[str, Any], *, is_trending: bool = False) -> dict[str, float]:
    """Convert one CoinGecko /coins/markets row into normalized research factors.

    All outputs are rounded to 4 decimals in 0..1. Heuristic blends: rank and
    size feed "fundamental"/"adoption", supply and FDV feed "tokenomics" and
    "unlock_risk", momentum windows feed "catalyst".
    """
    market_cap = _as_float(row.get("market_cap"))
    fdv = _as_float(row.get("fully_diluted_valuation"))
    volume = _as_float(row.get("total_volume"))
    rank = _as_float(row.get("market_cap_rank"), 9999.0)
    circulating = _as_float(row.get("circulating_supply"))
    total_supply = _as_float(row.get("total_supply"))
    max_supply = _as_float(row.get("max_supply"))
    supply_cap = max_supply or total_supply
    # Rank 1 scores 1.0; the score decays one quarter per decade of rank.
    rank_score = _clamp(1.0 - (log10(max(rank, 1.0)) / 4.0))
    size_score = _log_score(market_cap, floor=7.0, span=5.0)
    volume_to_mcap = _safe_ratio(volume, market_cap)
    # 10% daily volume/market-cap turnover counts as fully liquid.
    liquidity_quality = _clamp(volume_to_mcap / 0.10)
    fdv_ratio = _safe_ratio(fdv, market_cap) if fdv and market_cap else 1.0
    fdv_dilution_risk = _clamp((fdv_ratio - 1.0) / 4.0)
    supply_unlocked = _clamp(_safe_ratio(circulating, supply_cap)) if supply_cap else max(0.0, 1.0 - fdv_dilution_risk)
    supply_dilution_risk = 1.0 - supply_unlocked
    unlock_risk = max(fdv_dilution_risk, supply_dilution_risk * 0.8)
    pct_7d = _as_float(row.get("price_change_percentage_7d_in_currency"))
    pct_30d = _as_float(row.get("price_change_percentage_30d_in_currency"))
    pct_200d = _as_float(row.get("price_change_percentage_200d_in_currency"))
    medium_momentum = _pct_score(pct_30d, low=-15.0, high=60.0)
    long_momentum = _pct_score(pct_200d, low=-40.0, high=150.0)
    trend_catalyst = _pct_score(pct_7d, low=-5.0, high=25.0)
    trend_bonus = 1.0 if is_trending else 0.0
    tokenomics = _clamp(0.65 * supply_unlocked + 0.35 * (1.0 - fdv_dilution_risk))
    fundamental = _clamp(0.40 * rank_score + 0.35 * size_score + 0.25 * liquidity_quality)
    catalyst = _clamp(0.45 * trend_catalyst + 0.40 * medium_momentum + 0.15 * trend_bonus)
    adoption = _clamp(0.45 * rank_score + 0.35 * liquidity_quality + 0.20 * long_momentum)
    smart_money = _clamp(0.35 * rank_score + 0.35 * liquidity_quality + 0.30 * (1.0 - unlock_risk))
    # Coarse proxy: large caps assumed lower regulatory/listing risk.
    regulatory_risk = 0.10 if rank <= 100 else 0.20 if rank <= 500 else 0.35
    # Confidence = fraction of source fields that were present (truthy).
    # NOTE(review): a missing market_cap_rank defaults to 9999.0, which is
    # truthy and therefore still counts as populated — confirm intended.
    populated_fields = sum(
        1
        for value in (market_cap, fdv, volume, rank, circulating, supply_cap, pct_7d, pct_30d, pct_200d)
        if value
    )
    confidence = _clamp(populated_fields / 9.0)
    return {
        "fundamental": round(fundamental, 4),
        "tokenomics": round(tokenomics, 4),
        "catalyst": round(catalyst, 4),
        "adoption": round(adoption, 4),
        "smart_money": round(smart_money, 4),
        "unlock_risk": round(unlock_risk, 4),
        "regulatory_risk": round(regulatory_risk, 4),
        "research_confidence": round(confidence, 4),
    }
def _coingecko_headers(config: dict[str, Any]) -> dict[str, str]:
coingecko_config = config.get("coingecko", {})
headers = {"accept": "application/json", "user-agent": "coinhunter/2"}
api_key = str(coingecko_config.get("api_key", "")).strip()
if api_key:
headers["x-cg-demo-api-key"] = api_key
return headers
def _fetch_coingecko_research(
    config: dict[str, Any],
    *,
    symbols: list[str],
    quote: str,
    http_get: HttpGet | None = None,
) -> dict[str, dict[str, float]]:
    """Fetch CoinGecko market rows for the symbols' base assets and convert to signals.

    Best-effort by design: the trending lookup and each markets page swallow
    failures so a flaky endpoint degrades to fewer signals, not an exception.
    Returns a map of normalized exchange symbol -> research-factor dict.
    """
    if not symbols:
        return {}
    opportunity_config = config.get("opportunity", {})
    coingecko_config = config.get("coingecko", {})
    base_url = str(coingecko_config.get("base_url", "https://api.coingecko.com/api/v3"))
    timeout = _as_float(opportunity_config.get("research_timeout_seconds"), 4.0)
    headers = _coingecko_headers(config)
    http_get = http_get or _public_http_get
    # Map lowercase base asset (CoinGecko's symbol field) back to the exchange symbol.
    base_to_symbol = {
        base_asset(normalize_symbol(symbol), quote).lower(): normalize_symbol(symbol)
        for symbol in symbols
        if normalize_symbol(symbol)
    }
    bases = sorted(base_to_symbol)
    if not bases:
        return {}
    # Optional: ids of currently trending coins, used for the catalyst bonus.
    trending_ids: set[str] = set()
    try:
        trending_url = _build_url(base_url, "/search/trending", {})
        trending_payload = http_get(trending_url, headers, timeout)
        for item in trending_payload.get("coins", []):
            coin = item.get("item", {})
            coin_id = str(coin.get("id", "")).strip()
            if coin_id:
                trending_ids.add(coin_id)
    except Exception:
        # Deliberate broad catch: trending is a nice-to-have only.
        trending_ids = set()
    research: dict[str, dict[str, float]] = {}
    # /coins/markets caps symbol lists; request in chunks of 50.
    for chunk in _chunked(bases, 50):
        params = {
            "vs_currency": "usd",
            "symbols": ",".join(chunk),
            "include_tokens": "top",
            "order": "market_cap_desc",
            "per_page": "250",
            "page": "1",
            "sparkline": "false",
            "price_change_percentage": "7d,30d,200d",
        }
        try:
            markets_url = _build_url(base_url, "/coins/markets", params)
            rows = http_get(markets_url, headers, timeout)
        except Exception:
            # Deliberate broad catch: skip a failed page, keep the rest.
            continue
        seen_bases: set[str] = set()
        # Rows are market-cap ordered; the first row per base wins.
        for row in rows if isinstance(rows, list) else []:
            symbol = str(row.get("symbol", "")).lower()
            if symbol in seen_bases or symbol not in base_to_symbol:
                continue
            seen_bases.add(symbol)
            normalized = base_to_symbol[symbol]
            research[normalized] = _coingecko_market_to_signals(
                row,
                is_trending=str(row.get("id", "")) in trending_ids,
            )
    return research
def get_external_research(
    config: dict[str, Any],
    *,
    symbols: list[str],
    quote: str,
    http_get: HttpGet | None = None,
) -> dict[str, dict[str, float]]:
    """Fetch automated research signals for symbols.

    Returns an empty map when disabled or when the configured provider is unavailable.
    Opportunity scans should continue rather than fail because a research endpoint timed out.
    """
    opportunity_config = config.get("opportunity", {})
    if not bool(opportunity_config.get("auto_research", True)):
        return {}
    provider = str(opportunity_config.get("research_provider", "coingecko")).strip().lower()
    # Only CoinGecko is implemented; "", "off", "none", "disabled", or any
    # other provider name all behave as disabled.
    if provider != "coingecko":
        return {}
    return _fetch_coingecko_research(config, symbols=symbols, quote=quote, http_get=http_get)