"""
Hong Kong stock AI composite scoring backtest, v7.

Market filter + stronger volume confirmation + LLM sentiment.

New in v7:
1. Market filter: every buy is blocked while the Hang Seng Index (HSI)
   closes below its MA20.
2. Volume confirmation on entries (see ``prep`` for the exact rule).
3. LLM sentiment: per-stock scores served from a JSON cache, with built-in
   per-year defaults as fallback.

Everything else follows v6: profit protection, dual-source data and three
stop-loss variants (A fixed, B ATR-based, C adaptive hybrid).
"""

import json
import os
import subprocess
import sys
import time
import warnings

import numpy as np
import pandas as pd

try:
    import yfinance as yf
except ImportError:  # cached CSVs remain usable without the downloader
    yf = None

warnings.filterwarnings('ignore')

CACHE_DIR = "data"
SENTIMENT_CACHE = os.path.join(CACHE_DIR, "llm_sentiment.json")
HSI_CACHE = os.path.join(CACHE_DIR, "HSI.csv")
os.makedirs(CACHE_DIR, exist_ok=True)
FORCE_REFRESH = "--refresh" in sys.argv

STOCKS = {
    "平安好医生": "1833.HK",
    "叮当健康": "9886.HK",
    "中原建业": "9982.HK",
    "泰升集团": "0687.HK",
    "阅文集团": "0772.HK",
    "中芯国际": "0981.HK",
}

PERIOD = "2y"
INITIAL_CAPITAL = 10000.0
W_TECH, W_FUND, W_SENT = 0.60, 0.30, 0.10

# ---------------------------------------------------------------------
# v7 parameters
# ---------------------------------------------------------------------
BUY_THRESH = 1.5
SELL_THRESH = -1.5
COOLDOWN_DAYS = 0        # cooldown after a buy, in calendar days; 0 = off
VOL_CONFIRM = 1.2        # today's volume must be >= this multiple of Vol20
VOL_PREV_FLOOR = 0.9     # previous day's volume must be >= this multiple
VOL_DAYS = 2             # number of days the volume rule looks at (display)

# Market filter
MARKET_FILTER = True     # enable/disable the index filter
MARKET_TICKER = "^HSI"   # Hang Seng Index
MARKET_MA = 20           # moving-average window used by the filter

# Profit-protection thresholds (fraction of entry price)
PROFIT_STAGE_1 = 0.30
PROFIT_STAGE_2 = 0.50
PROFIT_STAGE_3 = 1.00

# Stop-loss parameters
A_FIXED_STOP = 0.12
B_ATR_MULT, B_MIN_STOP, B_MAX_STOP = 2.5, 0.08, 0.35
C_LOW_ATR_PCT, C_HIGH_ATR_PCT = 0.05, 0.15
C_LOW_FIXED, C_MID_ATR_MULT, C_HIGH_ATR_MULT = 0.08, 2.5, 2.0
C_HIGH_MAX, C_MIN_STOP, C_MID_MAX = 0.40, 0.08, 0.35

# ---------------------------------------------------------------------
# Fundamental snapshots: per stock, a date-ordered list of scores; each
# entry applies from its "from" date until the next entry.
# ---------------------------------------------------------------------
FUNDAMENTAL = {
    "平安好医生": [
        {"from": "2024-01-01", "score": -3.0},
        {"from": "2024-08-01", "score": -1.0},
        {"from": "2025-01-01", "score": 0.0},
        {"from": "2025-08-01", "score": 1.0},
    ],
    "叮当健康": [
        {"from": "2024-01-01", "score": -3.0},
        {"from": "2024-06-01", "score": -2.0},
        {"from": "2025-01-01", "score": -1.0},
        {"from": "2025-09-01", "score": 1.0},
    ],
    "中原建业": [
        {"from": "2024-01-01", "score": -3.0},
        {"from": "2024-06-01", "score": -4.0},
        {"from": "2025-01-01", "score": -4.0},
        {"from": "2025-10-01", "score": -5.0},
    ],
    "泰升集团": [
        {"from": "2024-01-01", "score": -1.0},
        {"from": "2024-06-01", "score": -1.0},
        {"from": "2025-01-01", "score": -2.0},
        {"from": "2025-10-01", "score": -2.0},
    ],
    "阅文集团": [
        {"from": "2024-01-01", "score": 1.0},
        {"from": "2024-06-01", "score": 2.0},
        {"from": "2025-01-01", "score": 2.0},
        {"from": "2025-10-01", "score": 3.0},
    ],
    "中芯国际": [
        {"from": "2024-01-01", "score": 2.0},
        {"from": "2024-06-01", "score": 3.0},
        {"from": "2025-01-01", "score": 3.0},
        {"from": "2025-10-01", "score": 4.0},
    ],
}


# ---------------------------------------------------------------------
# LLM sentiment
# ---------------------------------------------------------------------
class LLMSentimentGenerator:
    """Serve per-stock, per-day sentiment scores.

    Scores come from a JSON cache (``{key: {"YYYY-MM-DD": score}}``) when an
    entry exists; otherwise a built-in per-year default is returned. Hooking
    up a real LLM is left as a placeholder in ``batch_generate``.
    """

    # Default score per stock and calendar year, used when the cache misses.
    _BASE_SCORES = {
        "平安好医生": {2024: -1, 2025: 1, 2026: 2},
        "叮当健康": {2024: -2, 2025: 0, 2026: 1},
        "中原建业": {2024: -3, 2025: -4, 2026: -4},
        "泰升集团": {2024: -1, 2025: -1, 2026: -1},
        "阅文集团": {2024: 1, 2025: 2, 2026: 3},
        "中芯国际": {2024: 2, 2025: 4, 2026: 5},
    }

    def __init__(self, cache_file=SENTIMENT_CACHE):
        self.cache_file = cache_file
        self.cache = self._load_cache()

    def _load_cache(self):
        """Return the cached scores, or ``{}`` if the file is absent/unusable."""
        try:
            with open(self.cache_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            return {}
        except (OSError, ValueError) as exc:
            # A broken cache must not abort the backtest; fall back to defaults.
            print(f" ⚠️ 舆情缓存读取失败,已忽略: {exc}")
            return {}
        return data if isinstance(data, dict) else {}

    def _save_cache(self):
        """Write the in-memory cache back to ``cache_file`` as UTF-8 JSON."""
        with open(self.cache_file, 'w', encoding='utf-8') as f:
            json.dump(self.cache, f, indent=2, default=str)

    def get_sentiment(self, stock_name, date, news_list=None):
        """Return the sentiment score of ``stock_name`` on ``date``.

        Args:
            stock_name: Stock display name (a key of ``STOCKS``).
            date: ``pandas.Timestamp`` (needs ``.date()`` and ``.year``).
            news_list: Unused; reserved for real news input.

        Returns:
            The cached score when present, else the per-year default, else 0.
        """
        sym = stock_name[:4]  # cache entries are keyed by the first 4 chars
        date_str = str(date.date())
        cached = self.cache.get(sym, {})
        if date_str in cached:
            return cached[date_str]
        return self._BASE_SCORES.get(stock_name, {}).get(date.year, 0)

    def batch_generate(self, stock_name, start_date, end_date):
        """Placeholder for bulk generation through an external LLM.

        Currently only logs the request and returns the score for
        ``start_date`` as given by ``get_sentiment``.
        """
        print(f" 🤖 LLM: 为 {stock_name} 生成 {start_date}~{end_date} 舆情...")
        return self.get_sentiment(stock_name, pd.Timestamp(start_date))


# Module-wide sentiment source used by the simulator.
sentiment_gen = LLMSentimentGenerator()


# ---------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------
def _download(ticker):
    """Download ``PERIOD`` of adjusted daily bars; ``None`` if unavailable."""
    if yf is None:
        print(" ⚠️ 未安装 yfinance,无法下载数据")
        return None
    df = yf.download(ticker, period=PERIOD, auto_adjust=True, progress=False)
    if df.empty:
        return None
    if isinstance(df.columns, pd.MultiIndex):
        # Drop the second column level so columns are plain field names.
        df.columns = df.columns.droplevel(1)
    return df


def load_market_data():
    """Load index data for the market filter.

    Returns:
        DataFrame with at least ``Close`` and ``MA{MARKET_MA}`` columns, or
        ``None`` when nothing could be downloaded (filter is then bypassed by
        ``check_market_ok``).
    """
    ma_col = f"MA{MARKET_MA}"
    if os.path.exists(HSI_CACHE) and not FORCE_REFRESH:
        df = pd.read_csv(HSI_CACHE, index_col=0, parse_dates=True)
        if ma_col not in df.columns:
            # Cache written with a different MARKET_MA (or without the column).
            df[ma_col] = df["Close"].rolling(MARKET_MA).mean()
        print(f" 📂 大盘缓存: HSI ({len(df)}行)")
        return df

    print(f" 🌐 下载大盘: {MARKET_TICKER}")
    df = _download(MARKET_TICKER)
    if df is None:
        print(" ⚠️ 大盘数据下载失败,禁用大盘过滤")
        return None
    df[ma_col] = df["Close"].rolling(MARKET_MA).mean()
    df.to_csv(HSI_CACHE)
    return df


def check_market_ok(market_df, date):
    """Return True when opening a position is allowed on ``date``.

    Buying is allowed when the filter is off, when index data is missing for
    that day (no frame, no row, or a NaN close/MA such as during the MA
    warm-up), or when the index close is at or above its moving average.
    """
    if not MARKET_FILTER or market_df is None:
        return True
    if date not in market_df.index:
        return True  # no data for that day: let the trade through
    close = float(market_df.loc[date, "Close"])
    ma = float(market_df.loc[date, f"MA{MARKET_MA}"])
    if np.isnan(close) or np.isnan(ma):
        return True  # NaN is missing data too, not a bearish signal
    return close >= ma


# ---------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------
def get_snap(tl, date):
    """Return the score of the latest timeline entry effective on ``date``.

    ``tl`` is a list of ``{"from": "YYYY-MM-DD", "score": x}`` in ascending
    date order. Dates before the first entry get the first entry's score.
    """
    day = str(date.date())  # ISO dates compare correctly as strings
    value = tl[0]["score"]
    for entry in tl:
        if day < entry["from"]:
            break
        value = entry["score"]
    return value


def calc_rsi(s, p=14):
    """RSI of series ``s`` using exponentially smoothed gains/losses."""
    delta = s.diff()
    gain = delta.clip(lower=0).ewm(com=p - 1, min_periods=p).mean()
    loss = (-delta.clip(upper=0)).ewm(com=p - 1, min_periods=p).mean()
    return 100 - 100 / (1 + gain / loss)


def calc_macd(s):
    """MACD histogram: (EMA12 - EMA26) minus its 9-span EMA."""
    macd = s.ewm(span=12, adjust=False).mean() - s.ewm(span=26, adjust=False).mean()
    return macd - macd.ewm(span=9, adjust=False).mean()


def calc_atr(df, p=14):
    """Average true range from ``High``/``Low``/``Close`` columns."""
    hi, lo, cl = df["High"], df["Low"], df["Close"]
    prev_close = cl.shift(1)
    tr = pd.concat(
        [hi - lo, (hi - prev_close).abs(), (lo - prev_close).abs()], axis=1
    ).max(axis=1)
    return tr.ewm(com=p - 1, min_periods=p).mean()


def tech_score(row):
    """Technical score of one prepared bar, clipped to [-10, 10].

    ``row`` must expose RSI, MH, MH_p, MA5, MA20, MA60, Close, Cp and MA20p
    as attributes (a row produced by ``prep``).
    """
    s = 0
    # RSI: oversold is bullish, overbought is bearish.
    if row.RSI < 30:
        s += 3
    elif row.RSI < 45:
        s += 1
    elif row.RSI > 70:
        s -= 3
    elif row.RSI > 55:
        s -= 1
    # MACD histogram: a zero cross weighs more than just being on one side.
    if row.MH > 0 and row.MH_p <= 0:
        s += 3
    elif row.MH < 0 and row.MH_p >= 0:
        s -= 3
    elif row.MH > 0:
        s += 1
    else:
        s -= 1
    # Moving-average alignment.
    if row.MA5 > row.MA20 > row.MA60:
        s += 2
    elif row.MA5 < row.MA20 < row.MA60:
        s -= 2
    # Close crossing MA20 today.
    if row.Close > row.MA20 and row.Cp <= row.MA20p:
        s += 1
    elif row.Close < row.MA20 and row.Cp >= row.MA20p:
        s -= 1
    return float(np.clip(s, -10, 10))


def pos_ratio(score):
    """Fraction of capital to deploy for a given composite score."""
    if score >= 5:
        return 1.0
    if score >= 3:
        return 0.6
    return 0.3


def load(ticker):
    """Return daily bars for ``ticker`` from the CSV cache or a download.

    Returns ``None`` when the download yields nothing.
    """
    sym = ticker.replace(".HK", "")
    fp = os.path.join(CACHE_DIR, f"{sym}.csv")
    if os.path.exists(fp) and not FORCE_REFRESH:
        df = pd.read_csv(fp, index_col=0, parse_dates=True)
        print(f" 📂 缓存: {fp} ({len(df)}行)")
        return df
    print(f" 🌐 下载: {ticker}")
    df = _download(ticker)
    if df is None:
        return None
    df.to_csv(fp)
    return df


def prep(ticker):
    """Load ``ticker`` and add every indicator column the simulator reads.

    Returns ``None`` when fewer than 60 bars are available; otherwise the
    frame with warm-up (NaN) rows dropped.
    """
    df = load(ticker)
    if df is None or len(df) < 60:
        return None
    close = df["Close"]
    df["RSI"] = calc_rsi(close)
    hist = calc_macd(close)
    df["MH"] = hist
    df["MH_p"] = hist.shift(1)
    for p in (5, 20, 60):
        df[f"MA{p}"] = close.rolling(p).mean()
    df["MA20p"] = df["MA20"].shift(1)
    df["Cp"] = close.shift(1)
    df["Vol20"] = df["Volume"].rolling(20).mean()
    df["ATR"] = calc_atr(df)
    # Relaxed two-day volume rule: today >= VOL_CONFIRM x Vol20 and the
    # previous day not shrinking (>= VOL_PREV_FLOOR x its own Vol20).
    # NOTE(review): the console banner describes this as "consecutive days
    # above VOL_CONFIRM", which is stricter than what is computed here.
    df["VolRatio"] = df["Volume"] / df["Vol20"]
    df["VolConfirm"] = (df["VolRatio"] >= VOL_CONFIRM) & (
        df["VolRatio"].shift(1) >= VOL_PREV_FLOOR
    )
    return df.dropna()


def c_stop_params(avg_atr_pct):
    """Pick variant-C stop settings from the stock's average ATR%.

    Returns:
        ``(kind, multiplier_or_fixed_pct, max_stop, note)`` where ``kind`` is
        ``"fixed"`` or ``"atr"``.
    """
    if avg_atr_pct < C_LOW_ATR_PCT:
        return "fixed", C_LOW_FIXED, C_LOW_FIXED, "低波动→固定止损"
    if avg_atr_pct < C_HIGH_ATR_PCT:
        return "atr", C_MID_ATR_MULT, C_MID_MAX, "中波动→ATR×2.5"
    return "atr", C_HIGH_ATR_MULT, C_HIGH_MAX, "高波动→ATR×2.0"


# ---------------------------------------------------------------------
# v7 engine: market filter + volume confirmation
# ---------------------------------------------------------------------
def _entry_trail(mode, row, price, ratio, c_params):
    """Return ``(trail_pct, note)`` for a position opened on ``row``."""
    c_mode, c_mult, c_max, c_note = c_params
    size = f"仓{ratio*100:.0f}%"
    if mode == "A":
        return A_FIXED_STOP, f"{size} 固定{A_FIXED_STOP*100:.0f}%"
    if mode == "B":
        raw = float(row["ATR"]) * B_ATR_MULT / price
        trail = float(np.clip(raw, B_MIN_STOP, B_MAX_STOP))
        return trail, f"{size} ATR{trail*100:.1f}%"
    if c_mode == "fixed":
        trail = c_mult if c_mult else C_LOW_FIXED
        return trail, f"{size} {c_note} {trail*100:.0f}%"
    raw = float(row["ATR"]) * c_mult / price
    trail = float(np.clip(raw, C_MIN_STOP, c_max))
    return trail, f"{size} {c_note} {trail*100:.1f}%"


def _protected_stop(entry, high_price, trail_pct):
    """Return ``(stop_price, tag)`` with tiered profit protection applied.

    Tiers are keyed on the best gain reached since entry (``high_price``),
    so a floor stays in force after the price pulls back:

    * peak >= PROFIT_STAGE_1: stop never below entry (break-even).
    * peak >= PROFIT_STAGE_2: stop never below entry * 1.10.
    * peak >= PROFIT_STAGE_3: trail widened x1.5, stop never below
      entry * 1.5.

    NOTE(review): tiers used to be keyed on the current P&L, which made the
    floors unreachable (price was always above them while the tier was
    active) and suppressed the stop entirely in the top tier.
    """
    peak_gain = (high_price - entry) / entry
    if peak_gain >= PROFIT_STAGE_3:
        return max(high_price * (1 - trail_pct * 1.5), entry * 1.5), "🚀"
    if peak_gain >= PROFIT_STAGE_2:
        return max(high_price * (1 - trail_pct), entry * 1.10), "🔒"
    if peak_gain >= PROFIT_STAGE_1:
        return max(high_price * (1 - trail_pct), entry), "🛡️"
    return high_price * (1 - trail_pct), ""


def simulate_v7(name, df, market_df, mode="A", c_avg_atr_pct=None):
    """Run one long-only backtest of ``name`` over the prepared frame ``df``.

    Args:
        name: Stock display name (key of ``FUNDAMENTAL``).
        df: Frame produced by ``prep``.
        market_df: Index frame from ``load_market_data`` or ``None``.
        mode: Stop-loss variant: "A" fixed, "B" ATR-based, anything else is
            handled as "C" (adaptive).
        c_avg_atr_pct: Average ATR/Close of the stock; required for mode "C".

    Returns:
        ``(final_equity, trades)`` where ``trades`` is a list of dict rows.

    Raises:
        ValueError: mode "C" without ``c_avg_atr_pct``.
    """
    if mode == "C":
        if c_avg_atr_pct is None:
            raise ValueError("mode 'C' requires c_avg_atr_pct")
        c_params = c_stop_params(c_avg_atr_pct)
    else:
        c_params = ("fixed", 0, 0, "")

    capital, position, entry = INITIAL_CAPITAL, 0, 0.0
    high_price, trail_pct = 0.0, 0.0
    trades = []
    last_buy_date = None

    for date, row in df.iterrows():
        f = get_snap(FUNDAMENTAL[name], date)
        s = sentiment_gen.get_sentiment(name, date)
        t = tech_score(row)
        score = W_TECH * t + W_FUND * f + W_SENT * s
        price = float(row["Close"])
        vol_confirm = row["VolConfirm"]
        market_ok = check_market_ok(market_df, date)

        in_cooldown = (
            last_buy_date is not None
            and COOLDOWN_DAYS > 0
            and (date - last_buy_date).days < COOLDOWN_DAYS
        )

        can_buy = (score >= BUY_THRESH and position == 0 and capital > price
                   and vol_confirm and market_ok and not in_cooldown)

        if can_buy:
            ratio = pos_ratio(score)
            shares = int(capital * ratio / price)
            if shares > 0:
                position, entry, high_price = shares, price, price
                capital -= shares * price
                last_buy_date = date
                trail_pct, note = _entry_trail(mode, row, price, ratio, c_params)
                market_tag = "🟢大盘OK" if market_ok else "🔴大盘差"
                trades.append({"操作": "买入", "日期": date.date(),
                               "价格": round(price, 4), "股数": shares,
                               "评分": round(score, 2),
                               "备注": f"{note} {market_tag}"})

        elif position > 0:
            high_price = max(high_price, price)
            stop_price, lock_tag = _protected_stop(entry, high_price, trail_pct)
            hit_stop = price <= stop_price

            if hit_stop or score <= SELL_THRESH:
                pct = (price - entry) / entry * 100
                if hit_stop:
                    reason = f"止损 高{high_price:.3f}→线{stop_price:.3f}{lock_tag}"
                else:
                    reason = f"评分出({score:.1f})"
                capital += position * price
                trades.append({"操作": "卖出", "日期": date.date(),
                               "价格": round(price, 4), "股数": position,
                               "评分": round(score, 2),
                               "盈亏%": f"{pct:+.1f}%", "备注": reason})
                position, high_price, trail_pct = 0, 0.0, 0.0

    # Mark any open position to the last close.
    last = float(df["Close"].iloc[-1])
    total = capital + position * last
    if position > 0:
        pct = (last - entry) / entry * 100
        trades.append({"操作": "未平仓", "日期": "持仓中", "价格": round(last, 4),
                       "股数": position, "评分": "-",
                       "盈亏%": f"{pct:+.1f}%", "备注": "-"})
    return total, trades


# ---------------------------------------------------------------------
# Reporting
# ---------------------------------------------------------------------
def run_v7(name, ticker, market_df):
    """Backtest one stock with variants A/B/C and print the comparison.

    Returns:
        Dict with percentage returns ``A``/``B``/``C``, buy-and-hold ``BH``,
        average ATR% ``atr`` and the variant-C note, or ``None`` when there
        is not enough data.
    """
    print(f"\n{'='*72}")
    print(f" {name} ({ticker})")
    print(f"{'='*72}")

    df = prep(ticker)
    if df is None:
        print(" ⚠️ 数据不足,跳过")
        return None

    avg_atr_pct = float(df["ATR"].mean() / df["Close"].mean())
    bh = (float(df["Close"].iloc[-1]) / float(df["Close"].iloc[0]) - 1) * 100
    c_note = c_stop_params(avg_atr_pct)[3]

    print(f" ATR均值: {avg_atr_pct*100:.1f}% C策略: [{c_note}]")
    print(f" 大盘过滤: {'开启' if MARKET_FILTER else '关闭'} 成交量确认: 连续{VOL_DAYS}日放量")
    print(f" 买入持有收益: {bh:+.1f}%")

    tA, trA = simulate_v7(name, df, market_df, "A")
    tB, trB = simulate_v7(name, df, market_df, "B", avg_atr_pct)
    tC, trC = simulate_v7(name, df, market_df, "C", avg_atr_pct)
    rA = (tA - INITIAL_CAPITAL) / INITIAL_CAPITAL * 100
    rB = (tB - INITIAL_CAPITAL) / INITIAL_CAPITAL * 100
    rC = (tC - INITIAL_CAPITAL) / INITIAL_CAPITAL * 100

    for label, trades in [("A 固定止损12%", trA), ("B ATR动态", trB), ("C 混合自适应", trC)]:
        print(f"\n 【版本{label}】")
        if not trades:
            print(" 无信号")
            continue
        table = pd.DataFrame(trades)
        cols = [c for c in ["操作", "日期", "价格", "股数", "评分", "盈亏%", "备注"]
                if c in table.columns]
        print(table[cols].to_string(index=False))

    best = max([("A", rA), ("B", rB), ("C", rC)], key=lambda x: x[1])
    print(f"\n {'':20} {'A 固定12%':>11} {'B ATR动态':>11} {'C 混合':>11} {'买入持有':>10}")
    print(f" {'策略总收益':<20} {rA:>+10.1f}% {rB:>+10.1f}% {rC:>+10.1f}% {bh:>+9.1f}%")
    print(f" {'超额收益α':<20} {rA-bh:>+10.1f}% {rB-bh:>+10.1f}% {rC-bh:>+10.1f}%")
    nA = sum(1 for t in trA if t["操作"] == "买入")
    nB = sum(1 for t in trB if t["操作"] == "买入")
    nC = sum(1 for t in trC if t["操作"] == "买入")
    print(f" {'交易次数':<20} {nA:>11} {nB:>11} {nC:>11}")
    print(f" {'🏆 胜出':<20} {'★' if best[0]=='A' else '':>11} {'★' if best[0]=='B' else '':>11} {'★' if best[0]=='C' else '':>11}")

    return {"name": name, "A": rA, "B": rB, "C": rC, "BH": bh,
            "atr": avg_atr_pct * 100, "c_note": c_note}


def main():
    """Run the v7 backtest for every stock in ``STOCKS`` and print a summary."""
    market_df = load_market_data()

    print("\n" + "=" * 72)
    print("🔬 港股 AI v7 — 大盘过滤 + 成交量增强 + LLM舆情")
    print("=" * 72)
    print(f" 大盘过滤: HSI > MA{MARKET_MA} 时允许开仓")
    print(f" 成交量确认: 连续{VOL_DAYS}日 > {VOL_CONFIRM*100:.0f}% 均量")
    print(f" 盈利保护: >30%🛡️保本 | >50%🔒锁利 | >100%🚀宽止损")
    print()

    results = []
    for i, (name, ticker) in enumerate(STOCKS.items()):
        if i > 0:
            time.sleep(1)  # pause between tickers
        r = run_v7(name, ticker, market_df)
        if r:
            results.append(r)

    if not results:
        return

    print(f"\n{'='*72}")
    print(" 📋 v7 最终汇总")
    print(f"{'='*72}")
    print(f" {'股票':<12} {'ATR%':>6} {'C策略':<16} {'A':>9} {'B':>9} {'C':>9} {'买持':>9}")
    print(f" {'-'*70}")
    for r in results:
        top = max(r["A"], r["B"], r["C"])
        marks = {k: "★" for k in ["A", "B", "C"] if r[k] == top}
        print(f" {r['name']:<12} {r['atr']:>5.1f}% {r['c_note']:<16}"
              f" {r['A']:>+8.1f}%{marks.get('A',''):1}"
              f" {r['B']:>+8.1f}%{marks.get('B',''):1}"
              f" {r['C']:>+8.1f}%{marks.get('C',''):1}"
              f" {r['BH']:>+8.1f}%")
    avg = {k: np.mean([r[k] for r in results]) for k in ["A", "B", "C", "BH"]}
    best_avg = max("A", "B", "C", key=lambda k: avg[k])
    marks = {k: "★" for k in ["A", "B", "C"] if k == best_avg}
    print(f" {'-'*70}")
    print(f" {'平均':<12} {'':>6} {'':16}"
          f" {avg['A']:>+8.1f}%{marks.get('A',''):1}"
          f" {avg['B']:>+8.1f}%{marks.get('B',''):1}"
          f" {avg['C']:>+8.1f}%{marks.get('C',''):1}"
          f" {avg['BH']:>+8.1f}%")
    print()

    # Comparison against the v6 averages (hard-coded figures).
    print(" 📊 v6 → v7 对比(平均收益)")
    v6_avg = {"A": 11.4, "B": 4.7, "C": 3.7, "BH": 32.4}
    print(f" v6: A={v6_avg['A']:+.1f}% B={v6_avg['B']:+.1f}% C={v6_avg['C']:+.1f}% 买持={v6_avg['BH']:+.1f}%")
    print(f" v7: A={avg['A']:+.1f}% B={avg['B']:+.1f}% C={avg['C']:+.1f}% 买持={avg['BH']:+.1f}%")
    print()


if __name__ == "__main__":
    main()