Files
stockbuddy/stock_backtest_v7.py

475 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
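Hong Kong stock AI composite scoring system v7 - market filter + volume confirmation + LLM sentiment.
New in v7:
1. Market filter: no new buys while the Hang Seng Index (HSI) closes below its MA20.
2. Stronger volume confirmation: meant to require two consecutive high-volume days instead of one (prep() implements a relaxed variant: today's volume ratio >= VOL_CONFIRM and the previous day's ratio >= 0.9).
3. LLM sentiment: daily sentiment scores intended to be generated by an agent; this file reads them from a JSON cache and falls back to built-in per-year defaults.
Everything else is as in v6: profit protection, dual-source data, three stop-loss variants.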
"""
import yfinance as yf
import pandas as pd
import numpy as np
import time, os, sys, json, subprocess
import warnings
warnings.filterwarnings('ignore')
# Cache locations; every cached file lives under CACHE_DIR, which is created at import time.
CACHE_DIR = "data"
SENTIMENT_CACHE = os.path.join(CACHE_DIR, "llm_sentiment.json")
HSI_CACHE = os.path.join(CACHE_DIR, "HSI.csv")
os.makedirs(CACHE_DIR, exist_ok=True)
# Passing --refresh on the command line makes the loaders skip the CSV caches and re-download.
FORCE_REFRESH = "--refresh" in sys.argv
# Backtest universe: display name -> ticker handed to yfinance.
STOCKS = {
    "平安好医生": "1833.HK",
    "叮当健康": "9886.HK",
    "中原建业": "9982.HK",
    "泰升集团": "0687.HK",
    "阅文集团": "0772.HK",
    "中芯国际": "0981.HK",
}
# History window requested from yfinance.
PERIOD = "2y"
# Starting cash of every simulation run.
INITIAL_CAPITAL = 10000.0
# Weights of the technical / fundamental / sentiment sub-scores in the composite score.
W_TECH, W_FUND, W_SENT = 0.60, 0.30, 0.10
# ═════════════════════════════════════════════════════════════════════
# v7 parameters
# ═════════════════════════════════════════════════════════════════════
BUY_THRESH = 1.5  # composite score at or above this may open a position
SELL_THRESH = -1.5  # composite score at or below this closes an open position
COOLDOWN_DAYS = 0 # calendar days to wait after a buy before buying again; 0 disables the cooldown
VOL_CONFIRM = 1.2 # required multiple of the 20-day average volume on the signal day
VOL_DAYS = 2 # v7: nominal "consecutive high-volume days"; in this file it only appears in printed messages
# Market filter
MARKET_FILTER = True # whether the index-based buy filter is enabled
MARKET_TICKER = "^HSI" # Hang Seng Index
MARKET_MA = 20 # window of the index moving average (MA20)
# Profit-protection thresholds on the unrealized return (price - entry) / entry
PROFIT_STAGE_1 = 0.30
PROFIT_STAGE_2 = 0.50
PROFIT_STAGE_3 = 1.00
# Stop-loss parameters
# Mode A: fixed trailing-stop fraction.
A_FIXED_STOP = 0.12
# Mode B: trailing stop = ATR * B_ATR_MULT / price, clipped to [B_MIN_STOP, B_MAX_STOP].
B_ATR_MULT, B_MIN_STOP, B_MAX_STOP = 2.5, 0.08, 0.35
# Mode C: volatility tier boundaries, compared against mean ATR / mean Close.
C_LOW_ATR_PCT, C_HIGH_ATR_PCT = 0.05, 0.15
# Mode C: fixed stop for the low tier and ATR multiples for the mid / high tiers.
C_LOW_FIXED, C_MID_ATR_MULT, C_HIGH_ATR_MULT = 0.08, 2.5, 2.0
# Mode C: clip bounds for the ATR-based stop (high-tier max, common min, mid-tier max).
C_HIGH_MAX, C_MIN_STOP, C_MID_MAX = 0.40, 0.08, 0.35
# ═════════════════════════════════════════════════════════════════════
# Fundamental snapshots
# ═════════════════════════════════════════════════════════════════════
# Hard-coded fundamental score timeline per stock (keyed by the display names in STOCKS).
# Each entry takes effect on its "from" date (ISO YYYY-MM-DD string) and stays in force
# until a later entry applies; get_snap() expects the entries in ascending date order.
FUNDAMENTAL = {
    "平安好医生": [
        {"from": "2024-01-01", "score": -3.0},
        {"from": "2024-08-01", "score": -1.0},
        {"from": "2025-01-01", "score": 0.0},
        {"from": "2025-08-01", "score": 1.0},
    ],
    "叮当健康": [
        {"from": "2024-01-01", "score": -3.0},
        {"from": "2024-06-01", "score": -2.0},
        {"from": "2025-01-01", "score": -1.0},
        {"from": "2025-09-01", "score": 1.0},
    ],
    "中原建业": [
        {"from": "2024-01-01", "score": -3.0},
        {"from": "2024-06-01", "score": -4.0},
        {"from": "2025-01-01", "score": -4.0},
        {"from": "2025-10-01", "score": -5.0},
    ],
    "泰升集团": [
        {"from": "2024-01-01", "score": -1.0},
        {"from": "2024-06-01", "score": -1.0},
        {"from": "2025-01-01", "score": -2.0},
        {"from": "2025-10-01", "score": -2.0},
    ],
    "阅文集团": [
        {"from": "2024-01-01", "score": 1.0},
        {"from": "2024-06-01", "score": 2.0},
        {"from": "2025-01-01", "score": 2.0},
        {"from": "2025-10-01", "score": 3.0},
    ],
    "中芯国际": [
        {"from": "2024-01-01", "score": 2.0},
        {"from": "2024-06-01", "score": 3.0},
        {"from": "2025-01-01", "score": 3.0},
        {"from": "2025-10-01", "score": 4.0},
    ],
}
# ═════════════════════════════════════════════════════════════════════
# LLM sentiment (cache lookup with built-in defaults)
# ═════════════════════════════════════════════════════════════════════
class LLMSentimentGenerator:
    """Look up daily sentiment scores from a JSON cache, with built-in defaults.

    The cache maps a stock key (the first four characters of the stock name)
    to a ``{"YYYY-MM-DD": score}`` mapping. When a day is not cached, a
    hard-coded per-stock, per-year estimate is returned instead. No LLM is
    called from this class; ``batch_generate`` is a placeholder for that.
    """

    # Fallback estimates: stock name -> {calendar year -> score}.
    # Built once here instead of on every get_sentiment() call.
    _DEFAULT_SCORES = {
        "平安好医生": {2024: -1, 2025: 1, 2026: 2},
        "叮当健康": {2024: -2, 2025: 0, 2026: 1},
        "中原建业": {2024: -3, 2025: -4, 2026: -4},
        "泰升集团": {2024: -1, 2025: -1, 2026: -1},
        "阅文集团": {2024: 1, 2025: 2, 2026: 3},
        "中芯国际": {2024: 2, 2025: 4, 2026: 5},
    }

    def __init__(self, cache_file=None):
        """Create the generator and load its cache.

        Args:
            cache_file: Path of the JSON cache. ``None`` (the default)
                resolves to the module-level ``SENTIMENT_CACHE`` at call
                time, so the effective default path is unchanged.
        """
        self.cache_file = SENTIMENT_CACHE if cache_file is None else cache_file
        self.cache = self._load_cache()

    def _load_cache(self):
        """Return the cached scores, or ``{}`` if the file is missing or unusable."""
        if not os.path.exists(self.cache_file):
            return {}
        try:
            # Explicit UTF-8: the keys are Chinese names and the platform
            # default encoding is not guaranteed to handle them.
            with open(self.cache_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as exc:
            # A broken cache must not abort the whole backtest; the module
            # creates an instance at import time. JSON and decoding errors
            # are both ValueError subclasses.
            print(f" ⚠️ 舆情缓存读取失败,使用默认估值: {exc}")
            return {}
        # Lookups below assume a mapping at the top level.
        return data if isinstance(data, dict) else {}

    def _save_cache(self):
        """Write the in-memory cache back to ``self.cache_file`` as JSON."""
        with open(self.cache_file, 'w', encoding='utf-8') as f:
            json.dump(self.cache, f, indent=2, default=str)

    def get_sentiment(self, stock_name, date, news_list=None):
        """Return the sentiment score of a stock on a given day.

        Args:
            stock_name: Display name of the stock.
            date: Object exposing ``.date()`` and ``.year`` (e.g. a
                ``pandas.Timestamp`` or ``datetime.datetime``).
            news_list: Unused; kept for interface compatibility.

        Returns:
            The cached score for that day when present, otherwise the
            default estimate for ``(stock_name, date.year)``, or ``0`` when
            no default exists.
        """
        # NOTE(review): cache keys are the name truncated to four characters,
        # so five-character names such as "平安好医生" are stored shortened.
        # Kept as is to stay compatible with existing cache files.
        sym = stock_name[:4]
        date_str = str(date.date())
        per_stock = self.cache.get(sym)
        if per_stock is not None and date_str in per_stock:
            return per_stock[date_str]
        # Default estimate based on the stock and the calendar year.
        return self._DEFAULT_SCORES.get(stock_name, {}).get(date.year, 0)

    def batch_generate(self, stock_name, start_date, end_date):
        """Placeholder for bulk sentiment generation through an agent / LLM API.

        Currently only prints a progress line and returns the sentiment of
        ``start_date``; ``end_date`` is used for the message only.
        """
        print(f" 🤖 LLM: 为 {stock_name} 生成 {start_date}~{end_date} 舆情...")
        # Hook for a real LLM integration; during testing the cached /
        # default estimate is used.
        return self.get_sentiment(stock_name, pd.Timestamp(start_date))
# Module-wide sentiment generator; created at import time, which also loads its JSON cache
sentiment_gen = LLMSentimentGenerator()
# ═════════════════════════════════════════════════════════════════════
# Market (index) data loading
# ═════════════════════════════════════════════════════════════════════
def load_market_data():
    """Load Hang Seng Index data for the market filter.

    Reads ``HSI_CACHE`` when it exists and ``FORCE_REFRESH`` is off; otherwise
    downloads ``MARKET_TICKER`` via yfinance, adds the ``MA{MARKET_MA}``
    column and writes the cache.

    Returns:
        A DataFrame that always contains ``Close`` and ``MA{MARKET_MA}``,
        or ``None`` when the download yields no data.
    """
    ma_col = f"MA{MARKET_MA}"
    if os.path.exists(HSI_CACHE) and not FORCE_REFRESH:
        df = pd.read_csv(HSI_CACHE, index_col=0, parse_dates=True)
        if ma_col not in df.columns:
            # The cache may have been written with a different MARKET_MA;
            # rebuild the column so check_market_ok() does not hit a KeyError.
            df[ma_col] = df["Close"].rolling(MARKET_MA).mean()
        print(f" 📂 大盘缓存: HSI ({len(df)}行)")
        return df
    print(f" 🌐 下载大盘: {MARKET_TICKER}")
    df = yf.download(MARKET_TICKER, period=PERIOD, auto_adjust=True, progress=False)
    # Treat a None result the same as an empty frame.
    if df is None or df.empty:
        print(" ⚠️ 大盘数据下载失败,禁用大盘过滤")
        return None
    if isinstance(df.columns, pd.MultiIndex):
        # Drop the second column level so columns are plain field names.
        df.columns = df.columns.droplevel(1)
    df[ma_col] = df["Close"].rolling(MARKET_MA).mean()
    df.to_csv(HSI_CACHE)
    return df
def check_market_ok(market_df, date):
    """Tell whether the market filter allows opening a position on ``date``.

    Args:
        market_df: Index data with ``Close`` and ``MA{MARKET_MA}`` columns,
            or ``None`` when no index data is available.
        date: Index label of the trading day being evaluated.

    Returns:
        ``True`` when the filter is disabled, when no usable index data
        exists for ``date``, or when the index close is at or above its
        moving average; ``False`` otherwise.
    """
    if not MARKET_FILTER or market_df is None:
        return True
    if date not in market_df.index:
        return True  # missing row: let the trade through
    close = float(market_df.loc[date, "Close"])
    ma = float(market_df.loc[date, f"MA{MARKET_MA}"])
    if pd.isna(close) or pd.isna(ma):
        # A NaN average (warm-up rows of the rolling mean) is missing data too;
        # without this guard the comparison below is False and silently
        # blocks buys, contradicting the missing-row policy above.
        return True
    return close >= ma
# ═════════════════════════════════════════════════════════════════════
# Utility functions
# ═════════════════════════════════════════════════════════════════════
def get_snap(tl, date):
    """Return the timeline score in force on ``date``.

    Args:
        tl: Non-empty list of ``{"from": "YYYY-MM-DD", "score": ...}``
            entries in ascending ``from`` order.
        date: Object exposing ``.date()`` whose string form is ISO
            ``YYYY-MM-DD``.

    Returns:
        The score of the last entry whose ``from`` is not after ``date``;
        the first entry's score when ``date`` precedes every entry.
    """
    current = tl[0]["score"]
    for entry in tl:
        # ISO date strings order the same way as the dates themselves.
        if str(date.date()) < entry["from"]:
            break
        current = entry["score"]
    return current
def calc_rsi(s, p=14):
    """Compute an RSI series from a price series.

    Gains and losses are smoothed with an exponentially weighted mean using
    ``com=p-1`` and ``min_periods=p``, so entries without ``p`` observed
    price changes are NaN.

    Args:
        s: pandas Series of prices.
        p: Smoothing period.

    Returns:
        pandas Series of RSI values, aligned with ``s``.
    """
    delta = s.diff()
    smoothing = {"com": p - 1, "min_periods": p}
    avg_gain = delta.clip(lower=0).ewm(**smoothing).mean()
    avg_loss = (-delta.clip(upper=0)).ewm(**smoothing).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - 100 / (1 + relative_strength)
def calc_macd(s):
    """Return the MACD histogram of a price series.

    The MACD line is EMA(12) minus EMA(26); the histogram is that line minus
    its own EMA(9). All EMAs use ``adjust=False``.
    """
    fast_ema = s.ewm(span=12, adjust=False).mean()
    slow_ema = s.ewm(span=26, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    return macd_line - signal_line
def calc_atr(df, p=14):
    """Compute the Average True Range of an OHLC frame.

    True range is the row-wise maximum of ``High - Low``,
    ``|High - previous Close|`` and ``|Low - previous Close|``; it is then
    smoothed with an exponentially weighted mean (``com=p-1``,
    ``min_periods=p``).

    Args:
        df: DataFrame with ``High``, ``Low`` and ``Close`` columns.
        p: Smoothing period.

    Returns:
        pandas Series of ATR values, aligned with ``df``.
    """
    prev_close = df["Close"].shift(1)
    candidates = pd.concat(
        [
            df["High"] - df["Low"],
            (df["High"] - prev_close).abs(),
            (df["Low"] - prev_close).abs(),
        ],
        axis=1,
    )
    true_range = candidates.max(axis=1)
    return true_range.ewm(com=p - 1, min_periods=p).mean()
def tech_score(row):
    """Score one bar's technical picture.

    Args:
        row: Object with attributes ``RSI``, ``MH``, ``MH_p``, ``MA5``,
            ``MA20``, ``MA60``, ``Close``, ``Cp`` and ``MA20p``.

    Returns:
        The sum of four components, clipped to [-10, 10], as a float:
        RSI level (+3/+1/-3/-1), MACD histogram sign or zero-cross (+3/-3/+1/-1),
        MA5/MA20/MA60 alignment (+2/-2) and a Close-vs-MA20 cross (+1/-1).
    """
    rsi, hist, prev_hist = row.RSI, row.MH, row.MH_p
    ma5, ma20, ma60 = row.MA5, row.MA20, row.MA60
    close = row.Close

    # RSI: low readings add points, high readings subtract them.
    if rsi < 30:
        rsi_pts = 3
    elif rsi < 45:
        rsi_pts = 1
    elif rsi > 70:
        rsi_pts = -3
    elif rsi > 55:
        rsi_pts = -1
    else:
        rsi_pts = 0

    # MACD histogram: a zero-cross counts more than the plain sign.
    if hist > 0 and prev_hist <= 0:
        macd_pts = 3
    elif hist < 0 and prev_hist >= 0:
        macd_pts = -3
    else:
        macd_pts = 1 if hist > 0 else -1

    # Moving averages stacked in order.
    if ma5 > ma20 > ma60:
        trend_pts = 2
    elif ma5 < ma20 < ma60:
        trend_pts = -2
    else:
        trend_pts = 0

    # Close crossing MA20 relative to the previous bar.
    if close > ma20 and row.Cp <= row.MA20p:
        cross_pts = 1
    elif close < ma20 and row.Cp >= row.MA20p:
        cross_pts = -1
    else:
        cross_pts = 0

    total = rsi_pts + macd_pts + trend_pts + cross_pts
    return float(np.clip(total, -10, 10))
def pos_ratio(score):
    """Map a composite score to the fraction of capital to deploy.

    Returns 1.0 for scores of at least 5, 0.6 for scores of at least 3,
    and 0.3 for anything else.
    """
    if score >= 5:
        return 1.0
    return 0.6 if score >= 3 else 0.3
def load(ticker):
    """Return price history for ``ticker`` from the CSV cache or yfinance.

    The cache file is ``CACHE_DIR/<ticker without ".HK">.csv``. It is used
    when present unless ``FORCE_REFRESH`` is set; otherwise the data is
    downloaded and the cache is (re)written.

    Returns:
        A DataFrame, or ``None`` when the download comes back empty.
    """
    code = ticker.replace(".HK", "")
    cache_path = os.path.join(CACHE_DIR, f"{code}.csv")

    if FORCE_REFRESH or not os.path.exists(cache_path):
        print(f" 🌐 下载: {ticker}")
        fresh = yf.download(ticker, period=PERIOD, auto_adjust=True, progress=False)
        if fresh.empty:
            return None
        if isinstance(fresh.columns, pd.MultiIndex):
            # Drop the second column level so columns are plain field names.
            fresh.columns = fresh.columns.droplevel(1)
        fresh.to_csv(cache_path)
        return fresh

    cached = pd.read_csv(cache_path, index_col=0, parse_dates=True)
    print(f" 📂 缓存: {cache_path} ({len(cached)}行)")
    return cached
def prep(ticker):
    """Load a ticker and attach every indicator column the simulation reads.

    Adds ``RSI``, ``MH``/``MH_p`` (MACD histogram and its previous value),
    ``MA5``/``MA20``/``MA60``, ``MA20p``, ``Cp`` (previous close), ``Vol20``,
    ``ATR``, ``VolRatio`` and ``VolConfirm``.

    Returns:
        The enriched frame with NaN rows dropped, or ``None`` when no data
        is available or fewer than 60 rows were loaded.
    """
    df = load(ticker)
    if df is None or len(df) < 60:
        return None

    close = df["Close"]
    df["RSI"] = calc_rsi(close)

    histogram = calc_macd(close)
    df["MH"] = histogram
    df["MH_p"] = histogram.shift(1)

    for window in (5, 20, 60):
        df[f"MA{window}"] = close.rolling(window).mean()
    df["MA20p"] = df["MA20"].shift(1)
    df["Cp"] = close.shift(1)

    df["Vol20"] = df["Volume"].rolling(20).mean()
    df["ATR"] = calc_atr(df)

    # v7 volume confirmation (relaxed form): today's volume is at least
    # VOL_CONFIRM x the 20-day average and the previous day's ratio is at
    # least 0.9, i.e. the prior day was not a low-volume day.
    volume_ratio = df["Volume"] / df["Vol20"]
    df["VolRatio"] = volume_ratio
    df["VolConfirm"] = (volume_ratio >= VOL_CONFIRM) & (volume_ratio.shift(1) >= 0.9)
    return df.dropna()
def c_stop_params(avg_atr_pct):
    """Pick the mode-C stop configuration for a volatility level.

    Args:
        avg_atr_pct: Average ATR as a fraction of price.

    Returns:
        A ``(kind, multiplier_or_fixed, max_stop, note)`` tuple: a fixed stop
        below ``C_LOW_ATR_PCT``, the mid ATR multiple below
        ``C_HIGH_ATR_PCT``, and the high-volatility ATR multiple otherwise.
    """
    tiers = (
        (C_LOW_ATR_PCT, ("fixed", C_LOW_FIXED, C_LOW_FIXED, "低波动→固定止损")),
        (C_HIGH_ATR_PCT, ("atr", C_MID_ATR_MULT, C_MID_MAX, "中波动→ATR×2.5")),
    )
    for upper_bound, params in tiers:
        if avg_atr_pct < upper_bound:
            return params
    return ("atr", C_HIGH_ATR_MULT, C_HIGH_MAX, "高波动→ATR×2.0")
# ═════════════════════════════════════════════════════════════════════
# v7 core engine: market filter + volume confirmation
# ═════════════════════════════════════════════════════════════════════
def simulate_v7(name, df, market_df, mode="A", c_avg_atr_pct=None):
    """Run a single-position, long-only backtest for one stock.

    Each bar gets a composite score (weighted technical, fundamental and
    sentiment parts). A position is opened when the score, volume
    confirmation, market filter and cooldown all allow it, and is closed by
    a trailing stop (adjusted by the profit-protection stages) or by the
    score falling to ``SELL_THRESH``.

    Args:
        name: Stock display name; key into ``FUNDAMENTAL`` and the
            sentiment generator.
        df: Frame produced by ``prep`` (needs ``Close``, ``ATR``,
            ``VolConfirm`` and the columns read by ``tech_score``).
        market_df: Index data for ``check_market_ok``, or ``None``.
        mode: ``"A"`` fixed stop, ``"B"`` ATR-based stop; any other value
            takes the mode-C branch, but ``c_stop_params`` is only consulted
            when ``mode == "C"``.
        c_avg_atr_pct: Average ATR fraction passed to ``c_stop_params``;
            only read when ``mode == "C"``.

    Returns:
        ``(total, trades)``: final equity (cash plus any open position
        valued at the last close) and the list of trade-record dicts.
    """
    capital, position, entry = INITIAL_CAPITAL, 0, 0.0
    high_price, trail_pct = 0.0, 0.0
    trades = []
    last_buy_date = None
    # Mode-C stop configuration; a neutral placeholder for the other modes.
    c_mode, c_mult, c_max, c_note = ("fixed",0,0,"") if mode!="C" else c_stop_params(c_avg_atr_pct)
    for date, row in df.iterrows():
        f = get_snap(FUNDAMENTAL[name], date)
        s = sentiment_gen.get_sentiment(name, date)
        t = tech_score(row)
        score = W_TECH*t + W_FUND*f + W_SENT*s
        price = float(row["Close"])
        vol_confirm = row["VolConfirm"] # v7: volume-confirmation flag computed in prep()
        # v7: market filter
        market_ok = check_market_ok(market_df, date)
        # Cooldown check (inactive while COOLDOWN_DAYS is 0)
        in_cooldown = False
        if last_buy_date is not None and COOLDOWN_DAYS > 0:
            days_since_last = (date - last_buy_date).days
            in_cooldown = days_since_last < COOLDOWN_DAYS
        # ═════════════════════════════════════════════════════════════
        # Buy logic (v7: market filter + volume confirmation)
        # ═════════════════════════════════════════════════════════════
        can_buy = (score >= BUY_THRESH and position == 0 and capital > price
                   and vol_confirm and market_ok and not in_cooldown)
        if can_buy:
            ratio = pos_ratio(score)
            if ratio > 0:
                shares = int(capital * ratio / price)
                if shares > 0:
                    position, entry, high_price = shares, price, price
                    capital -= shares * price
                    last_buy_date = date
                    # The trailing-stop width is fixed at entry, per mode.
                    if mode == "A":
                        trail_pct = A_FIXED_STOP
                        note = f"{ratio*100:.0f}% 固定{trail_pct*100:.0f}%"
                    elif mode == "B":
                        raw = float(row["ATR"]) * B_ATR_MULT / price
                        trail_pct = float(np.clip(raw, B_MIN_STOP, B_MAX_STOP))
                        note = f"{ratio*100:.0f}% ATR{trail_pct*100:.1f}%"
                    else:
                        if c_mode == "fixed":
                            # Falls back to C_LOW_FIXED when c_mult is 0 (the non-"C" placeholder).
                            trail_pct = c_mult if c_mult else C_LOW_FIXED
                            note = f"{ratio*100:.0f}% {c_note} {trail_pct*100:.0f}%"
                        else:
                            raw = float(row["ATR"]) * c_mult / price
                            trail_pct = float(np.clip(raw, C_MIN_STOP, c_max))
                            note = f"{ratio*100:.0f}% {c_note} {trail_pct*100:.1f}%"
                    # market_ok is always True here because can_buy requires it.
                    market_tag = "🟢大盘OK" if market_ok else "🔴大盘差"
                    trades.append({"操作":"买入","日期":date.date(),"价格":round(price,4),
                                   "股数":shares,"评分":round(score,2),"备注":f"{note} {market_tag}"})
        elif position > 0:
            high_price = max(high_price, price)
            current_pnl_pct = (price - entry) / entry
            # Profit protection: the stage is chosen from the current
            # close-to-entry return, not from the peak.
            effective_trail = trail_pct
            profit_lock_note = ""
            if current_pnl_pct >= PROFIT_STAGE_3:
                # Loosen: stop = min(high * (1 - 1.5 * trail), 1.5 * entry).
                effective_trail = max(trail_pct * 1.5, (high_price - entry * 1.5) / high_price)
                profit_lock_note = "🚀"
            elif current_pnl_pct >= PROFIT_STAGE_2:
                # Once the peak exceeds 1.5 * entry, the stop is never below 1.10 * entry.
                effective_trail = min(trail_pct, 1 - (entry * 1.10) / high_price) if high_price > entry * 1.5 else trail_pct
                profit_lock_note = "🔒"
            elif current_pnl_pct >= PROFIT_STAGE_1:
                # Break-even protection: the stop is never below the entry price.
                effective_trail = min(trail_pct, 1 - entry / high_price) if high_price > entry else trail_pct
                profit_lock_note = "🛡️"
            stop_price = high_price * (1 - effective_trail)
            if price <= stop_price or score <= SELL_THRESH:
                pnl = position*(price-entry)
                pct = pnl/(position*entry)*100
                # The stop reason takes precedence when both exit conditions hold.
                reason = (f"止损 高{high_price:.3f}→线{stop_price:.3f}{profit_lock_note}"
                          if price<=stop_price else f"评分出({score:.1f})")
                capital += position*price
                trades.append({"操作":"卖出","日期":date.date(),"价格":round(price,4),
                               "股数":position,"评分":round(score,2),
                               "盈亏%":f"{pct:+.1f}%","备注":reason})
                position, high_price, trail_pct = 0, 0.0, 0.0
    # Mark any still-open position to the last close.
    last = float(df["Close"].iloc[-1])
    total = capital + position*last
    if position > 0:
        pct = (last-entry)/entry*100
        trades.append({"操作":"未平仓","日期":"持仓中","价格":round(last,4),
                       "股数":position,"评分":"-","盈亏%":f"{pct:+.1f}%","备注":"-"})
    return total, trades
# ═════════════════════════════════════════════════════════════════════
# Main flow
# ═════════════════════════════════════════════════════════════════════
def run_v7(name, ticker, market_df):
    """Backtest one stock under stop modes A, B and C and print a comparison.

    Args:
        name: Stock display name (key into ``FUNDAMENTAL``).
        ticker: Ticker passed to ``prep``.
        market_df: Index data for the market filter, or ``None``.

    Returns:
        A dict with the percentage returns of each mode (``"A"``, ``"B"``,
        ``"C"``), buy-and-hold (``"BH"``), the average ATR percentage
        (``"atr"``) and the mode-C note (``"c_note"``); ``None`` when there
        is not enough data.
    """
    win_mark = "★"  # shown under the best-performing mode

    print(f"\n{'='*72}")
    print(f" {name} ({ticker})")
    print(f"{'='*72}")
    df = prep(ticker)
    # prep() returns a dropna()'d frame, which can be empty even when the raw
    # data had 60+ rows; the iloc reads below would then raise IndexError.
    if df is None or df.empty:
        print(" ⚠️ 数据不足,跳过")
        return None

    avg_atr_pct = float(df["ATR"].mean() / df["Close"].mean())
    bh = (float(df["Close"].iloc[-1]) / float(df["Close"].iloc[0]) - 1)*100
    # Only the descriptive note is needed here; simulate_v7 derives the
    # actual mode-C stop itself.
    _, _, _, c_note = c_stop_params(avg_atr_pct)
    print(f" ATR均值: {avg_atr_pct*100:.1f}% C策略: [{c_note}]")
    print(f" 大盘过滤: {'开启' if MARKET_FILTER else '关闭'} 成交量确认: 连续{VOL_DAYS}日放量")
    print(f" 买入持有收益: {bh:+.1f}%")

    tA, trA = simulate_v7(name, df, market_df, "A")
    tB, trB = simulate_v7(name, df, market_df, "B", avg_atr_pct)
    tC, trC = simulate_v7(name, df, market_df, "C", avg_atr_pct)
    rA = (tA-INITIAL_CAPITAL)/INITIAL_CAPITAL*100
    rB = (tB-INITIAL_CAPITAL)/INITIAL_CAPITAL*100
    rC = (tC-INITIAL_CAPITAL)/INITIAL_CAPITAL*100

    # Per-mode trade logs.
    for label, trades in [("A 固定止损12%", trA),("B ATR动态", trB),("C 混合自适应", trC)]:
        print(f"\n 【版本{label}")
        if not trades:
            print(" 无信号")
            continue
        table = pd.DataFrame(trades)
        # Buy records have no "盈亏%" key, so keep only columns that exist.
        cols = [c for c in ["操作","日期","价格","股数","评分","盈亏%","备注"] if c in table.columns]
        print(table[cols].to_string(index=False))

    # Side-by-side summary; on ties max() keeps the first mode (A, then B).
    best = max([("A",rA),("B",rB),("C",rC)], key=lambda x:x[1])
    print(f"\n {'':20} {'A 固定12%':>11} {'B ATR动态':>11} {'C 混合':>11} {'买入持有':>10}")
    print(f" {'策略总收益':<20} {rA:>+10.1f}% {rB:>+10.1f}% {rC:>+10.1f}% {bh:>+9.1f}%")
    print(f" {'超额收益α':<20} {rA-bh:>+10.1f}% {rB-bh:>+10.1f}% {rC-bh:>+10.1f}%")
    nA = sum(1 for t in trA if t["操作"]=="买入")
    nB = sum(1 for t in trB if t["操作"]=="买入")
    nC = sum(1 for t in trC if t["操作"]=="买入")
    print(f" {'交易次数':<20} {nA:>11} {nB:>11} {nC:>11}")
    # Previously both branches were '' so the winner was never marked.
    print(f" {'🏆 胜出':<20} {win_mark if best[0]=='A' else '':>11} {win_mark if best[0]=='B' else '':>11} {win_mark if best[0]=='C' else '':>11}")
    return {"name":name, "A":rA, "B":rB, "C":rC, "BH":bh,
            "atr":avg_atr_pct*100, "c_note":c_note}
if __name__ == "__main__":
    # Script entry point: run the v7 backtest for every stock in STOCKS,
    # then print a per-stock summary table and a comparison against the
    # hard-coded v6 averages.
    win_mark = "★"  # flags the best strategy in the summary rows

    # Load the index data used by the market filter.
    market_df = load_market_data()
    print("\n" + "="*72)
    print("🔬 港股 AI v7 — 大盘过滤 + 成交量增强 + LLM舆情")
    print("="*72)
    print(f" 大盘过滤: HSI > MA{MARKET_MA} 时允许开仓")
    # NOTE(review): prep() implements a relaxed rule (today >= VOL_CONFIRM x
    # average, previous day >= 0.9 x average); this message is the nominal one.
    print(f" 成交量确认: 连续{VOL_DAYS}日 > {VOL_CONFIRM*100:.0f}% 均量")
    print(f" 盈利保护: >30%🛡️保本 | >50%🔒锁利 | >100%🚀宽止损")
    print()

    results = []
    for i, (name, ticker) in enumerate(STOCKS.items()):
        if i > 0:
            time.sleep(1)  # pause between tickers
        r = run_v7(name, ticker, market_df)
        if r:
            results.append(r)

    if results:
        print(f"\n{'='*72}")
        print(" 📋 v7 最终汇总")
        print(f"{'='*72}")
        print(f" {'股票':<12} {'ATR%':>6} {'C策略':<16} {'A':>9} {'B':>9} {'C':>9} {'买持':>9}")
        print(f" {'-'*70}")
        for r in results:
            # Mark every strategy that ties for the best return on this stock.
            # Previously the marker was an empty string, so nothing was shown.
            marks = {k:win_mark for k in ["A","B","C"] if r[k]==max(r["A"],r["B"],r["C"])}
            print(f" {r['name']:<12} {r['atr']:>5.1f}% {r['c_note']:<16}"
                  f" {r['A']:>+8.1f}%{marks.get('A',''):1}"
                  f" {r['B']:>+8.1f}%{marks.get('B',''):1}"
                  f" {r['C']:>+8.1f}%{marks.get('C',''):1}"
                  f" {r['BH']:>+8.1f}%")
        avg = {k: np.mean([r[k] for r in results]) for k in ["A","B","C","BH"]}
        best_avg = max("A","B","C", key=lambda k: avg[k])
        marks = {k:win_mark for k in ["A","B","C"] if k==best_avg}
        print(f" {'-'*70}")
        print(f" {'平均':<12} {'':>6} {'':16}"
              f" {avg['A']:>+8.1f}%{marks.get('A',''):1}"
              f" {avg['B']:>+8.1f}%{marks.get('B',''):1}"
              f" {avg['C']:>+8.1f}%{marks.get('C',''):1}"
              f" {avg['BH']:>+8.1f}%")
        print()
        # Comparison with v6; the v6 averages are hard-coded reference values.
        print(" 📊 v6 → v7 对比(平均收益)")
        v6_avg = {"A":11.4, "B":4.7, "C":3.7, "BH":32.4}
        print(f" v6: A={v6_avg['A']:+.1f}% B={v6_avg['B']:+.1f}% C={v6_avg['C']:+.1f}% 买持={v6_avg['BH']:+.1f}%")
        print(f" v7: A={avg['A']:+.1f}% B={avg['B']:+.1f}% C={avg['C']:+.1f}% 买持={avg['BH']:+.1f}%")
        print()