angxt committed
Commit fd8cfee · verified · 1 Parent(s): 6356568

Upload app.py

Files changed (1):
  1. app.py +401 -0
app.py ADDED
@@ -0,0 +1,401 @@
# app.py (final debugged version)
import streamlit as st
import requests
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import joblib
import time
import cloudpickle
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# ---------------------------- CONFIG ----------------------------
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
CRYPTO_NEWS_API_KEY = st.secrets["CRYPTO_NEWS_API_KEY"]
FRED_API_KEY = st.secrets["FRED_API_KEY"]

FINBERT_API = "https://api-inference.huggingface.co/models/ProsusAI/finbert"
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}

TICKERS = {
    "bitcoin": "BTC-USD",
    "gold": "GC=F",
    "sp500": "^GSPC",
    "dxy": "DX-Y.NYB"
}

FRED_CODES = {
    "interest_rate": "FEDFUNDS",
    "inflation": "CPIAUCSL"
}

# Load the classifier with cloudpickle (it may have been serialized with
# custom objects that plain pickle/joblib would not restore)
with open("histgb_pca_model_clean.pkl", "rb") as f:
    model = cloudpickle.load(f)

pca = joblib.load("pca.pkl")
scaler = joblib.load("scaler.pkl")
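# NOTE (assumption): scaler, pca, and model are expected to come from the same
# training run -- scaler.transform feeds pca.transform, whose output feeds
# model.predict_proba. Artifacts from mismatched runs will fail with feature
# shape errors at prediction time.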
# ---------------------------- FUNCTIONS ----------------------------
def fetch_news(source):
    url = "https://cryptonews-api.com/api/v1/category"
    params = {
        "section": "general",
        "items": 10,
        "page": 1,
        "source": source,
        "token": CRYPTO_NEWS_API_KEY
    }
    r = requests.get(url, params=params)
    articles = r.json().get("data", [])
    texts = []
    for art in articles:
        # Prefer the summary field; fall back to the first sentence of the body.
        summary = art.get("text") or (art.get("content") or "").split(".")[0]
        texts.append(summary)
    return texts
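# Assumed Crypto News API response shape (worth verifying against current docs):
#   {"data": [{"title": ..., "text": ..., "news_url": ..., ...}, ...]}
# Only the "text"/"content" fields are consumed above.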
def call_finbert(news_list):
    results_df = []
    news_list = news_list[:5]  # cap API calls at 5 articles per source
    for idx, news in enumerate(news_list):
        if not isinstance(news, str) or not news.strip():
            results_df.append({"positive": 0.0, "neutral": 0.0, "negative": 0.0})
            continue
        payload = {"inputs": news}
        for attempt in range(5):
            try:
                response = requests.post(FINBERT_API, headers=HEADERS, json=payload, timeout=30)
                response.raise_for_status()
                output = response.json()

                # Get raw scores
                scores_raw = {item["label"].lower(): item["score"] for item in output[0]}

                # Ensure fixed column order
                aligned_scores = {
                    "positive": scores_raw.get("positive", 0.0),
                    "neutral": scores_raw.get("neutral", 0.0),
                    "negative": scores_raw.get("negative", 0.0)
                }

                results_df.append(aligned_scores)
                break
            except requests.exceptions.RequestException as e:
                st.warning(f"⚠️ FinBERT error on article {idx+1}, attempt {attempt+1}/5: {e}")
                time.sleep(2)
            except Exception as ex:
                st.warning(f"❌ Failed to analyze article {idx+1}: {ex}")
                results_df.append({"positive": 0.0, "neutral": 0.0, "negative": 0.0})
                break
        else:
            # All 5 attempts failed with request errors: append a zero row so
            # the result frame stays aligned with the article list.
            results_df.append({"positive": 0.0, "neutral": 0.0, "negative": 0.0})
    return pd.DataFrame(results_df)
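# For text-classification models the HF Inference API returns one list of
# {"label", "score"} dicts per input, e.g.:
#   [[{"label": "positive", "score": 0.91},
#     {"label": "negative", "score": 0.05},
#     {"label": "neutral", "score": 0.04}]]
# which is why scores_raw is built from output[0].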
def aggregate_sentiments(sentiment_df):
    # Min-max scale each sentiment column across the batch of articles.
    scaled = sentiment_df.copy()
    for col in scaled.columns:
        scaled[col] = (scaled[col] - scaled[col].min()) / (scaled[col].max() - scaled[col].min() + 1e-8)
    # Boost strongly positive/negative articles (>0.75) by 1.5x, capped at 1.
    weighted = scaled.copy()
    for col in ["positive", "negative"]:
        weighted[col] = np.where(scaled[col] > 0.75, scaled[col] * 1.5, scaled[col])
        weighted[col] = np.clip(weighted[col], 0, 1)
    weighted["neutral"] = scaled["neutral"]
    # Return per-column weighted means plus counts of "extreme" (>0.75) articles.
    return weighted.mean().to_dict(), (scaled > 0.75).sum().to_dict()
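# Worked example (hypothetical scores for one column):
#   raw positives [0.20, 0.90, 0.40] -> min-max -> [0.00, 1.00, 0.286]
#   1.00 > 0.75, so it is boosted to 1.5 and clipped back to 1.0;
#   weighted mean = (0.00 + 1.00 + 0.286) / 3 ≈ 0.429, extreme count = 1.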
def fetch_yahoo_data(ticker, date):
    data = yf.Ticker(ticker).history(start=date, end=date + timedelta(days=1))
    if not data.empty:
        return {
            "open": round(data["Open"].iloc[0], 2),
            "high": round(data["High"].iloc[0], 2),
            "low": round(data["Low"].iloc[0], 2),
            "close": round(data["Close"].iloc[0], 2),
            "volume": int(data["Volume"].iloc[0]) if ticker != TICKERS["dxy"] else None,
            "change_pct": round(((data["Close"].iloc[0] - data["Open"].iloc[0]) / data["Open"].iloc[0]) * 100, 2)
        }
    else:
        st.warning(f"⚠️ No trading data for {ticker} on {date.strftime('%Y-%m-%d')}, using previous available data.")
        # Walk back one day at a time until a trading day is found.
        return fetch_yahoo_data(ticker, date - timedelta(days=1))
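# Note: the walk-back recursion is unbounded; a delisted or misspelled ticker
# would recurse until Python's recursion limit. Capping the lookback (e.g. at
# 10 days) would be a safer variant.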
def fetch_fred(code, month):
    url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": code,
        "observation_start": f"{month}-01",
        "api_key": FRED_API_KEY,
        "file_type": "json"
    }
    res = requests.get(url, params=params).json()
    try:
        return float(res["observations"][0]["value"])
    except (KeyError, IndexError, ValueError):
        # No observation published yet for this month: fall back to the prior month.
        prev_month = (datetime.strptime(month, "%Y-%m") - timedelta(days=30)).strftime("%Y-%m")
        return fetch_fred(code, prev_month)
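# FRED's /series/observations endpoint returns JSON shaped like:
#   {"observations": [{"date": "2024-01-01", "value": "5.33", ...}, ...]}
# "value" is a string and is "." when missing, hence float() plus the
# ValueError in the except clause above.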
def make_prediction(input_data):
    expected_cols = list(scaler.feature_names_in_)

    # SAFETY CHECK
    if len(input_data) != len(expected_cols):
        raise ValueError(f"❌ Input length mismatch! Got {len(input_data)}, expected {len(expected_cols)}")

    # Align input values to expected column order
    input_dict = dict(zip(expected_cols, input_data))
    input_df = pd.DataFrame([input_dict])[expected_cols]

    # DEBUG VIEW
    st.write("📄 Aligned Input DataFrame:")
    st.dataframe(input_df)

    # Transform: scale -> project onto PCA components -> classify
    x_scaled = scaler.transform(input_df)
    x_pca = pca.transform(x_scaled)
    proba = model.predict_proba(x_pca)[0][1]
    prediction = "Increase" if proba >= 0.62 else "Decrease"
    return prediction, round(proba, 4)
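# Note: the Streamlit flow below inlines this same scale -> PCA -> threshold
# pipeline instead of calling make_prediction(); the helper is kept for its
# stricter input-length check and for reuse.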
def log_prediction(record):
    try:
        scope = ["https://spreadsheets.google.com/feeds",
                 "https://www.googleapis.com/auth/drive"]

        creds = ServiceAccountCredentials.from_json_keyfile_name("creds.json", scope)
        client = gspread.authorize(creds)

        sheet = client.open("BTC Predictions Log").sheet1  # Must match your actual Google Sheet name
        sheet.append_row(list(record.values()))
        st.success("✅ Logged to Google Sheet successfully.")
    except Exception as e:
        st.warning(f"⚠️ Logging to Google Sheets failed: {e}")
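# Assumptions for Google Sheets logging: a service-account key "creds.json" is
# deployed with the app, and the spreadsheet "BTC Predictions Log" is shared
# with that service account's email. append_row writes record.values() in dict
# insertion order, so the sheet's header row must match the order of the `log`
# dict built at the bottom of this script.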
# ---------------------------- STREAMLIT UI ----------------------------
st.set_page_config(page_title="Next Day Bitcoin Price Movement", layout="wide")
st.title("🔮 Next Day Bitcoin Price Movement Predictor")

date = st.date_input("Select a date", datetime.today() - timedelta(days=1))
month = date.strftime("%Y-%m")

if "news_loaded" not in st.session_state:
    st.session_state.news_loaded = False

sentiment_features = []
aggregated_display = {}
news_by_source = {"CryptoNews": [], "CryptoPotato": []}
edited_news_by_source = {}
# ------------------------------------
# STEP 1: FETCH NEWS + ENABLE EDITING
# ------------------------------------
if not st.session_state.news_loaded:
    if st.button("📥 Fetch News"):
        for src in ["CryptoNews", "CryptoPotato"]:
            try:
                news = fetch_news(src)
                news_by_source[src] = news
                st.session_state[src] = "\n\n".join(news)  # store for text_area default
            except Exception as e:
                st.warning(f"⚠️ Could not fetch {src}: {e}")
                st.session_state[src] = ""
        st.session_state.news_loaded = True
        st.rerun()
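# st.rerun() restarts the script top-to-bottom, so after fetching, execution
# falls through to STEP 2 with the articles cached in st.session_state.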
# ------------------------------------
# STEP 2: SHOW TEXT BOXES + RUN PREDICTION
# ------------------------------------
if st.session_state.news_loaded:
    st.subheader("📝 Edit News Articles")
    for src in ["CryptoNews", "CryptoPotato"]:
        default_text = st.session_state.get(src, "")
        user_input = st.text_area(f"{src} Articles (5 max, one per paragraph)", default_text, height=300)
        # Blank-line-separated paragraphs become individual articles.
        edited_news_by_source[src] = [para.strip() for para in user_input.split("\n\n") if para.strip()]

    if st.button("🔮 Make Prediction"):
        for src in ["CryptoNews", "CryptoPotato"]:
            try:
                news_by_source[src] = edited_news_by_source[src]
                scores_df = call_finbert(news_by_source[src])
                st.write(f"📊 FinBERT Scores for {src}:", scores_df)

                weighted_avg, extreme_count = aggregate_sentiments(scores_df)
                total_articles = len(scores_df)

                pct_scores = {
                    "positive_pct": extreme_count.get("positive", 0) / total_articles,
                    "neutral_pct": extreme_count.get("neutral", 0) / total_articles,
                    "negative_pct": extreme_count.get("negative", 0) / total_articles
                }

                sentiment_features.extend([
                    weighted_avg["positive"],
                    weighted_avg["neutral"],
                    weighted_avg["negative"],
                    pct_scores["positive_pct"],
                    pct_scores["neutral_pct"],
                    pct_scores["negative_pct"]
                ])
            except Exception as e:
                st.warning(f"⚠️ Failed for {src}: {e}")
                sentiment_features.extend([0.0] * 6)
                news_by_source[src] = []
        st.markdown("**Aggregated Sentiment**")
        st.write("🔎 News by Source:", news_by_source)
        sentiment_feature_labels = {
            "cryptonews_positive_weighted": sentiment_features[0],
            "cryptonews_neutral_weighted": sentiment_features[1],
            "cryptonews_negative_weighted": sentiment_features[2],
            "cryptonews_positive_pct": sentiment_features[3],
            "cryptonews_neutral_pct": sentiment_features[4],
            "cryptonews_negative_pct": sentiment_features[5],
            "cryptopotato_positive_weighted": sentiment_features[6],
            "cryptopotato_neutral_weighted": sentiment_features[7],
            "cryptopotato_negative_weighted": sentiment_features[8],
            "cryptopotato_positive_pct": sentiment_features[9],
            "cryptopotato_neutral_pct": sentiment_features[10],
            "cryptopotato_negative_pct": sentiment_features[11],
        }
        st.markdown("### 🧠 Sentiment Features by Source")
        st.json(sentiment_feature_labels)
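        # Indices 0-5 belong to CryptoNews and 6-11 to CryptoPotato because
        # the source loop above runs in that fixed order.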
        # Average across both sources
        if len(sentiment_features) == 12:
            aggregated_sentiments = [
                (sentiment_features[0] + sentiment_features[6]) / 2,
                (sentiment_features[1] + sentiment_features[7]) / 2,
                (sentiment_features[2] + sentiment_features[8]) / 2,
                (sentiment_features[3] + sentiment_features[9]) / 2,
                (sentiment_features[4] + sentiment_features[10]) / 2,
                (sentiment_features[5] + sentiment_features[11]) / 2
            ]
        elif len(sentiment_features) == 6:
            aggregated_sentiments = sentiment_features
        else:
            st.warning("⚠️ Sentiment features incomplete. Defaulting to 0s.")
            aggregated_sentiments = [0.0] * 6
        # Fetch BTC + macro data
        st.subheader("📈 Bitcoin Price Data")
        btc = fetch_yahoo_data(TICKERS["bitcoin"], date)
        st.json(btc)

        st.subheader("📊 Macroeconomic Indicators")
        macro = {}
        for k, t in TICKERS.items():
            if k != "bitcoin":
                try:
                    macro[k] = fetch_yahoo_data(t, date)
                except Exception as e:
                    st.warning(f"⚠️ Failed to fetch {k.upper()} data: {e}")
                    macro[k] = {"open": 0, "high": 0, "low": 0, "close": 0, "volume": 0, "change_pct": 0}
        st.json(macro)

        st.subheader("🏦 Fed Indicators")
        fed = {
            "interest_rate": fetch_fred(FRED_CODES["interest_rate"], month),
            "inflation": fetch_fred(FRED_CODES["inflation"], month)
        }
        st.json(fed)
        # ========== BUILD FINAL INPUT DICT SAFELY ==========
        final_input_dict = {
            "S&P_500_Open": macro["sp500"].get("open", 0),
            "S&P_500_High": macro["sp500"].get("high", 0),
            "S&P_500_Low": macro["sp500"].get("low", 0),
            "S&P_500_Close": macro["sp500"].get("close", 0),
            "S&P_500_Volume": macro["sp500"].get("volume", 0),
            "S&P_500_%_Change": macro["sp500"].get("change_pct", 0),

            "Gold_Prices_Open": macro["gold"].get("open", 0),
            "Gold_Prices_High": macro["gold"].get("high", 0),
            "Gold_Prices_Low": macro["gold"].get("low", 0),
            "Gold_Prices_Close": macro["gold"].get("close", 0),
            "Gold_Prices_Volume": macro["gold"].get("volume", 0),
            "Gold_Prices_%_Change": macro["gold"].get("change_pct", 0),

            "US_Dollar_Index_DXY_Open": macro["dxy"].get("open", 0),
            "US_Dollar_Index_DXY_High": macro["dxy"].get("high", 0),
            "US_Dollar_Index_DXY_Low": macro["dxy"].get("low", 0),
            "US_Dollar_Index_DXY_Close": macro["dxy"].get("close", 0),
            "US_Dollar_Index_DXY_%_Change": macro["dxy"].get("change_pct", 0),

            "Federal_Reserve_Interest_Rates_FEDFUNDS": fed.get("interest_rate", 0),
            "Inflation_CPIAUCNS": fed.get("inflation", 0),

            "Open": btc.get("open", 0),
            "High": btc.get("high", 0),
            "Low": btc.get("low", 0),
            "Close": btc.get("close", 0),
            "Volume": btc.get("volume", 0),
            "Change %": btc.get("change_pct", 0),

            "positive_weighted": aggregated_sentiments[0],
            "neutral_weighted": aggregated_sentiments[1],
            "negative_weighted": aggregated_sentiments[2],
            "positive_pct": aggregated_sentiments[3],
            "neutral_pct": aggregated_sentiments[4],
            "negative_pct": aggregated_sentiments[5],
        }
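        # Note (assumption): the feature key is "Inflation_CPIAUCNS" (the
        # not-seasonally-adjusted CPI series) while FRED_CODES fetches
        # CPIAUCSL (seasonally adjusted). The key must stay as-is to match
        # scaler.feature_names_in_, but the series mismatch is worth checking
        # against the training data.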
        # ========== PREPARE & PREDICT ==========
        expected_cols = list(scaler.feature_names_in_)
        final_input = [final_input_dict[col] for col in expected_cols]

        if any(pd.isna(x) for x in final_input):
            st.error("❌ Missing or invalid input data. Please check news, market, or macro feeds.")
        else:
            # Prepare aligned input
            input_df = pd.DataFrame([final_input_dict])[expected_cols]
            x_scaled = scaler.transform(input_df)
            x_pca = pca.transform(x_scaled)

            # Model prediction; the 0.62 threshold favors "Decrease" unless
            # the model is fairly confident of an increase
            proba = model.predict_proba(x_pca)[0][1]
            prediction = "Increase" if proba >= 0.62 else "Decrease"

            # PCA features table
            pca_df = pd.DataFrame(x_pca, columns=[f"PC{i+1}" for i in range(x_pca.shape[1])])
            st.markdown("### 🧬 PCA-Transformed Features")
            st.dataframe(pca_df.style.format("{:.4f}"))

            # Prediction display
            st.subheader("🔮 Prediction")
            if prediction == "Decrease":
                st.markdown(
                    f"<div style='background-color:#fbeaea;color:#9e1c1c;padding:10px;border-radius:8px;'>"
                    f"<b>Next Day BTC Price:</b> {prediction} (Prob: {proba:.2f})</div>",
                    unsafe_allow_html=True
                )
            else:
                st.success(f"Next Day BTC Price: **{prediction}** (Prob: {proba:.2f})")
            # Log prediction
            log = {
                "fetch_date": datetime.today().strftime("%Y-%m-%d"),
                "btc_open": btc["open"],
                "btc_close": btc["close"],
                "sent_pos": aggregated_sentiments[0],
                "sent_neu": aggregated_sentiments[1],
                "sent_neg": aggregated_sentiments[2],
                "sent_pos_pct": aggregated_sentiments[3],
                "sent_neu_pct": aggregated_sentiments[4],
                "sent_neg_pct": aggregated_sentiments[5],
                "macro_gold": macro["gold"]["close"],
                "macro_sp500": macro["sp500"]["close"],
                "macro_dxy": macro["dxy"]["close"],
                "interest_rate": fed["interest_rate"],
                "inflation": fed["inflation"],
                "prediction": prediction,
                "prob": proba,
                "news_cryptonews": " || ".join(news_by_source["CryptoNews"]),
                "news_cryptopotato": " || ".join(news_by_source["CryptoPotato"])
            }

            log_prediction(log)