# prakashkota's picture
# links to article
# 2e7be42
#+--------------------------------------------------------------------------------------------+
# Model for Stock Price Prediction via Dense only Neural Network
# Using today vs tomorrow analysis
#
# Written by: Prakash R. Kota
# Location: East Greenbush, NY
#
# Using just a Dense Neural Network Model
# Adding lots of direct stock parameters from Yahoo Finance
# Open, High, Low, Close, Volume
#
# Will not be using other parameters such as
# Return, SMA10, EMA10, RollingVol10,
# SP500, Nasdaq, VIX
# RSI, Day-of-Week
# Removed all saving and graphing from PRK_1a_tf_Stock_NN.ipynb
# Keeping it minimal for just the stock prediction and output display table
# Goal is to make an MVP - Minimum Viable Product
# Infer the NN Model from the Saved Model - app.py required for Hugging Face Space
#
# PRK_1b_2_Model_Infer_tf_Stock_NN.ipynb - Using Tensorflow 2.16.2 CPU
# based on
# PRK_1b_tf_Stock_NN.ipynb
# PRK_1a_tf_Stock_NN.ipynb
# PRK_11b_tf_Stock_DenseOnly.ipynb
# Renamed PRK_10e_tf_Stock_DenseOnly.ipynb for convenience
# This is the best Notebook Code for the NN Model
#
# Written on: 28 Mar 2025
# Last update: 02 Apr 2025
#+--------------------------------------------------------------------------------------------+
import os
# MUST come before importing yfinance (presumably the settings are read when
# the package is imported - TODO confirm against the yfinance version in use).
os.environ["YFINANCE_NO_CACHE"] = "1"  # disable cache
os.environ["XDG_CACHE_HOME"] = "/tmp/xfake_cache"
# exist_ok=True already tolerates an existing directory, so a separate
# os.path.exists() pre-check is unnecessary (and would be racy).
os.makedirs("/tmp/xfake_cache", exist_ok=True)
import yfinance as yf
import gradio as gr
import numpy as np
import pandas as pd
from datetime import datetime
from pandas.tseries.offsets import BDay
from tabulate import tabulate
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import joblib
import sklearn
import tensorflow as tf
import shutil
from pytz import timezone
from pandas.tseries.offsets import BDay
import hashlib
import yfinance as yf
import time
import gc
import threading
# Module-wide mutex. Holding it forces all inference requests to run one at a
# time, which avoids file lock conflicts in SQLite. It's simple but effective
# for debugging.
lock = threading.Lock()

# --- Load saved model and scalers --- #
model_dir = "./model"
NN_model = load_model(os.path.join(model_dir, "NN_CPU_model.keras"))
# Compiling is not needed for inference, but it ensures full initialisation.
NN_model.compile(loss="mse", optimizer="adam")
# Warm-up: one dummy prediction on a single all-zero row of 5 features.
NN_model.predict(np.zeros(shape=(1, 5)))
# Feature scaler first, target scaler second - same load order as before.
scaler_X, scaler_y = (
    joblib.load(os.path.join(model_dir, pkl_name))
    for pkl_name in ("scaler_X.pkl", "scaler_y.pkl")
)
def safe_download(*args, retries=3, delay=1, **kwargs):
    """Call ``yf.download`` until it yields a non-empty DataFrame.

    All positional and keyword arguments other than ``retries`` and ``delay``
    are forwarded to ``yf.download`` unchanged.

    Args:
        retries: Maximum number of download attempts.
        delay: Seconds to sleep between two consecutive attempts. No sleep
            happens after the final attempt.

    Returns:
        The first non-empty DataFrame returned by ``yf.download``.

    Raises:
        RuntimeError: If no attempt produced a non-empty DataFrame. The last
            exception raised during a download attempt (if any) is attached
            as ``__cause__`` so the root failure is not lost.
    """
    last_error = None
    for attempt in range(1, retries + 1):
        try:
            df = yf.download(*args, **kwargs)
        except Exception as e:  # broad on purpose: any failure is retried
            last_error = e
            print(f"[Attempt {attempt}] yfinance error: {e}")
        else:
            # Treat a missing result the same as an empty one.
            if df is not None and not df.empty:
                return df
        # Only wait when another attempt will actually follow.
        if attempt < retries:
            time.sleep(delay)
    raise RuntimeError("yfinance download failed after retries.") from last_error
# --- Inference Function --- #
def predict_stock():
    """Back-test the saved model on recent NVDA data and forecast the next close.

    Downloads daily NVDA prices from 2020-01-01 up to the current US/Eastern
    date, scales them with the saved scalers, pairs each day's
    Open/High/Low/Close/Volume row with the following day's close, and
    evaluates the model on every pair dated after 2024-12-31. The most recent
    row is then used to predict the close of the next business day.

    The whole body runs while holding the module-level ``lock``, so concurrent
    calls execute one at a time.

    Returns:
        tuple: ``(summary, table)``. ``summary`` is a multi-line string with
        the last close, the predicted close and the ±MAPE / ±SAE ranges.
        ``table`` is a DataFrame with the string-formatted columns ``Date``,
        ``Actual Close``, ``Predicted Close``, ``% Error`` and
        ``±MAPE Range``, newest date first. When data cannot be fetched or is
        unusable, ``summary`` is an ``"Error: ..."`` message and ``table`` is
        an empty DataFrame.
    """
    with lock:
        # Lightweight log to verify the cache configuration is in effect.
        print("YFINANCE_NO_CACHE =", os.getenv("YFINANCE_NO_CACHE"))

        # Use US/Eastern consistently for "today".
        now_est = datetime.now(timezone("US/Eastern"))
        today = now_est.strftime('%Y-%m-%d')
        print("Current Eastern Time:", now_est)
        print("Trying to fetch data up to:", today)

        # --- Clear yfinance cache to get latest volume and price data --- #
        cache_path = os.path.expanduser("~/.cache/py-yfinance")
        if os.path.exists(cache_path):
            print("Clearing yfinance cache...")
            # Best-effort cleanup: a failed delete must not abort the request.
            shutil.rmtree(cache_path, ignore_errors=True)

        ticker = "NVDA"
        start_date = "2020-01-01"
        train_end_date = "2024-12-31"

        # Download the full dataset (might contain stale final row).
        # NOTE(review): yfinance documents `end` as exclusive, so today's
        # (possibly partial) bar is not requested - confirm this is intended.
        try:
            full_data = safe_download(
                tickers=ticker,
                start=start_date,
                end=today,
                interval="1d",
                auto_adjust=False,
                actions=False,
                progress=False,
                threads=True,  # <-- for parallel downloads, use True
            )
        except Exception as e:
            print("yfinance error:", e)
            return "Error: Could not fetch stock data. Please try again later.", pd.DataFrame()
        if full_data is None or full_data.empty:
            print("yfinance returned empty data for:", today)
            return "Error: Stock data not available at this time. Please try again shortly.", pd.DataFrame()

        features = ["Open", "High", "Low", "Close", "Volume"]
        X_scaled = scaler_X.transform(full_data[features])
        y_scaled = scaler_y.transform(full_data["Close"].values.reshape(-1, 1))

        # Sanity check BEFORE predicting: the latest row feeds the forecast,
        # so it must not contain NaNs (e.g. an incomplete final bar).
        X_input = X_scaled[-1].reshape(1, -1)
        if np.isnan(X_input).any():
            print("NaNs detected in input!")
            return "Error: Latest stock data is incomplete. Please try again shortly.", pd.DataFrame()

        # "Today vs tomorrow" pairing: row i's features target row i+1's close.
        X_all = X_scaled[:-1]
        y_all = y_scaled[1:].flatten()
        dates_all = full_data.index[1:]

        # Evaluate only on targets dated after the training cut-off.
        test_mask = dates_all > pd.to_datetime(train_end_date)
        X_test, y_test = X_all[test_mask], y_all[test_mask]
        dates_test = dates_all[test_mask]
        if len(dates_test) == 0:
            # Without evaluation rows there is nothing to predict on and the
            # error statistics would be NaN.
            print("No rows after training end date:", train_end_date)
            return "Error: Not enough recent stock data to evaluate the model. Please try again shortly.", pd.DataFrame()

        # Predict next day prices for the evaluation window.
        y_pred_scaled = NN_model.predict(X_test).flatten()
        y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
        y_true = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten()

        # Forecast for the business day following the last downloaded row.
        last_date = full_data.index[-1]
        last_close_price = float(full_data["Close"].iloc[-1].item())
        next_day_pred_scaled = NN_model.predict(X_input)
        next_day_pred = scaler_y.inverse_transform(next_day_pred_scaled)[0][0]
        next_date = (last_date + BDay(1)).date()

        # Mean and standard deviation of the absolute percentage error.
        abs_pct_error = np.abs((y_true - y_pred) / y_true)
        mape = np.mean(abs_pct_error) * 100
        std_ape = np.std(abs_pct_error) * 100
        mape_margin = next_day_pred * (mape / 100)
        sae_margin = next_day_pred * (std_ape / 100)

        summary_lines = [
            f"Prediction for {ticker}:",
            f"Last available date: {last_date.date()}, Close = ${last_close_price:.2f}",
            f"Predicted closing price for next trading day ({next_date}): ${next_day_pred:.2f}",
            f"Expected range (±MAPE): ${next_day_pred - mape_margin:.2f} to ${next_day_pred + mape_margin:.2f}",
            f"Expected range (±SAE): ${next_day_pred - sae_margin:.2f} to ${next_day_pred + sae_margin:.2f}",
        ]
        summary = "\n".join(summary_lines)

        prediction_df = pd.DataFrame({
            'Date': dates_test,
            'Actual Close': y_true,
            'Predicted Close': y_pred,
        })
        # Derive the error and range columns while the prices are still numeric.
        prediction_df['% Error'] = (
            (prediction_df['Actual Close'] - prediction_df['Predicted Close'])
            / prediction_df['Actual Close']
        ) * 100
        prediction_df['% Error'] = prediction_df['% Error'].map(lambda x: f"{x:+.2f}%")
        prediction_df['±MAPE Range'] = prediction_df['Predicted Close'].apply(
            lambda x: f"${x * (1 - mape/100):.2f} to ${x * (1 + mape/100):.2f}"
        )
        # Convert everything to display strings.
        prediction_df['Date'] = prediction_df['Date'].dt.strftime("%Y-%m-%d")
        prediction_df['Actual Close'] = prediction_df['Actual Close'].map(lambda x: f"${x:.2f}")
        prediction_df['Predicted Close'] = prediction_df['Predicted Close'].map(lambda x: f"${x:.2f}")
        # ISO-formatted date strings sort chronologically; newest first.
        prediction_df = prediction_df.sort_values("Date", ascending=False)

        gc.collect()  # <-- garbage collection after predict

        return summary, prediction_df[["Date", "Actual Close", "Predicted Close", "% Error", "±MAPE Range"]]
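# --- Gradio Interface --- #
# The triple-quoted string below holds the earlier gr.Interface version of the
# UI. It is a bare string expression, so none of it is executed.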
"""
demo = gr.Interface(
fn=predict_stock,
inputs=[],
outputs=[
gr.Textbox(label="Prediction Summary", lines=6),
gr.Dataframe(headers=["Prediction For Date", "Actual Close", "Predicted Close", "% Error", "±MAPE Range"], label="Prediction Table (2025+)")
],
title="📈 NVDA Stock Predictor",
description="This app uses a Dense Neural Network to predict NVDA's next trading day's closing price.",
live=True #<-- changed to True for live queuing
)
demo.launch()
"""
# Column titles shown above the prediction table.
_TABLE_HEADERS = ["Prediction For Date", "Actual Close", "Predicted Close", "% Error", "±MAPE Range"]

# External article link
_ARTICLE_LINK_HTML = """<p style='margin-top: 20px;'><a href="https://prakashkota.com/2025/04/09/ai-ml-in-finance-how-a-lightweight-neural-network-forecasts-nvdas-next-stock-price-move/" target="_blank">
👉 Read the full article here - AI/ML in Finance: How a Lightweight Neural Network Forecasts NVDA’s Next Stock Price Move</a></p>"""

# Disclaimer paragraph
_DISCLAIMER_HTML = """
<p style='font-size: 14px; color: gray; line-height: 1.5;'>
<strong>Disclaimer:</strong> The information provided in this article and through the linked prediction model is for educational and informational purposes only.
It does not constitute financial, investment, or trading advice, and should not be relied upon as such.<br><br>
Any decisions made based on the model's output are solely at the user’s own risk. I make no guarantees regarding the accuracy, completeness, or reliability of the predictions.
I am not responsible for any financial losses or gains resulting from the use of this model.<br><br>
Always consult with a licensed financial advisor before making any investment decisions.
</p>
"""

# Components are created in display order: title, outputs, button, footer.
with gr.Blocks() as demo:
    # Title
    gr.Markdown("## 📊 NVDA Stock Prediction Using Neural Networks")

    # Outputs
    output_summary = gr.Textbox(lines=6, label="Prediction Summary")
    output_table = gr.Dataframe(label="Prediction Table (2025+)", headers=_TABLE_HEADERS)

    # Generate Button: runs predict_stock and fills both outputs.
    generate_btn = gr.Button("Generate")
    generate_btn.click(
        fn=predict_stock,
        inputs=[],
        outputs=[output_summary, output_table],
    )

    # Footer: article link followed by the disclaimer.
    gr.HTML(_ARTICLE_LINK_HTML)
    gr.HTML(_DISCLAIMER_HTML)

demo.launch(share=True)