# StockAnalysis / app.py
# Knight-coderr
# Update app.py
# 7cded74 verified
import streamlit as st
import pandas as pd
import yfinance as yf
from textblob import TextBlob
import joblib
import matplotlib.pyplot as plt
from datetime import datetime
import plotly.express as px
# Download price history with yfinance; Streamlit caches the result for one day.
@st.cache_data(ttl=86400)
def load_stock_data(tickers, start_date, end_date):
    """Download price history for several tickers and stack it into one frame.

    Args:
        tickers: List of ticker symbols. Each symbol is used as a top-level
            column key of the frame returned by ``yf.download``.
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        A DataFrame made of the per-ticker frames concatenated row-wise with a
        fresh index, each row tagged with its symbol in a ``'Stock Name'``
        column.
    """
    with st.spinner('Downloading stock data...'):
        data = yf.download(tickers, start=start_date, end=end_date, group_by='ticker', auto_adjust=True)

        def _frame_for(symbol):
            # Turn the downloaded index into a regular column and label the rows.
            frame = data[symbol].copy().reset_index()
            frame['Stock Name'] = symbol
            return frame

        merged_data = pd.concat([_frame_for(symbol) for symbol in tickers], ignore_index=True)
    return merged_data
# Ticker symbols the app downloads and offers for selection.
tickers = [
    'TSLA', 'MSFT', 'PG', 'META', 'AMZN',
    'GOOG', 'AMD', 'AAPL', 'NFLX', 'TSM',
    'KO', 'F', 'COST', 'DIS', 'VZ',
    'CRM', 'INTC', 'BA', 'BX', 'NOC',
    'PYPL', 'ENPH', 'NIO', 'ZS', 'XPEV',
]

# Download window: the one-year span ending today, as 'YYYY-MM-DD' strings.
_DATE_FORMAT = '%Y-%m-%d'
start_date = (datetime.today() - pd.DateOffset(years=1)).strftime(_DATE_FORMAT)
end_date = datetime.today().strftime(_DATE_FORMAT)
# Zero-argument loader for the app's default tickers and date window, cached for one day.
@st.cache_data(ttl=86400)
def load_and_cache_stock_data():
    """Return ``load_stock_data`` applied to the module-level defaults.

    Reads the module-level ``tickers``, ``start_date`` and ``end_date``.
    Streamlit caches the returned frame for 86400 seconds.
    """
    default_history = load_stock_data(tickers, start_date, end_date)
    return default_history
# Read the price history through the cached loader on every script run.
# Pinning the frame in session state ("load once per session") would bypass the
# loader's one-day TTL, so a browser session left open would never see refreshed
# data. The Streamlit cache already makes repeated calls cheap.
stock_data = load_and_cache_stock_data()
# Keep the session-state entry populated for any code that still reads it.
st.session_state["stock_data"] = stock_data
# Load the tweet data used for the sentiment features and reduce each tweet's
# 'Date' to a plain calendar date so tweets can later be grouped per day.
tweets_data = pd.read_csv('data/stock_tweets.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date']).dt.date
)
# Score a single tweet's sentiment polarity with TextBlob.
def get_sentiment(tweet):
    """Return the TextBlob sentiment polarity of ``tweet``.

    Args:
        tweet: Tweet text. Values that are not strings (for example the NaN
            pandas yields for an empty CSV cell, or ``None``) are treated as
            carrying no text.

    Returns:
        ``TextBlob(tweet).sentiment.polarity`` for string input, and ``0.0``
        for non-string input. ``0`` is the same neutral value this script
        fills in for days without tweets.
    """
    if not isinstance(tweet, str):
        # TextBlob rejects non-string input with a TypeError, which would abort
        # scoring of the whole column; score such rows as neutral instead.
        return 0.0
    return TextBlob(tweet).sentiment.polarity
# Streamlit re-executes this script on every widget interaction. Without caching,
# every tweet would be re-scored on each rerun, so the per-tweet scoring is
# wrapped in a cached helper keyed on the tweet text column.
@st.cache_data
def _score_tweets(tweet_texts):
    """Return one sentiment score per entry of the Series ``tweet_texts``."""
    return tweet_texts.apply(get_sentiment)


tweets_data['Sentiment'] = _score_tweets(tweets_data['Tweet'])
# Aggregate sentiment by date and stock: mean of the numeric columns per (Date, Stock Name)
daily_sentiment = tweets_data.groupby(['Date', 'Stock Name']).mean(numeric_only=True).reset_index()
# Convert the Date column back to a pandas datetime column so it can be merged on
daily_sentiment['Date'] = pd.to_datetime(daily_sentiment['Date'])
# Attach the daily sentiment to every price row; a left join keeps price rows
# that have no matching tweets.
merged_data = stock_data.merge(daily_sentiment, how='left', on=['Date', 'Stock Name'])
# Rows without tweets get a neutral sentiment of 0.
merged_data['Sentiment'] = merged_data['Sentiment'].fillna(0)
# Order rows by date so the per-stock shifts and rolling windows look back in time.
merged_data = merged_data.sort_values(by='Date')

# Per-stock views reused by the feature calculations below.
close_by_stock = merged_data.groupby('Stock Name')['Close']
sentiment_by_stock = merged_data.groupby('Stock Name')['Sentiment']

# Lagged features: the previous row's close and sentiment within each stock.
merged_data['Prev_Close'] = close_by_stock.shift(1)
merged_data['Prev_Sentiment'] = sentiment_by_stock.shift(1)

# 7- and 14-row moving averages of the close, per stock.
merged_data['MA7'] = close_by_stock.transform(lambda closes: closes.rolling(window=7).mean())
merged_data['MA14'] = close_by_stock.transform(lambda closes: closes.rolling(window=14).mean())

# Change in close relative to the previous row of the same stock.
merged_data['Daily_Change'] = merged_data['Close'] - merged_data['Prev_Close']

# Volatility: 7-row rolling standard deviation of the close, per stock.
merged_data['Volatility'] = close_by_stock.transform(lambda closes: closes.rolling(window=7).std())

# Discard every row that has a missing value in any column (this includes the
# warm-up rows of the lag and rolling features).
merged_data = merged_data.dropna()
# Load the best model
model_filename = 'model/best_model.pkl'


# Streamlit re-executes this script on every interaction; st.cache_resource
# deserializes the model once and hands back the same object on later runs.
@st.cache_resource
def _load_model(path):
    """Load the model stored at ``path`` with joblib.

    The returned object is the shared cached instance, so callers should treat
    it as read-only. joblib files are pickle-based: only load files from a
    trusted source.
    """
    return joblib.load(path)


model = _load_model(model_filename)
# Streamlit application layout
st.title("Stock Price Prediction Using Sentiment Analysis")

# User input for stock data
st.header("Input Stock Data")
stock_names = merged_data['Stock Name'].unique()
selected_stock = st.selectbox("Select Stock Name", stock_names)
days_to_predict = st.number_input("Number of Days to Predict", min_value=1, max_value=30, value=10)

# Get the latest data for the selected stock. merged_data is sorted by date, so
# the last matching row is the most recent one.
selected_rows = merged_data[merged_data['Stock Name'] == selected_stock]
if selected_rows.empty:
    # No complete rows survived for this stock (or for any stock). Taking
    # iloc[-1] would raise IndexError, so report the problem and halt this run.
    st.error("No stock data is available for the selected stock. Please try again later.")
    st.stop()
latest_data = selected_rows.iloc[-1]

# Starting feature values for the forecast. The latest close and sentiment act
# as the "previous" values for the first predicted day.
prev_close = latest_data['Close']
prev_sentiment = latest_data['Sentiment']
ma7 = latest_data['MA7']
ma14 = latest_data['MA14']
daily_change = latest_data['Daily_Change']
volatility = latest_data['Volatility']

# Display the latest stock data in a table
latest_data_df = pd.DataFrame({
    'Metric': ['Previous Close Price', 'Previous Sentiment', '7-day Moving Average', '14-day Moving Average', 'Daily Change', 'Volatility'],
    'Value': [prev_close, prev_sentiment, ma7, ma14, daily_change, volatility]
})
st.write("Latest Stock Data:")
st.write(latest_data_df)
st.write("Use the inputs above to predict the next days close prices of the stock.")
# Multi-step forecast: each prediction is fed back in as the next step's
# previous close.
if st.button("Predict"):
    predictions = []
    # Anchor the forecast to the date of the latest data row (the row the
    # starting features come from), at midnight, instead of wall-clock "now".
    latest_date = pd.Timestamp(latest_data['Date']).normalize()
    for _ in range(days_to_predict):
        X_future = pd.DataFrame({
            'Prev_Close': [prev_close],
            'Prev_Sentiment': [prev_sentiment],
            'MA7': [ma7],
            'MA14': [ma14],
            'Daily_Change': [daily_change],
            'Volatility': [volatility]
        })
        next_day_prediction = model.predict(X_future)[0]
        predictions.append(next_day_prediction)

        # Update features for the next prediction. The daily change has to be
        # taken against the previous close BEFORE prev_close is overwritten;
        # in the opposite order it is always zero.
        daily_change = next_day_prediction - prev_close
        prev_close = next_day_prediction
        ma7 = (ma7 * 6 + next_day_prediction) / 7  # Simplified rolling calculation
        ma14 = (ma14 * 13 + next_day_prediction) / 14  # Simplified rolling calculation
        # prev_sentiment and volatility are held at their latest observed values.

    # Label the predictions with the business days (Mon-Fri) that follow the
    # latest data row, so no predicted close lands on a weekend. Exchange
    # holidays are not excluded.
    prediction_dates = pd.bdate_range(start=latest_date + pd.offsets.BDay(1), periods=days_to_predict)
    prediction_df = pd.DataFrame({
        'Date': prediction_dates,
        'Predicted Close Price': predictions
    })

    st.subheader("Predicted Prices")
    st.dataframe(prediction_df)

    # Plot predictions using Plotly
    fig = px.line(prediction_df, x='Date', y='Predicted Close Price', markers=True, title=f"{selected_stock} Predicted Close Prices")
    st.plotly_chart(fig, use_container_width=True)
# ----------------------------------------
# Enhanced Visualizations
st.header("Enhanced Stock Analysis")

# Feature rows of the stock picked in the selectbox.
stock_history = merged_data.loc[merged_data['Stock Name'] == selected_stock]

# Slider bounds span the dates of the whole merged dataset, as plain dates.
all_dates = merged_data['Date']
min_date = pd.to_datetime(all_dates.min()).date()
max_date = pd.to_datetime(all_dates.max()).date()

# Date filter slider, initially covering the full range.
date_range = st.slider(
    "Select Date Range for Visualizations",
    min_value=min_date,
    max_value=max_date,
    value=(min_date, max_date),
    format="YYYY-MM-DD"
)

# Keep the rows whose date lies inside the chosen range, both ends included.
range_start = pd.to_datetime(date_range[0])
range_end = pd.to_datetime(date_range[1])
filtered_data = stock_history[stock_history['Date'].between(range_start, range_end)]
def _chart_title(caption):
    """Return ``caption`` prefixed with the selected stock's name."""
    return f"{selected_stock} - {caption}"


def _render(figure):
    """Draw a Plotly figure at the full width of its container."""
    st.plotly_chart(figure, use_container_width=True)


# Each chart lives in its own collapsible section and plots the date-filtered
# rows of the selected stock.
with st.expander("Price vs Sentiment Trend"):
    fig1 = px.line(
        filtered_data,
        x='Date',
        y=['Close', 'Sentiment'],
        labels={'value': 'Price / Sentiment', 'variable': 'Metric'},
        title=_chart_title("Close Price & Sentiment"),
    )
    _render(fig1)

with st.expander("Volatility Over Time"):
    fig2 = px.line(
        filtered_data,
        x='Date',
        y='Volatility',
        title=_chart_title("7-Day Rolling Volatility"),
    )
    _render(fig2)

with st.expander("Moving Averages (MA7 vs MA14)"):
    fig3 = px.line(
        filtered_data,
        x='Date',
        y=['MA7', 'MA14'],
        labels={'value': 'Price', 'variable': 'Moving Average'},
        title=_chart_title("Moving Averages"),
    )
    _render(fig3)

with st.expander("Daily Price Change"):
    fig4 = px.line(
        filtered_data,
        x='Date',
        y='Daily_Change',
        title=_chart_title("Daily Price Change"),
    )
    _render(fig4)

with st.expander("Sentiment Distribution"):
    fig5 = px.histogram(
        filtered_data,
        x='Sentiment',
        nbins=30,
        title=_chart_title("Sentiment Score Distribution"),
    )
    _render(fig5)