Spaces:

Knight-coderr
/

StockAnalysis

Running

App Files Files Community

StockAnalysis / app.py

Knight-coderr

Update app.py

7cded74 verified 12 days ago

raw

history blame contribute delete

8.04 kB

	import streamlit as st
	import pandas as pd
	import yfinance as yf
	from textblob import TextBlob
	import joblib
	import matplotlib.pyplot as plt
	from datetime import datetime
	import plotly.express as px

	# Function to load stock data using yfinance/ this is going to refresh after 1 day
	@st.cache_data(ttl=86400)
	def load_stock_data(tickers, start_date, end_date):
	with st.spinner('Downloading stock data...'):
	data = yf.download(tickers, start=start_date, end=end_date, group_by='ticker', auto_adjust=True)

	all_data = []
	for ticker in tickers:
	df = data[ticker].copy().reset_index()
	df['Stock Name'] = ticker
	all_data.append(df)

	merged_data = pd.concat(all_data, ignore_index=True)
	return merged_data

	tickers = ['TSLA', 'MSFT', 'PG', 'META', 'AMZN', 'GOOG', 'AMD', 'AAPL', 'NFLX', 'TSM',
	'KO', 'F', 'COST', 'DIS', 'VZ', 'CRM', 'INTC', 'BA', 'BX', 'NOC', 'PYPL', 'ENPH', 'NIO', 'ZS', 'XPEV']
	start_date = (datetime.today() - pd.DateOffset(years=1)).strftime('%Y-%m-%d')
	end_date = datetime.today().strftime('%Y-%m-%d')

	# Cache stock data for 1 day using st.cache_data
	@st.cache_data(ttl=86400)
	def load_and_cache_stock_data():
	return load_stock_data(tickers, start_date, end_date)

	# Initialize stock_data once at app startup
	if "stock_data" not in st.session_state:
	st.session_state["stock_data"] = load_and_cache_stock_data()

	stock_data = st.session_state["stock_data"]


	# Perform sentiment analysis on tweets (assuming you still have your tweets data)
	tweets_data = pd.read_csv('data/stock_tweets.csv')

	# Convert the Date columns to datetime
	tweets_data['Date'] = pd.to_datetime(tweets_data['Date']).dt.date

	# Perform sentiment analysis on tweets
	def get_sentiment(tweet):
	analysis = TextBlob(tweet)
	return analysis.sentiment.polarity

	tweets_data['Sentiment'] = tweets_data['Tweet'].apply(get_sentiment)

	# Aggregate sentiment by date and stock
	daily_sentiment = tweets_data.groupby(['Date', 'Stock Name']).mean(numeric_only=True).reset_index()

	# Convert the Date column in daily_sentiment to datetime64[ns]
	daily_sentiment['Date'] = pd.to_datetime(daily_sentiment['Date'])

	# Merge stock data with sentiment data
	merged_data = pd.merge(stock_data, daily_sentiment, how='left', on=['Date', 'Stock Name'])

	# Fill missing sentiment values with 0 (neutral sentiment)
	merged_data['Sentiment'] = merged_data['Sentiment'].fillna(0)

	# Sort the data by date
	merged_data = merged_data.sort_values(by='Date')

	# Create lagged features
	merged_data['Prev_Close'] = merged_data.groupby('Stock Name')['Close'].shift(1)
	merged_data['Prev_Sentiment'] = merged_data.groupby('Stock Name')['Sentiment'].shift(1)

	# Create moving averages
	merged_data['MA7'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=7).mean())
	merged_data['MA14'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=14).mean())

	# Create daily price changes
	merged_data['Daily_Change'] = merged_data['Close'] - merged_data['Prev_Close']

	# Create volatility
	merged_data['Volatility'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=7).std())

	# Drop rows with missing values
	merged_data.dropna(inplace=True)

	# Load the best model
	model_filename = 'model/best_model.pkl'
	model = joblib.load(model_filename)

	# Streamlit application layout
	st.title("Stock Price Prediction Using Sentiment Analysis")

	# User input for stock data
	st.header("Input Stock Data")
	stock_names = merged_data['Stock Name'].unique()
	selected_stock = st.selectbox("Select Stock Name", stock_names)
	days_to_predict = st.number_input("Number of Days to Predict", min_value=1, max_value=30, value=10)

	# Get the latest data for the selected stock
	latest_data = merged_data[merged_data['Stock Name'] == selected_stock].iloc[-1]
	prev_close = latest_data['Close']
	prev_sentiment = latest_data['Sentiment']
	ma7 = latest_data['MA7']
	ma14 = latest_data['MA14']
	daily_change = latest_data['Daily_Change']
	volatility = latest_data['Volatility']

	# Display the latest stock data in a table
	latest_data_df = pd.DataFrame({
	'Metric': ['Previous Close Price', 'Previous Sentiment', '7-day Moving Average', '14-day Moving Average', 'Daily Change', 'Volatility'],
	'Value': [prev_close, prev_sentiment, ma7, ma14, daily_change, volatility]
	})

	st.write("Latest Stock Data:")
	st.write(latest_data_df)

	st.write("Use the inputs above to predict the next days close prices of the stock.")
	if st.button("Predict"):
	predictions = []
	latest_date = datetime.now()

	for i in range(days_to_predict):
	X_future = pd.DataFrame({
	'Prev_Close': [prev_close],
	'Prev_Sentiment': [prev_sentiment],
	'MA7': [ma7],
	'MA14': [ma14],
	'Daily_Change': [daily_change],
	'Volatility': [volatility]
	})

	next_day_prediction = model.predict(X_future)[0]
	predictions.append(next_day_prediction)

	# Update features for next prediction
	prev_close = next_day_prediction
	ma7 = (ma7 * 6 + next_day_prediction) / 7 # Simplified rolling calculation
	ma14 = (ma14 * 13 + next_day_prediction) / 14 # Simplified rolling calculation
	daily_change = next_day_prediction - prev_close

	# Prepare prediction data for display
	prediction_dates = pd.date_range(start=latest_date + pd.Timedelta(days=1), periods=days_to_predict)
	prediction_df = pd.DataFrame({
	'Date': prediction_dates,
	'Predicted Close Price': predictions
	})

	st.subheader("Predicted Prices")
	st.dataframe(prediction_df)

	# Plot predictions using Plotly
	fig = px.line(prediction_df, x='Date', y='Predicted Close Price', markers=True, title=f"{selected_stock} Predicted Close Prices")
	st.plotly_chart(fig, use_container_width=True)

	# ----------------------------------------
	# Enhanced Visualizations
	st.header("Enhanced Stock Analysis")
	stock_history = merged_data[merged_data['Stock Name'] == selected_stock]

	# Date filter slider
	min_date = pd.to_datetime(merged_data['Date'].min()).date()
	max_date = pd.to_datetime(merged_data['Date'].max()).date()

	date_range = st.slider(
	"Select Date Range for Visualizations",
	min_value=min_date,
	max_value=max_date,
	value=(min_date, max_date),
	format="YYYY-MM-DD"
	)

	filtered_data = stock_history[(stock_history['Date'] >= pd.to_datetime(date_range[0])) &
	(stock_history['Date'] <= pd.to_datetime(date_range[1]))]

	with st.expander("Price vs Sentiment Trend"):
	fig1 = px.line(filtered_data, x='Date', y=['Close', 'Sentiment'],
	labels={'value': 'Price / Sentiment', 'variable': 'Metric'},
	title=f"{selected_stock} - Close Price & Sentiment")
	st.plotly_chart(fig1, use_container_width=True)

	with st.expander("Volatility Over Time"):
	fig2 = px.line(filtered_data, x='Date', y='Volatility',
	title=f"{selected_stock} - 7-Day Rolling Volatility")
	st.plotly_chart(fig2, use_container_width=True)

	with st.expander("Moving Averages (MA7 vs MA14)"):
	fig3 = px.line(filtered_data, x='Date', y=['MA7', 'MA14'],
	labels={'value': 'Price', 'variable': 'Moving Average'},
	title=f"{selected_stock} - Moving Averages")
	st.plotly_chart(fig3, use_container_width=True)

	with st.expander("Daily Price Change"):
	fig4 = px.line(filtered_data, x='Date', y='Daily_Change',
	title=f"{selected_stock} - Daily Price Change")
	st.plotly_chart(fig4, use_container_width=True)

	with st.expander("Sentiment Distribution"):
	fig5 = px.histogram(filtered_data, x='Sentiment', nbins=30,
	title=f"{selected_stock} - Sentiment Score Distribution")
	st.plotly_chart(fig5, use_container_width=True)