File size: 6,497 Bytes
39a7596 6500f58 39a7596 6500f58 39a7596 0fe146c 6500f58 0d93346 6500f58 0d93346 8fe2bec 6500f58 8fe2bec 6500f58 8fe2bec 6500f58 0d93346 6500f58 0d93346 6500f58 39a7596 6500f58 a31fbae 8fe2bec 6500f58 39a7596 6500f58 39a7596 6500f58 39a7596 6500f58 0d93346 c05a6e0 0d93346 c05a6e0 0d93346 c05a6e0 c387f36 c05a6e0 c387f36 80cb8a1 c1d3dad 80cb8a1 c1d3dad 80cb8a1 c387f36 80cb8a1 c387f36 80cb8a1 c387f36 80cb8a1 c387f36 80cb8a1 c387f36 80cb8a1 0d93346 c05a6e0 0d93346 6500f58 80cb8a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
import streamlit as st
import pandas as pd
import numpy as np
import time
from datetime import datetime
import joblib
import plotly.express as px
import pinecone
from pinecone import ServerlessSpec
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import os
# --- Load Trained Model and Preprocessor ---
try:
    model = joblib.load('price_prediction_model.joblib')
    preprocessor = joblib.load('price_preprocessor.joblib')
except FileNotFoundError:
    st.error("Model or preprocessor file not found. Please run the training script first.")
    st.stop()

# --- Pinecone Setup ---
# SECURITY: the API key used to be hard-coded here. Read it from the
# environment instead so the secret never lives in source control.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
if not PINECONE_API_KEY:
    st.error("PINECONE_API_KEY environment variable is not set.")
    st.stop()
PINECONE_ENVIRONMENT = "aped-4627-b74a"
PINECONE_INDEX_NAME = "document-qa-index"
NAMESPACE = "your-namespace"

# --- Embedding Model ---
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
embedding_model = SentenceTransformer(EMBEDDING_MODEL)
# all-mpnet-base-v2 produces 768-dimensional embeddings; the Pinecone index
# dimension must match or every upsert/query will be rejected.
EMBEDDING_DIM = 768

# --- Initialize Pinecone Connection ---
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
# Create the index on first run; dimension must equal the embedding size.
if PINECONE_INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=PINECONE_INDEX_NAME,
        dimension=EMBEDDING_DIM,  # was hard-coded 384 — mismatched the 768-dim mpnet model
        metric='cosine',
        spec=ServerlessSpec(cloud='aws', region='us-west-2')
    )
# Connect to Pinecone index
index = pc.Index(PINECONE_INDEX_NAME)
# --- Generate Sample Data ---
def generate_single_data(product_type):
    """Simulate one sales event for *product_type*.

    Returns a single-row DataFrame with a current timestamp, a random
    price in [10, 500), a random click count, a random payment method,
    and a random 4-digit customer id.
    """
    record = {
        'timestamp': datetime.now(),
        'product_type': product_type,
        'price': round(np.random.uniform(10, 500), 2),
        'num_clicks': np.random.randint(1, 100),
        'payment_method': np.random.choice(
            ['Credit Card', 'Debit Card', 'UPI', 'Net Banking']
        ),
        'customer_id': np.random.randint(1000, 9999),
    }
    return pd.DataFrame([record])
def predict_price(data):
    """Predict a price for one simulated record.

    Uses the module-level ``preprocessor`` and ``model`` loaded at startup;
    only the 'product_type' and 'num_clicks' columns of *data* are used.
    Returns the scalar prediction for the first (only) row.
    """
    features = data[['product_type', 'num_clicks']]
    transformed = preprocessor.transform(features)
    columns = preprocessor.get_feature_names_out(['product_type', 'num_clicks'])
    frame = pd.DataFrame(transformed, columns=columns)
    return model.predict(frame)[0]
# --- Pinecone Search ---
def search_pinecone(query, top_k=3):
query_embedding = embedding_model.encode([query])[0]
try:
results = index.query(vector=query_embedding.tolist(), top_k=top_k, include_metadata=True, namespace=NAMESPACE)
return results
except Exception as e:
st.error(f"Error during Pinecone search: {e}")
return {}
# --- QA Response Generator ---
def generate_response(query, context, llm_model=""):
try:
pipe = pipeline("question-answering", model=llm_model)
result = pipe(question=query, context=context)
return result['answer'].strip()
except Exception as e:
st.error(f"Error generating response: {e}")
return "Sorry, I could not generate a response."
# --- Streamlit App ---
def main():
st.set_page_config(page_title="Real-Time Sales & Chatbot", layout="wide")
st.title("Real-Time Sales Dashboard + Chatbot")
tabs = st.tabs(["Dashboard", " Chatbot"])
with tabs[0]:
st.sidebar.header("βοΈ Controls")
simulation_speed = st.sidebar.slider("Simulation Speed (seconds)", 0.1, 5.0, 1.0)
category_filter = st.sidebar.selectbox("Choose a Product Type", ['Electronics', 'Clothing', 'Books', 'Home Goods'])
st.header(" Real-Time Price Prediction")
# Ensure 'data_history' is initialized correctly
if 'data_history' not in st.session_state:
st.session_state.data_history = pd.DataFrame(columns=['timestamp', 'product_type', 'price', 'num_clicks', 'predicted_price'])
auto_simulation = st.sidebar.checkbox("Auto Simulate Data", value=True)
if auto_simulation:
# Simulation running logic without rerun
time.sleep(simulation_speed) # Delay based on user speed control
# Generate and predict new data
new_data = generate_single_data(category_filter)
predicted_price = predict_price(new_data)
new_data['predicted_price'] = predicted_price
# Add to session state data
st.session_state.data_history = pd.concat([st.session_state.data_history, new_data], ignore_index=True)
st.session_state.data_history['timestamp'] = pd.to_datetime(st.session_state.data_history['timestamp'])
# Sort and keep only the most recent 20 data points
st.session_state.data_history = st.session_state.data_history.sort_values(by='timestamp', ascending=False).head(20)
st.subheader("π Live Simulated Data and Predictions")
if not st.session_state.data_history.empty:
st.dataframe(st.session_state.data_history[['timestamp', 'product_type', 'price', 'num_clicks', 'predicted_price']])
# Plotting the predicted price
fig = px.line(
st.session_state.data_history.sort_values(by='timestamp'),
x='timestamp',
y='predicted_price',
title='Predicted Price Over Time'
)
st.plotly_chart(fig, use_container_width=True)
else:
st.warning("No data available to display.")
with tabs[1]:
st.header("π§Ύ Document Q&A Chatbot")
query = st.text_input("Ask a question about the documentation:")
if query:
with st.spinner("π Searching documents..."):
search_results = search_pinecone(query)
context = ""
for match in search_results.get('matches', []):
context += f"{match['metadata']['source']} (Chunk {match['metadata']['chunk']}): {match['metadata']['text']}\n---\n"
st.subheader("π Retrieved Context")
st.info(context if context else "No relevant documents found.")
if context:
llm_model_name = "distilbert-base-cased-distilled-squad"
with st.spinner("π‘ Generating response..."):
response = generate_response(query, context, llm_model_name)
st.subheader(" Chatbot Response:")
st.success(response)
if __name__ == "__main__":
main()
|