File size: 6,497 Bytes
39a7596
 
 
6500f58
39a7596
 
 
 
6500f58
39a7596
0fe146c
6500f58
 
 
 
 
 
 
 
 
 
 
0d93346
 
6500f58
0d93346
8fe2bec
6500f58
 
 
8fe2bec
6500f58
 
8fe2bec
6500f58
 
 
 
0d93346
6500f58
0d93346
6500f58
39a7596
6500f58
a31fbae
8fe2bec
6500f58
 
39a7596
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6500f58
 
39a7596
 
 
6500f58
39a7596
 
6500f58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d93346
c05a6e0
0d93346
c05a6e0
0d93346
 
 
 
 
 
c05a6e0
c387f36
c05a6e0
c387f36
 
80cb8a1
c1d3dad
 
 
80cb8a1
c1d3dad
80cb8a1
 
c387f36
 
 
80cb8a1
 
c387f36
 
80cb8a1
 
c387f36
80cb8a1
c387f36
 
80cb8a1
 
 
c387f36
 
 
 
 
 
 
80cb8a1
 
0d93346
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c05a6e0
0d93346
6500f58
 
80cb8a1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import streamlit as st
import pandas as pd
import numpy as np
import time
from datetime import datetime
import joblib
import plotly.express as px
import pinecone
from pinecone import ServerlessSpec
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import os

# --- Load Trained Model and Preprocessor ---
# Both artifacts are produced by a separate training script; the app cannot run
# without them, so surface a visible error and halt the Streamlit script early.
try:
    model = joblib.load('price_prediction_model.joblib')
    preprocessor = joblib.load('price_preprocessor.joblib')
except FileNotFoundError:
    st.error("Model or preprocessor file not found. Please run the training script first.")
    st.stop()

# --- Pinecone Setup ---
# SECURITY FIX(review): the Pinecone API key was hard-coded in source. Prefer
# the PINECONE_API_KEY environment variable; the old literal is kept only as a
# fallback so existing deployments keep working. Rotate this key and delete the
# fallback as soon as possible — a key committed to source control is burned.
PINECONE_API_KEY = os.environ.get(
    "PINECONE_API_KEY",
    "pcsk_5CWCFt_9nPiRVa65nG4rs6ZC2cqnEthBSvcEhuQa53TK7FN6rwYZW1qQpacqoAvGUjNCFU",
)
PINECONE_ENVIRONMENT = "aped-4627-b74a"
PINECONE_INDEX_NAME = "document-qa-index"
NAMESPACE = "your-namespace"

# --- Embedding Model ---
# NOTE: all-mpnet-base-v2 produces 768-dimensional sentence embeddings; the
# Pinecone index dimension below must agree with this model's output size.
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
embedding_model = SentenceTransformer(EMBEDDING_MODEL)

# --- Initialize Pinecone Connection ---
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)

# Create the index on first run.
# BUG FIX: the index dimension must match the embedding model's output size.
# all-mpnet-base-v2 emits 768-dim vectors, but the index was created with
# dimension=384, which makes every subsequent upsert/query fail with a
# dimension-mismatch error. Derive the dimension from the loaded model so the
# two can never drift apart again.
if PINECONE_INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=PINECONE_INDEX_NAME,
        dimension=embedding_model.get_sentence_embedding_dimension(),  # 768
        metric='cosine',
        spec=ServerlessSpec(cloud='aws', region='us-west-2')
    )

# Handle to the (now guaranteed to exist) index used by search_pinecone().
index = pc.Index(PINECONE_INDEX_NAME)

# --- Generate Sample Data ---
def generate_single_data(product_type):
    """Build a one-row DataFrame of simulated sale data for *product_type*.

    Columns: timestamp, product_type, price, num_clicks, payment_method,
    customer_id. price is uniform in [10, 500) rounded to 2 decimals,
    num_clicks is an int in [1, 99], customer_id an int in [1000, 9998].
    """
    # Keep the RNG call order (price, clicks, payment, customer) identical to
    # the original so a seeded run reproduces the same stream of values.
    record = {
        'timestamp': datetime.now(),
        'product_type': product_type,
        'price': round(np.random.uniform(10, 500), 2),
        'num_clicks': np.random.randint(1, 100),
        'payment_method': np.random.choice(
            ['Credit Card', 'Debit Card', 'UPI', 'Net Banking']),
        'customer_id': np.random.randint(1000, 9999),
    }
    return pd.DataFrame([record])

def predict_price(data):
    """Predict a price for the single-row frame *data* with the global model.

    Only 'product_type' and 'num_clicks' feed the pipeline: the raw columns
    are pushed through the fitted global `preprocessor`, re-wrapped as a
    DataFrame with the transformed feature names, and scored by the global
    `model`. Returns the scalar prediction for the first (only) row.
    """
    feature_cols = ['product_type', 'num_clicks']
    transformed = preprocessor.transform(data[feature_cols])
    feature_names = preprocessor.get_feature_names_out(feature_cols)
    scoring_frame = pd.DataFrame(transformed, columns=feature_names)
    return model.predict(scoring_frame)[0]

# --- Pinecone Search ---
def search_pinecone(query, top_k=3):
    """Return the *top_k* nearest document chunks for *query* from Pinecone.

    The query text is embedded with the global sentence-transformer and sent
    to the global `index` within NAMESPACE. Any query failure is surfaced in
    the Streamlit UI and an empty dict is returned, so callers can always
    iterate `results.get('matches', [])` safely.
    """
    embedded = embedding_model.encode([query])[0]
    try:
        return index.query(
            vector=embedded.tolist(),
            top_k=top_k,
            include_metadata=True,
            namespace=NAMESPACE,
        )
    except Exception as e:
        st.error(f"Error during Pinecone search: {e}")
        return {}

# --- QA Response Generator ---
def generate_response(query, context, llm_model=""):
    """Answer *query* against *context* with an extractive QA pipeline.

    *llm_model* names the Hugging Face model to load. Any failure — bad or
    empty model name, inference error — is reported via st.error and a canned
    apology string is returned instead of raising.
    """
    try:
        qa_pipeline = pipeline("question-answering", model=llm_model)
        answer = qa_pipeline(question=query, context=context)['answer']
        return answer.strip()
    except Exception as e:
        st.error(f"Error generating response: {e}")
        return "Sorry, I could not generate a response."

# --- Streamlit App ---
def main():
    """Entry point: two-tab Streamlit app.

    Tab 0 ("Dashboard"): on each script run, optionally simulates one new sale
    for the selected product type, predicts its price with the globally loaded
    model, and shows the rolling 20 most-recent rows plus a line chart.
    Tab 1 ("Chatbot"): retrieves context chunks from Pinecone for a free-text
    question and answers with an extractive QA pipeline.
    """
    st.set_page_config(page_title="Real-Time Sales & Chatbot", layout="wide")
    st.title("Real-Time Sales Dashboard +  Chatbot")

    tabs = st.tabs(["Dashboard", " Chatbot"])

    with tabs[0]:
        st.sidebar.header("βš™οΈ Controls")
        # Seconds each simulated "tick" blocks the script (see sleep below).
        simulation_speed = st.sidebar.slider("Simulation Speed (seconds)", 0.1, 5.0, 1.0)
        category_filter = st.sidebar.selectbox("Choose a Product Type", ['Electronics', 'Clothing', 'Books', 'Home Goods'])

        st.header(" Real-Time Price Prediction")

        #  Ensure 'data_history' is initialized correctly.
        # Streamlit re-runs this script top-to-bottom on every interaction, so
        # session_state is the only place the rolling history survives reruns.
        if 'data_history' not in st.session_state:
            st.session_state.data_history = pd.DataFrame(columns=['timestamp', 'product_type', 'price', 'num_clicks', 'predicted_price'])

        auto_simulation = st.sidebar.checkbox("Auto Simulate Data", value=True)

        if auto_simulation:
            # Simulation running logic without rerun
            # NOTE(review): this sleep blocks the whole script run; a new data
            # point only appears when Streamlit re-runs (e.g. user interaction).
            time.sleep(simulation_speed)  # Delay based on user speed control

            # Generate and predict new data
            new_data = generate_single_data(category_filter)
            predicted_price = predict_price(new_data)
            new_data['predicted_price'] = predicted_price

            # Add to session state data
            st.session_state.data_history = pd.concat([st.session_state.data_history, new_data], ignore_index=True)
            st.session_state.data_history['timestamp'] = pd.to_datetime(st.session_state.data_history['timestamp'])

            # Sort and keep only the most recent 20 data points
            st.session_state.data_history = st.session_state.data_history.sort_values(by='timestamp', ascending=False).head(20)

        st.subheader("πŸ“ˆ Live Simulated Data and Predictions")
        if not st.session_state.data_history.empty:
            st.dataframe(st.session_state.data_history[['timestamp', 'product_type', 'price', 'num_clicks', 'predicted_price']])
        
            # Plotting the predicted price (re-sorted ascending for the x-axis)
            fig = px.line(
                st.session_state.data_history.sort_values(by='timestamp'),
                x='timestamp',
                y='predicted_price',
                title='Predicted Price Over Time'
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("No data available to display.")

    with tabs[1]:
        st.header("🧾 Document Q&A Chatbot")
        query = st.text_input("Ask a question about the documentation:")
        if query:
            with st.spinner("πŸ” Searching documents..."):
                search_results = search_pinecone(query)
                context = ""
                # Assumes each match's metadata carries 'source', 'chunk' and
                # 'text' keys set by the (unseen) ingestion script; a missing
                # key would raise KeyError here. TODO(review): confirm schema.
                for match in search_results.get('matches', []):
                    context += f"{match['metadata']['source']} (Chunk {match['metadata']['chunk']}): {match['metadata']['text']}\n---\n"

            st.subheader("πŸ“š Retrieved Context")
            st.info(context if context else "No relevant documents found.")

            if context:
                # Extractive QA model (DistilBERT fine-tuned on SQuAD).
                llm_model_name = "distilbert-base-cased-distilled-squad"
                with st.spinner("πŸ’‘ Generating response..."):
                    response = generate_response(query, context, llm_model_name)
                    st.subheader(" Chatbot Response:")
                    st.success(response)

if __name__ == "__main__":
    main()