"""Streamlit app: exploratory analysis and LLM-assisted Q&A for factory CSV data.

The script lets a user upload a CSV file, shows summary statistics, a
correlation heatmap, a few plots, Isolation Forest anomaly flags, and finally
asks a locally loaded Zephyr model a role-specific question about the data.
"""

import os  # noqa: F401  (unused here; kept because other code may rely on it)

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
import torch
from huggingface_hub import login  # noqa: F401  (unused here; kept on purpose)
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"

# Questions offered in the assistant section, keyed by the selected role.
PREDEFINED_QUESTIONS = {
    "Engineer": [
        "Which parameters are showing strong correlation?",
        "Are there any indicators of potential equipment failure?",
        "How should we optimize process efficiency based on anomalies?",
    ],
    "Operator": [
        "What is the most critical parameter to monitor today?",
        "Do any sensors show abnormal values?",
        "What immediate steps should I take due to anomalies?",
    ],
}

st.set_page_config(page_title="Smart Factory RAG Assistant", layout="wide")
st.title("🏠 Industry 5.0 | Smart Factory RAG Assistant (Open Source)")


# Load open-access model (Zephyr)
@st.cache_resource(show_spinner=True)
def load_model():
    """Build the Zephyr text-generation pipeline.

    Wrapped in ``st.cache_resource`` so the tokenizer and weights are created
    once and reused across Streamlit reruns instead of being reloaded each time.

    Returns:
        A ``transformers`` text-generation pipeline using half-precision
        weights and automatic device placement.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


def _render_figure(fig) -> None:
    """Send a Matplotlib figure to Streamlit, then close it.

    Streamlit re-executes the script on every widget interaction; figures that
    are never closed stay registered with pyplot and accumulate in memory.
    """
    st.pyplot(fig)
    plt.close(fig)


def _show_summary(df: pd.DataFrame) -> None:
    """Display row/column counts, column dtypes and descriptive statistics."""
    st.subheader("📃 Data Summary")
    st.write(f"Number of rows: {df.shape[0]}")
    st.write(f"Number of columns: {df.shape[1]}")
    st.write("Column types:")
    st.dataframe(df.dtypes.astype(str).rename("Type"))

    st.subheader("📊 Descriptive Statistics")
    st.dataframe(df.describe().T)


def _show_correlation(df: pd.DataFrame) -> pd.DataFrame:
    """Draw the correlation heatmap and return the correlation matrix."""
    st.subheader("🔗 Parameter Correlation Heatmap")
    fig, ax = plt.subplots(figsize=(10, 6))
    corr = df.corr(numeric_only=True)
    sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    _render_figure(fig)
    return corr


def _show_time_series(df: pd.DataFrame, numeric_columns: list) -> None:
    """Plot one numeric parameter against the row index or a chosen time column.

    The chosen time column is parsed into a separate Series; ``df`` itself is
    left untouched so later sections still see the original column values.
    If parsing fails, the plot falls back to the row index.
    """
    selected_graph_column = st.selectbox(
        "Select a parameter for time series plot", numeric_columns
    )
    time_column = st.selectbox(
        "Select time/index column (optional)",
        ["Index"] + df.columns.tolist(),
        index=0,
    )

    timestamps = None
    if time_column != "Index":
        try:
            # sort_values keeps the original row labels, which are used below
            # to reorder the plotted parameter consistently.
            timestamps = pd.to_datetime(df[time_column]).sort_values()
        except (ValueError, TypeError, OverflowError):
            st.warning(
                f"Could not interpret '{time_column}' as dates; "
                "plotting against the row index instead."
            )

    fig, ax = plt.subplots(figsize=(10, 4))
    if timestamps is not None:
        ax.plot(timestamps, df.loc[timestamps.index, selected_graph_column])
        ax.set_xlabel(time_column)
    else:
        ax.plot(df[selected_graph_column])
        ax.set_xlabel("Index")
    ax.set_title(f"Trend Over Time: {selected_graph_column}")
    ax.set_ylabel(selected_graph_column)
    _render_figure(fig)


def _show_pairplot(df: pd.DataFrame, numeric_columns: list) -> None:
    """Show pairwise scatter plots for the numeric columns (max. 100 rows)."""
    if len(numeric_columns) <= 1:
        return
    st.subheader("🔄 Pairwise Parameter Relationships")
    numeric_df = df[numeric_columns]
    # Fixed seed so the same sample is drawn on every rerun.
    sampled_df = (
        numeric_df.sample(n=100, random_state=1) if len(df) > 100 else numeric_df
    )
    pair_grid = sns.pairplot(sampled_df)
    _render_figure(pair_grid.figure)


def _show_boxplot(df: pd.DataFrame, numeric_columns: list) -> None:
    """Show a boxplot for one user-selected numeric column."""
    st.subheader("📈 Distribution & Outliers per Parameter")
    selected_box_column = st.selectbox(
        "Select parameter for boxplot", numeric_columns
    )
    fig, ax = plt.subplots()
    sns.boxplot(y=df[selected_box_column], ax=ax)
    ax.set_title(f"Boxplot: {selected_box_column}")
    _render_figure(fig)


def _show_anomalies(df: pd.DataFrame) -> pd.DataFrame:
    """Flag anomalous rows with an Isolation Forest and display them.

    Only rows without missing numeric values are scored. The labels are
    aligned back to ``df`` by index, so rows that were skipped get a missing
    value in the new ``Anomaly`` column instead of causing a length mismatch.

    Args:
        df: Uploaded data; an ``Anomaly`` column is added in place
            (-1 = anomaly, 1 = normal) when scoring was possible.

    Returns:
        The subset of ``df`` flagged as anomalous (possibly empty).
    """
    st.subheader("⚠️ Anomaly Detection using Isolation Forest")
    num_df = df.select_dtypes(include="number").dropna()
    if num_df.empty:
        st.warning("No complete numeric rows available for anomaly detection.")
        return df.iloc[0:0]

    X_scaled = StandardScaler().fit_transform(num_df)
    # Fixed seed: without it the flagged rows change on every Streamlit rerun.
    iso = IsolationForest(contamination=0.05, random_state=42)
    labels = pd.Series(iso.fit_predict(X_scaled), index=num_df.index)
    df["Anomaly"] = labels.reindex(df.index)

    skipped = len(df) - len(num_df)
    if skipped:
        st.caption(f"{skipped} rows with missing numeric values were not scored.")

    anomalies = df[df["Anomaly"] == -1]
    st.write(f"Detected {len(anomalies)} anomalies")
    st.dataframe(anomalies.head(10))
    return anomalies


def _build_messages(
    role: str, summary: str, corr_text: str, anomaly_count: int, question: str
) -> list:
    """Assemble the chat messages (system + user) sent to the model."""
    system = (
        f"You are a highly skilled {role} working in a smart manufacturing facility."
    )
    user = (
        "Here is a summary of the uploaded data:\n\n"
        f"STATISTICS:\n{summary}\n\n"
        f"CORRELATIONS:\n{corr_text}\n\n"
        f"ANOMALIES:\n{anomaly_count} rows flagged.\n\n"
        f"QUESTION:\n{question}\n\n"
        "Provide a short, focused response in your role."
    )
    return [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]


def _show_assistant(
    df: pd.DataFrame, corr: pd.DataFrame, anomalies: pd.DataFrame
) -> None:
    """Ask the language model a role-specific question about the data."""
    st.subheader("🧠 Role-Based Decision Assistant")
    role = st.selectbox("Select your role", ["Engineer", "Operator"])
    predefined_q = st.selectbox(
        "Choose a predefined question", PREDEFINED_QUESTIONS[role]
    )
    manual_q = st.text_input("Or type your own question below (optional):")
    # A whitespace-only manual entry falls back to the predefined question.
    question = manual_q.strip() or predefined_q
    if not question:
        return

    with st.spinner("Generating insights..."):
        messages = _build_messages(
            role=role,
            summary=df.describe().round(2).to_string(),
            corr_text=corr.round(2).to_string(),
            anomaly_count=len(anomalies),
            question=question,
        )
        # Loaded here rather than at import time so the analysis above is
        # usable before (and without) the model being loaded.
        nlp = load_model()
        # Use the tokenizer's own chat template instead of hand-written
        # "[INST]" markers, which belong to a different model family.
        prompt = nlp.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        result = nlp(
            prompt,
            max_new_tokens=250,
            do_sample=True,
            temperature=0.5,
            return_full_text=False,  # only the completion, not the echoed prompt
        )
        answer = result[0]["generated_text"].strip()

    st.success("✅ Recommendation:")
    st.markdown(f"**{answer}**")


def main() -> None:
    """Run the page: upload, analysis sections, then the assistant."""
    uploaded_file = st.file_uploader("📄 Upload your factory CSV data", type=["csv"])
    if uploaded_file is None:
        st.info("📂 Please upload a factory CSV data file to begin analysis.")
        return

    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return
    st.success("✅ File uploaded and loaded!")

    _show_summary(df)

    numeric_columns = df.select_dtypes(include="number").columns.tolist()
    if not numeric_columns:
        # Every section below needs at least one numeric column.
        st.warning("The uploaded file contains no numeric columns to analyse.")
        return

    corr = _show_correlation(df)

    st.subheader("📊 Technical Graphs")
    _show_time_series(df, numeric_columns)
    _show_pairplot(df, numeric_columns)
    _show_boxplot(df, numeric_columns)

    anomalies = _show_anomalies(df)
    _show_assistant(df, corr, anomalies)


# Called unconditionally: the original script rendered the page as a
# top-level side effect, and this keeps that behaviour.
main()