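"""Smart Factory RAG Assistant — a Streamlit app for Industry 5.0 factory data.

Uploads a factory CSV, summarizes it, plots correlations, time series, pairwise
relationships, and boxplots, flags anomalies with an Isolation Forest, and
answers role-based questions using the open Zephyr-7B model.
"""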
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
st.set_page_config(page_title="Smart Factory RAG Assistant", layout="wide")
st.title("Industry 5.0 | Smart Factory RAG Assistant (Open Source)")
# Load the open-access instruction-tuned model (Zephyr-7B) once and cache it
@st.cache_resource(show_spinner=True)
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
    model = AutoModelForCausalLM.from_pretrained(
        "HuggingFaceH4/zephyr-7b-beta",
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)

nlp = load_model()
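# NOTE (sketch): on GPUs with limited memory, a 4-bit quantized load is one
# option. This is a hedged example, not part of the original app; it assumes
# the optional `bitsandbytes` package is installed alongside a CUDA GPU.
#
#   from transformers import BitsAndBytesConfig
#   quant_config = BitsAndBytesConfig(load_in_4bit=True)
#   model = AutoModelForCausalLM.from_pretrained(
#       "HuggingFaceH4/zephyr-7b-beta",
#       quantization_config=quant_config,
#       device_map="auto",
#   )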
# File upload
uploaded_file = st.file_uploader("Upload your factory CSV data", type=["csv"])

if uploaded_file:
    df = pd.read_csv(uploaded_file)
    st.success("✅ File uploaded and loaded!")
    # Data summary
    st.subheader("Data Summary")
    st.write(f"Number of rows: {df.shape[0]}")
    st.write(f"Number of columns: {df.shape[1]}")
    st.write("Column types:")
    st.dataframe(df.dtypes.astype(str).rename("Type"))

    # Descriptive statistics (numeric columns only, by pandas default)
    st.subheader("Descriptive Statistics")
    st.dataframe(df.describe().T)
    # Correlation analysis
    st.subheader("Parameter Correlation Heatmap")
    fig, ax = plt.subplots(figsize=(10, 6))
    corr = df.corr(numeric_only=True)
    sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    st.pyplot(fig)
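    # NOTE (sketch): for wide datasets the annotated heatmap gets crowded; a
    # common readability tweak is masking the redundant upper triangle. Hedged
    # example, not in the original app; it assumes `import numpy as np`.
    #
    #   mask = np.triu(np.ones_like(corr, dtype=bool))
    #   sns.heatmap(corr, mask=mask, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)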
    # Technical visualizations
    st.subheader("Technical Graphs")
    numeric_columns = df.select_dtypes(include='number').columns.tolist()

    # Time series plot
    selected_graph_column = st.selectbox("Select a parameter for time series plot", numeric_columns)
    time_column = st.selectbox("Select time/index column (optional)", ['Index'] + df.columns.tolist(), index=0)
    fig2, ax2 = plt.subplots(figsize=(10, 4))
    if time_column != 'Index':
        try:
            df[time_column] = pd.to_datetime(df[time_column])
            df_sorted = df.sort_values(by=time_column)
            ax2.plot(df_sorted[time_column], df_sorted[selected_graph_column])
            ax2.set_xlabel(time_column)
        except Exception:
            # Fall back to the row index if the column cannot be parsed as datetime
            ax2.plot(df[selected_graph_column])
            ax2.set_xlabel("Index")
    else:
        ax2.plot(df[selected_graph_column])
        ax2.set_xlabel("Index")
    ax2.set_title(f"Trend Over Time: {selected_graph_column}")
    ax2.set_ylabel(selected_graph_column)
    st.pyplot(fig2)
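    # NOTE (sketch): very long series can make this plot slow and noisy. One
    # hedged option (not in the original app) is downsampling before plotting,
    # e.g. an hourly mean, which only applies when the time column parsed as
    # datetime above:
    #
    #   resampled = df_sorted.set_index(time_column)[selected_graph_column].resample('1h').mean()
    #   ax2.plot(resampled.index, resampled.values)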
    # Pairplot (sampled to keep rendering fast on large files)
    if len(numeric_columns) > 1:
        st.subheader("Pairwise Parameter Relationships")
        sampled_df = df[numeric_columns].sample(n=100, random_state=1) if len(df) > 100 else df[numeric_columns]
        pair_fig = sns.pairplot(sampled_df)
        st.pyplot(pair_fig)
    # Boxplots
    st.subheader("Distribution & Outliers per Parameter")
    selected_box_column = st.selectbox("Select parameter for boxplot", numeric_columns)
    fig3, ax3 = plt.subplots()
    sns.boxplot(y=df[selected_box_column], ax=ax3)
    ax3.set_title(f"Boxplot: {selected_box_column}")
    st.pyplot(fig3)
    # Anomaly detection with Isolation Forest
    st.subheader("⚠️ Anomaly Detection using Isolation Forest")
    num_df = df.select_dtypes(include='number').dropna()
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(num_df)
    iso = IsolationForest(contamination=0.05, random_state=42)  # fixed seed for reproducible flags
    # Align predictions with the rows actually scored (dropna may remove rows)
    df.loc[num_df.index, 'Anomaly'] = iso.fit_predict(X_scaled)
    anomalies = df[df['Anomaly'] == -1]
    st.write(f"Detected {len(anomalies)} anomalies")
    st.dataframe(anomalies.head(10))
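    # NOTE (sketch): a hedged optional extra, not in the original app — let the
    # user export the flagged rows for offline review:
    #
    #   st.download_button(
    #       "Download anomalies as CSV",
    #       anomalies.to_csv(index=False),
    #       file_name="anomalies.csv",
    #       mime="text/csv",
    #   )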
    # Role-based assistant
    st.subheader("Role-Based Decision Assistant")
    role = st.selectbox("Select your role", ["Engineer", "Operator"])

    predefined_qas = {
        "Engineer": [
            "Which parameters are showing strong correlation?",
            "Are there any indicators of potential equipment failure?",
            "How should we optimize process efficiency based on anomalies?"
        ],
        "Operator": [
            "What is the most critical parameter to monitor today?",
            "Do any sensors show abnormal values?",
            "What immediate steps should I take due to anomalies?"
        ]
    }

    predefined_q = st.selectbox("Choose a predefined question", predefined_qas[role])
    manual_q = st.text_input("Or type your own question below (optional):")
    question = manual_q.strip() if manual_q else predefined_q
    if question:
        with st.spinner("Generating insights..."):
            summary = df.describe().round(2).to_string()
            corr_text = corr.round(2).to_string()
            anomaly_count = len(anomalies)
            context = f"""
You are a highly skilled {role} working in a smart manufacturing facility.
Here is a summary of the uploaded data:
STATISTICS:
{summary}
CORRELATIONS:
{corr_text}
ANOMALIES: {anomaly_count} rows flagged.
QUESTION: {question}
Provide a short, focused response in your role.
"""
            # Zephyr expects its own chat template rather than Mistral-style
            # [INST] tags; apply_chat_template builds the correct prompt string.
            messages = [{"role": "user", "content": context}]
            prompt = nlp.tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            result = nlp(prompt, max_new_tokens=250, do_sample=True, temperature=0.5)
            output = result[0]['generated_text']
            # The pipeline echoes the prompt, so strip it to keep only the answer
            answer = output[len(prompt):].strip()
        st.success("✅ Recommendation:")
        st.markdown(f"**{answer}**")
else:
    st.info("Please upload a factory CSV data file to begin analysis.")
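# NOTE (sketch): to run locally — a hedged guess based only on the imports
# above (the file name app.py is an assumption, and `accelerate` is assumed
# because device_map="auto" typically requires it):
#
#   pip install streamlit pandas matplotlib seaborn scikit-learn transformers torch accelerate
#   streamlit run app.py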