# Hugging Face Spaces (status: Sleeping)
import streamlit as st | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.ensemble import IsolationForest | |
from sklearn.preprocessing import StandardScaler | |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
import torch | |
from huggingface_hub import login | |
import os | |
# Configure the browser-tab title and wide layout, then render the page heading.
st.set_page_config(
    layout="wide",
    page_title="Smart Factory RAG Assistant",
)
st.title(
    "π Industry 5.0 | Smart Factory RAG Assistant (Open Source)"
)
# Load open-access model (Zephyr)
@st.cache_resource
def load_model():
    """Build the Zephyr-7B text-generation pipeline once per server process.

    Streamlit re-executes the whole script on every widget interaction.
    Without ``st.cache_resource`` the tokenizer and the 7B-parameter model
    would be re-instantiated on each rerun; the decorator keeps one shared
    pipeline object alive across reruns and sessions.

    Returns:
        A ``transformers`` "text-generation" pipeline wrapping the
        ``HuggingFaceH4/zephyr-7b-beta`` model and its tokenizer.
    """
    model_id = "HuggingFaceH4/zephyr-7b-beta"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


nlp = load_model()
# Main page flow: upload a CSV, show summary statistics and plots, flag
# anomalies with an Isolation Forest, then answer a role-specific question
# with the language model using the computed statistics as context.
#
# NOTE(review): the leading glyphs in several UI strings ("π", "β", ...)
# look like mis-decoded emoji; confirm the file encoding before changing them.

# File Upload
uploaded_file = st.file_uploader("π Upload your factory CSV data", type=["csv"])

if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # A malformed upload should produce a readable message, not a traceback.
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()
    st.success("β File uploaded and loaded!")

    # Data Summary
    st.subheader("π Data Summary")
    st.write(f"Number of rows: {df.shape[0]}")
    st.write(f"Number of columns: {df.shape[1]}")
    st.write("Column types:")
    st.dataframe(df.dtypes.astype(str).rename("Type"))

    # Descriptive Stats
    st.subheader("π Descriptive Statistics")
    st.dataframe(df.describe().T)

    # Every later section (heatmap, plots, anomaly model) needs numeric data.
    numeric_columns = df.select_dtypes(include='number').columns.tolist()
    if not numeric_columns:
        st.warning("The uploaded file has no numeric columns to analyse.")
        st.stop()

    # Correlation Analysis
    st.subheader("π Parameter Correlation Heatmap")
    fig, ax = plt.subplots(figsize=(10, 6))
    corr = df.corr(numeric_only=True)
    sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    st.pyplot(fig)
    plt.close(fig)  # figures otherwise accumulate across script reruns

    # Technical Visualizations
    st.subheader("π Technical Graphs")

    # Time Series Plot
    selected_graph_column = st.selectbox("Select a parameter for time series plot", numeric_columns)
    time_column = st.selectbox("Select time/index column (optional)", ['Index'] + df.columns.tolist(), index=0)
    fig2, ax2 = plt.subplots(figsize=(10, 4))

    # Parse the chosen time column into a separate Series instead of
    # overwriting df[time_column]: mutating the frame would change the dtype
    # of that column for the boxplot, statistics and anomaly steps below.
    sorted_times = None
    if time_column != 'Index':
        try:
            sorted_times = pd.to_datetime(df[time_column]).sort_values()
        except (ValueError, TypeError, OverflowError):
            st.warning(
                f"Could not interpret '{time_column}' as dates; "
                "plotting against the row index instead."
            )

    if sorted_times is not None:
        ax2.plot(sorted_times, df.loc[sorted_times.index, selected_graph_column])
        ax2.set_xlabel(time_column)
    else:
        ax2.plot(df[selected_graph_column])
        ax2.set_xlabel("Index")
    ax2.set_title(f"Trend Over Time: {selected_graph_column}")
    ax2.set_ylabel(selected_graph_column)
    st.pyplot(fig2)
    plt.close(fig2)

    # Pairplot
    if len(numeric_columns) > 1:
        st.subheader("π Pairwise Parameter Relationships")
        numeric_view = df[numeric_columns]
        # Cap the sample at 100 rows to keep the pair grid quick to render.
        sampled_df = numeric_view.sample(n=100, random_state=1) if len(df) > 100 else numeric_view
        pair_grid = sns.pairplot(sampled_df)
        st.pyplot(pair_grid.figure)
        plt.close(pair_grid.figure)

    # Boxplots
    st.subheader("π Distribution & Outliers per Parameter")
    selected_box_column = st.selectbox("Select parameter for boxplot", numeric_columns)
    fig3, ax3 = plt.subplots()
    sns.boxplot(y=df[selected_box_column], ax=ax3)
    ax3.set_title(f"Boxplot: {selected_box_column}")
    st.pyplot(fig3)
    plt.close(fig3)

    # Anomaly Detection
    st.subheader("β οΈ Anomaly Detection using Isolation Forest")
    num_df = df[numeric_columns].dropna()
    anomaly_labels = pd.Series(dtype="float64")
    if num_df.empty:
        st.warning("No rows have complete numeric data; anomaly detection was skipped.")
    else:
        X_scaled = StandardScaler().fit_transform(num_df)
        # A fixed seed keeps the flagged rows stable between script reruns.
        iso = IsolationForest(contamination=0.05, random_state=42)
        # Index the predictions by the rows that were actually scored so the
        # assignment below aligns correctly when dropna() removed some rows.
        anomaly_labels = pd.Series(iso.fit_predict(X_scaled), index=num_df.index)
        skipped_rows = len(df) - len(num_df)
        if skipped_rows:
            st.info(f"{skipped_rows} rows with missing numeric values were not scored.")
    # Index-aligned assignment: rows that were not scored get NaN.
    df['Anomaly'] = anomaly_labels
    anomalies = df[df['Anomaly'] == -1]
    st.write(f"Detected {len(anomalies)} anomalies")
    st.dataframe(anomalies.head(10))

    # Role-based Assistant
    st.subheader("π§  Role-Based Decision Assistant")
    role = st.selectbox("Select your role", ["Engineer", "Operator"])
    predefined_qas = {
        "Engineer": [
            "Which parameters are showing strong correlation?",
            "Are there any indicators of potential equipment failure?",
            "How should we optimize process efficiency based on anomalies?",
        ],
        "Operator": [
            "What is the most critical parameter to monitor today?",
            "Do any sensors show abnormal values?",
            "What immediate steps should I take due to anomalies?",
        ],
    }
    predefined_q = st.selectbox("Choose a predefined question", predefined_qas[role])
    manual_q = st.text_input("Or type your own question below (optional):")
    # A whitespace-only manual entry falls back to the predefined question.
    question = manual_q.strip() or predefined_q

    if question:
        with st.spinner("Generating insights..."):
            summary = df.describe().round(2).to_string()
            corr_text = corr.round(2).to_string()
            anomaly_count = len(anomalies)
            context = (
                "\n"
                f"You are a highly skilled {role} working in a smart manufacturing facility.\n"
                "Here is a summary of the uploaded data:\n"
                "STATISTICS:\n"
                f"{summary}\n"
                "CORRELATIONS:\n"
                f"{corr_text}\n"
                f"ANOMALIES: {anomaly_count} rows flagged.\n"
                f"QUESTION: {question}\n"
                "Provide a short, focused response in your role.\n"
            )
            # Let the tokenizer render its own chat markup rather than
            # hand-writing "[INST]" tags, which belong to a different
            # model family than the Zephyr checkpoint loaded above.
            prompt = nlp.tokenizer.apply_chat_template(
                [{"role": "user", "content": context.strip()}],
                tokenize=False,
                add_generation_prompt=True,
            )
            result = nlp(
                prompt,
                max_new_tokens=250,
                do_sample=True,
                temperature=0.5,
                return_full_text=False,  # only the completion, not the echoed prompt
            )
            answer = result[0]['generated_text'].strip()
        st.success("β Recommendation:")
        st.markdown(f"**{answer}**")
else:
    st.info("π Please upload a factory CSV data file to begin analysis.")