File size: 5,697 Bytes
5ea6795
 
9cf3310
88bc3e2
c2295d1
88bc3e2
c966df4
 
cc8e7f5
 
88bc3e2
 
 
a75c2d4
c966df4
cc8e7f5
c966df4
 
cc8e7f5
c966df4
cc8e7f5
c966df4
 
 
 
 
 
5ea6795
eb0d83d
c966df4
5ea6795
6141da1
 
88bc3e2
 
fc24d80
 
 
 
 
 
88bc3e2
 
 
 
 
 
 
 
 
 
 
c2295d1
b9c56e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc24d80
b9c56e7
 
 
 
 
 
 
 
 
 
 
eb0d83d
88bc3e2
 
 
 
 
 
 
 
 
 
 
c966df4
88bc3e2
 
c77c2e4
 
 
 
 
 
 
 
 
 
 
 
 
 
d53f767
 
 
ff73cbe
eb0d83d
c966df4
fc24d80
 
88bc3e2
 
 
 
 
 
 
fc24d80
88bc3e2
 
fc24d80
88bc3e2
 
fc24d80
88bc3e2
 
fc24d80
c966df4
 
d53f767
 
c966df4
 
 
 
 
 
 
 
ff73cbe
6141da1
a75c2d4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
from huggingface_hub import login
import os

# Page chrome: browser-tab title and wide layout are configured before any
# other Streamlit element is emitted, followed by the on-page headline.
st.set_page_config(
    layout="wide",
    page_title="Smart Factory RAG Assistant",
)
st.title("🏠 Industry 5.0 | Smart Factory RAG Assistant (Open Source)")

# Load open-access model (Zephyr)
@st.cache_resource(show_spinner=True)
def load_model():
    """Return a ``text-generation`` pipeline for ``HuggingFaceH4/zephyr-7b-beta``.

    The tokenizer and the causal language model are both fetched from the
    same checkpoint; the model is requested in ``float16`` with
    ``device_map="auto"``. The function is wrapped in ``st.cache_resource``
    (with its spinner enabled), so the load is not repeated on script reruns.
    """
    checkpoint = "HuggingFaceH4/zephyr-7b-beta"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    lm = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map="auto",
        torch_dtype=torch.float16,
    )
    return pipeline("text-generation", tokenizer=tok, model=lm)

# Build the text-generation pipeline at start-up; load_model() is decorated
# with st.cache_resource, so reruns reuse the already-loaded pipeline.
nlp = load_model()

# File Upload
uploaded_file = st.file_uploader("📄 Upload your factory CSV data", type=["csv"])


def _render_figure(fig):
    """Draw *fig* in the app, then close it.

    Streamlit re-executes this script on every widget interaction, and
    figures created through pyplot stay registered until they are closed.
    Closing each figure right after rendering keeps them from piling up.
    """
    st.pyplot(fig)
    plt.close(fig)


if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # A malformed upload is a user error, not an application crash.
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()
    st.success("✅ File uploaded and loaded!")

    # Resolved once, before any derived column is added, and reused by every
    # section that needs the numeric parameters.
    numeric_columns = df.select_dtypes(include='number').columns.tolist()

    # Data Summary
    st.subheader("📃 Data Summary")
    st.write(f"Number of rows: {df.shape[0]}")
    st.write(f"Number of columns: {df.shape[1]}")
    st.write("Column types:")
    st.dataframe(df.dtypes.astype(str).rename("Type"))

    # Descriptive Stats
    st.subheader("📊 Descriptive Statistics")
    st.dataframe(df.describe().T)

    # Correlation Analysis
    st.subheader("🔗 Parameter Correlation Heatmap")
    corr = df.corr(numeric_only=True)
    if corr.empty:
        # seaborn cannot draw a heatmap for an empty matrix.
        st.info("No numeric columns available for a correlation heatmap.")
    else:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
        _render_figure(fig)

    # Technical Visualizations
    st.subheader("📊 Technical Graphs")
    if not numeric_columns:
        st.info("No numeric columns found; parameter plots are skipped.")
    else:
        # Time Series Plot
        selected_graph_column = st.selectbox("Select a parameter for time series plot", numeric_columns)
        time_column = st.selectbox("Select time/index column (optional)", ['Index'] + df.columns.tolist(), index=0)

        # Convert into a local series instead of overwriting df[time_column]:
        # mutating the frame would turn a numeric column into datetimes and
        # break the pairplot/boxplot/anomaly steps that follow.
        time_values = None
        if time_column != 'Index':
            try:
                time_values = pd.to_datetime(df[time_column])
            except (ValueError, TypeError, OverflowError):
                # Column is not parseable as dates; fall back to row order.
                time_values = None

        fig2, ax2 = plt.subplots(figsize=(10, 4))
        if time_values is not None:
            ordered_times = time_values.sort_values()
            ax2.plot(ordered_times, df.loc[ordered_times.index, selected_graph_column])
            ax2.set_xlabel(time_column)
        else:
            ax2.plot(df[selected_graph_column])
            ax2.set_xlabel("Index")
        ax2.set_title(f"Trend Over Time: {selected_graph_column}")
        ax2.set_ylabel(selected_graph_column)
        _render_figure(fig2)

        # Pairplot
        if len(numeric_columns) > 1:
            st.subheader("🔄 Pairwise Parameter Relationships")
            numeric_view = df[numeric_columns]
            # Cap the plotted rows at 100 (fixed seed) to keep the grid fast.
            sampled_df = numeric_view.sample(n=100, random_state=1) if len(df) > 100 else numeric_view
            pair_grid = sns.pairplot(sampled_df)
            _render_figure(pair_grid.figure)

        # Boxplots
        st.subheader("📈 Distribution & Outliers per Parameter")
        selected_box_column = st.selectbox("Select parameter for boxplot", numeric_columns)
        fig3, ax3 = plt.subplots()
        sns.boxplot(y=df[selected_box_column], ax=ax3)
        ax3.set_title(f"Boxplot: {selected_box_column}")
        _render_figure(fig3)

    # Anomaly Detection
    st.subheader("⚠️ Anomaly Detection using Isolation Forest")
    num_df = df[numeric_columns].dropna()
    if num_df.empty:
        # Nothing to fit on (no numeric columns, or every row has a gap).
        st.warning("No complete numeric rows available; anomaly detection skipped.")
        anomalies = df.iloc[0:0]
    else:
        X_scaled = StandardScaler().fit_transform(num_df)
        # Fixed seed: the script reruns on every widget change and the flagged
        # rows should not change between reruns of the same data.
        iso = IsolationForest(contamination=0.05, random_state=42)
        # Align predictions on num_df's index so rows removed by dropna() do
        # not cause a length mismatch; those rows get NaN in 'Anomaly'.
        df['Anomaly'] = pd.Series(iso.fit_predict(X_scaled), index=num_df.index)
        anomalies = df[df['Anomaly'] == -1]
    st.write(f"Detected {len(anomalies)} anomalies")
    st.dataframe(anomalies.head(10))

    # Role-based Assistant
    st.subheader("🧠 Role-Based Decision Assistant")
    role = st.selectbox("Select your role", ["Engineer", "Operator"])

    predefined_qas = {
        "Engineer": [
            "Which parameters are showing strong correlation?",
            "Are there any indicators of potential equipment failure?",
            "How should we optimize process efficiency based on anomalies?",
        ],
        "Operator": [
            "What is the most critical parameter to monitor today?",
            "Do any sensors show abnormal values?",
            "What immediate steps should I take due to anomalies?",
        ],
    }

    predefined_q = st.selectbox("Choose a predefined question", predefined_qas[role])
    manual_q = st.text_input("Or type your own question below (optional):")
    # A typed question wins; blank or whitespace-only input falls back to the
    # predefined choice instead of producing an empty question.
    question = manual_q.strip() or predefined_q

    if question:
        with st.spinner("Generating insights..."):
            summary = df.describe().round(2).to_string()
            corr_text = corr.round(2).to_string()
            anomaly_count = len(anomalies)

            system_message = (
                f"You are a highly skilled {role} working in a smart manufacturing facility."
            )
            user_message = "\n\n".join([
                "Here is a summary of the uploaded data:",
                f"STATISTICS:\n{summary}",
                f"CORRELATIONS:\n{corr_text}",
                f"ANOMALIES: {anomaly_count} rows flagged.",
                f"QUESTION: {question}\nProvide a short, focused response in your role.",
            ])

            # Use the tokenizer's own chat template rather than hand-written
            # "[INST]" markers, which belong to a different model family.
            prompt = nlp.tokenizer.apply_chat_template(
                [
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": user_message},
                ],
                tokenize=False,
                add_generation_prompt=True,
            )
            # return_full_text=False yields only the completion, so the prompt
            # no longer has to be stripped off the output by string splitting.
            result = nlp(
                prompt,
                max_new_tokens=250,
                do_sample=True,
                temperature=0.5,
                return_full_text=False,
            )
            answer = result[0]['generated_text'].strip()

            st.success("✅ Recommendation:")
            st.markdown(f"**{answer}**")

else:
    st.info("📂 Please upload a factory CSV data file to begin analysis.")