Spaces:

ramadn
/

allergen_detector_bert

Running

App Files Files Community

rdsarjito committed 10 days ago

Commit

c0cfde6

1 Parent(s): 552cd20

2 commit

Browse files

Files changed (2) hide show

app.py +171 -186
requirements.txt +4 -7

app.py CHANGED Viewed

@@ -1,27 +1,40 @@
 import streamlit as st
-import os
-import numpy as np
-import pandas as pd
-import re
 import torch
 import torch.nn as nn
-from torch.utils.data import Dataset
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import matplotlib.pyplot as plt
-import warnings
-warnings.filterwarnings("ignore")
 # Set page config
 st.set_page_config(
-    page_title="Deteksi Alergen dalam Resep",
-    page_icon="🍲",
     layout="wide"
 )
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Clean text function
 def clean_text(text):
     # Convert dashes to spaces for better tokenization
     text = text.replace('--', ' ')
@@ -46,36 +59,38 @@ class MultilabelBertClassifier(nn.Module):
         outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
         return outputs.logits
-# Function to predict allergens in new recipes
 @st.cache_resource
-def load_model():
-    # Target columns
-    target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
-    # Initialize tokenizer
-    tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
-    # Initialize model
-    model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))
-    # Load model weights if available
-    model_path = "model/alergen_model.pt"
     try:
-        # Try to load the model
-        checkpoint = torch.load(model_path, map_location=device)
-        model.load_state_dict(checkpoint['model_state_dict'])
-        st.success("Model berhasil dimuat!")
     except Exception as e:
-        st.error(f"Error loading model: {str(e)}")
-        st.warning("Model belum tersedia. Silakan latih model terlebih dahulu atau upload file model.")
-    model.to(device)
-    model.eval()
-    return model, tokenizer, target_columns
-def predict_allergens(ingredients_text, model, tokenizer, target_columns, max_length=128):
     # Clean the text
     cleaned_text = clean_text(ingredients_text)
@@ -95,170 +110,140 @@ def predict_allergens(ingredients_text, model, tokenizer, target_columns, max_le
     with torch.no_grad():
         outputs = model(input_ids=input_ids, attention_mask=attention_mask)
         predictions = torch.sigmoid(outputs)
-        predictions_prob = predictions.cpu().numpy()[0]
-        predictions_binary = (predictions > 0.5).float().cpu().numpy()[0]
     result = {}
     for i, target in enumerate(target_columns):
-        result[target] = {
-            'present': bool(predictions_binary[i]),
-            'probability': float(predictions_prob[i])
-        }
-    return result
-# Main application
-def main():
-    st.title("Deteksi Alergen dalam Resep")
-    st.markdown("""
-    Aplikasi ini menggunakan model IndoBERT untuk mendeteksi kemungkinan alergen dalam resep berdasarkan daftar bahan.
-    Alergen yang diidentifikasi meliputi:
-    - Susu
-    - Kacang
-    - Telur
-    - Makanan Laut
-    - Gandum
-    """)
-    # Sidebar for model upload
-    st.sidebar.header("Upload Model")
-    uploaded_model = st.sidebar.file_uploader("Upload model allergen (alergen_model.pt)", type=["pt"])
     if uploaded_model is not None:
         with open("alergen_model.pt", "wb") as f:
             f.write(uploaded_model.getbuffer())
-        st.sidebar.success("Model telah diupload dan dimuat!")
-    # Load model
-    model, tokenizer, target_columns = load_model()
-    # Input area
-    st.header("Masukkan Daftar Bahan Resep")
-    ingredients = st.text_area("Bahan-bahan:", height=200,
-                              placeholder="Contoh: 1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat...")
-    col1, col2 = st.columns(2)
-    with col1:
-        if st.button("Deteksi Alergen", type="primary"):
-            if ingredients:
-                with st.spinner("Menganalisis bahan-bahan..."):
-                    # Clean text for display
-                    cleaned_text = clean_text(ingredients)
-                    st.markdown("### Bahan yang diproses:")
-                    st.text(cleaned_text)
-                    # Get predictions
-                    results = predict_allergens(ingredients, model, tokenizer, target_columns)
-                    # Display results
-                    st.markdown("### Hasil Deteksi Alergen:")
-                    # Create data for visualization
-                    allergens = list(results.keys())
-                    probabilities = [results[a]['probability'] for a in allergens]
-                    present = [results[a]['present'] for a in allergens]
-                    # Create a colorful table of results
-                    result_df = pd.DataFrame({
-                        'Alergen': [a.title() for a in allergens],
-                        'Terdeteksi': ['✅' if results[a]['present'] else '❌' for a in allergens],
-                        'Probabilitas': [f"{results[a]['probability']*100:.2f}%" for a in allergens]
-                    })
-                    st.dataframe(result_df, use_container_width=True)
-                    # Display chart in the second column
-                    with col2:
-                        fig, ax = plt.subplots(figsize=(10, 6))
-                        bars = ax.bar(
-                            [a.title() for a in allergens],
-                            probabilities,
-                            color=['red' if p else 'green' for p in present]
-                        )
-                        # Add threshold line
-                        ax.axhline(y=0.5, color='black', linestyle='--', alpha=0.7)
-                        ax.text(len(allergens)-1, 0.51, 'Threshold (0.5)', ha='right', va='bottom')
-                        # Customize the chart
-                        ax.set_ylim(0, 1)
-                        ax.set_ylabel('Probabilitas')
-                        ax.set_title('Probabilitas Deteksi Alergen')
-                        # Add values on top of bars
-                        for bar in bars:
-                            height = bar.get_height()
-                            ax.annotate(f'{height:.2f}',
-                                       xy=(bar.get_x() + bar.get_width() / 2, height),
-                                       xytext=(0, 3),  # 3 points vertical offset
-                                       textcoords="offset points",
-                                       ha='center', va='bottom')
-                        st.pyplot(fig)
-                    # Show detailed explanation
-                    st.markdown("### Penjelasan Hasil:")
-                    detected_allergens = [allergen.title() for allergen, data in results.items() if data['present']]
-                    if detected_allergens:
-                        st.markdown(f"Resep ini kemungkinan mengandung alergen: **{', '.join(detected_allergens)}**")
-                        # Provide specific explanation for each detected allergen
-                        for allergen in detected_allergens:
-                            if allergen.lower() == 'susu':
-                                st.markdown("- **Susu**: Resep mungkin mengandung susu atau produk turunannya")
-                            elif allergen.lower() == 'kacang':
-                                st.markdown("- **Kacang**: Resep mungkin mengandung kacang atau produk turunannya")
-                            elif allergen.lower() == 'telur':
-                                st.markdown("- **Telur**: Resep mungkin mengandung telur atau produk turunannya")
-                            elif allergen.lower() == 'makanan_laut':
-                                st.markdown("- **Makanan Laut**: Resep mungkin mengandung ikan, udang, kerang, atau makanan laut lainnya")
-                            elif allergen.lower() == 'gandum':
-                                st.markdown("- **Gandum**: Resep mungkin mengandung gandum atau produk turunannya (termasuk gluten)")
-                    else:
-                        st.markdown("Tidak terdeteksi alergen umum dalam resep ini.")
-                    st.warning("Catatan: Prediksi ini hanya bersifat indikatif. Selalu verifikasi dengan informasi resmi untuk keamanan konsumsi.")
             else:
-                st.error("Mohon masukkan daftar bahan terlebih dahulu.")
-    # Examples section
-    with st.expander("Contoh Resep"):
-        st.markdown("""
-        ### Contoh Resep 1 (Mengandung Beberapa Alergen)
-        ```
-        1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat, 4 butir kecil Kentang, 2 buah Tomat merah, 1 buah Ketimun lalap, 4 lembar Selada keriting, 2 lembar Kol putih, 2 porsi Saus kacang homemade, 4 buah Kerupuk udang goreng, Secukupnya emping goreng, 2 sdt Bawang goreng, Secukupnya Kecap manis
-        ```
-        ### Contoh Resep 2 (Mengandung Susu)
-        ```
-        250 ml susu full cream, 2 sdm tepung maizena, 3 sdm gula pasir, 1/2 sdt vanila ekstrak, secukupnya keju cheddar parut
-        ```
-        ### Contoh Resep 3 (Mengandung Makanan Laut)
-        ```
-        250 g udang segar, 150 g cumi-cumi, 2 sdm saus tiram, 3 siung bawang putih, 1 ruas jahe, 2 sdm minyak goreng, garam dan merica secukupnya
-        ```
-        """)
-    # About section
-    st.sidebar.markdown("---")
-    st.sidebar.header("Tentang")
-    st.sidebar.info("""
-    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi alergen dalam resep makanan.
-    Model ini dilatih untuk mengidentifikasi 5 jenis alergen umum dalam makanan berdasarkan daftar bahan resep.
     """)
-    # Model information
-    st.sidebar.markdown("---")
-    st.sidebar.header("Informasi Model")
-    st.sidebar.markdown("""
-    - **Model Dasar**: IndoBERT
-    - **Jenis**: Multilabel Classification
-    - **Alergen yang Dideteksi**: Susu, Kacang, Telur, Makanan Laut, Gandum
-    """)
-if __name__ == "__main__":
-    main()

 import streamlit as st
 import torch
 import torch.nn as nn
+import re
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import os
+import numpy as np
 # Set page config
 st.set_page_config(
+    page_title="Deteksi Alergen Resep",
+    page_icon="🍽️",
     layout="wide"
 )
+# App title and description
+st.title("🍽️ Deteksi Alergen Resep Makanan")
+st.markdown("""
+Aplikasi ini dapat mendeteksi potensi alergen dalam resep makanan Indonesia.
+Masukkan daftar bahan-bahan resep Anda, dan sistem akan mengidentifikasi alergen yang mungkin terkandung.
+""")
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Define target columns (allergens)
+target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
+allergen_descriptions = {
+    'susu': 'Produk susu (milk products)',
+    'kacang': 'Kacang-kacangan (nuts)',
+    'telur': 'Telur (eggs)',
+    'makanan_laut': 'Makanan laut (seafood)',
+    'gandum': 'Gandum/gluten (wheat/gluten)'
+}
 # Clean text function
+@st.cache_data
 def clean_text(text):
     # Convert dashes to spaces for better tokenization
     text = text.replace('--', ' ')
         outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
         return outputs.logits
 @st.cache_resource
+def load_model_and_tokenizer():
     try:
+        # Initialize tokenizer
+        tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
+        # Initialize model
+        model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))
+        # Check whether the model file exists locally; if not, warn the user to upload it
+        model_path = "alergen_model.pt"
+        if os.path.exists(model_path):
+            st.info("Loading model from local storage...")
+            checkpoint = torch.load(model_path, map_location=device)
+            model.load_state_dict(checkpoint['model_state_dict'])
+        else:
+            st.warning("Model file not found. Please upload your model file.")
+        model.to(device)
+        model.eval()
+        return model, tokenizer
     except Exception as e:
+        st.error(f"Error loading model: {e}")
+        return None, None
+# Function to predict allergens in new recipes
+def predict_allergens(model, tokenizer, ingredients_text, max_length=128):
+    if not model or not tokenizer:
+        return None
     # Clean the text
     cleaned_text = clean_text(ingredients_text)
     with torch.no_grad():
         outputs = model(input_ids=input_ids, attention_mask=attention_mask)
         predictions = torch.sigmoid(outputs)
+        predictions_np = predictions.cpu().numpy()[0]
+        binary_predictions = (predictions > 0.5).float().cpu().numpy()[0]
     result = {}
+    confidence = {}
     for i, target in enumerate(target_columns):
+        result[target] = bool(binary_predictions[i])
+        confidence[target] = float(predictions_np[i])
+    return result, confidence
+# Sidebar for model upload
+with st.sidebar:
+    st.header("Model Management")
+    uploaded_model = st.file_uploader("Upload model file (alergen_model.pt)", type=["pt"])
     if uploaded_model is not None:
         with open("alergen_model.pt", "wb") as f:
             f.write(uploaded_model.getbuffer())
+        st.success("Model uploaded successfully!")
+        st.cache_resource.clear()
+    st.markdown("---")
+    st.markdown("### Tentang Aplikasi")
+    st.markdown("""
+    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi
+    potensi alergen dalam resep makanan. Model dilatih untuk mendeteksi lima jenis alergen
+    umum dalam makanan.
+    """)
+# Load model and tokenizer
+model, tokenizer = load_model_and_tokenizer()
+# Main content
+st.header("Masukkan Bahan-bahan Resep")
+# Text area for ingredients input
+ingredients = st.text_area(
+    "Daftar Bahan (satu per baris atau dengan format yang umum digunakan)",
+    height=150,
+    placeholder="Contoh:\n1 bungkus Lontong homemade\n2 butir Telur ayam\n2 kotak kecil Tahu coklat\n4 butir kecil Kentang\n..."
+)
+# Predict button
+if st.button("Deteksi Alergen", type="primary"):
+    if not ingredients:
+        st.warning("Silakan masukkan daftar bahan terlebih dahulu.")
+    elif not model:
+        st.error("Model belum tersedia. Silakan upload model terlebih dahulu.")
+    else:
+        with st.spinner("Menganalisis resep..."):
+            results, confidence = predict_allergens(model, tokenizer, ingredients)
+            if results:
+                st.header("Hasil Deteksi Alergen")
+                # Display detected allergens
+                detected_allergens = [allergen for allergen, present in results.items() if present]
+                if detected_allergens:
+                    st.markdown("### ⚠️ Alergen Terdeteksi:")
+                    # Create columns for the allergen cards
+                    cols = st.columns(len(detected_allergens) if len(detected_allergens) < 3 else 3)
+                    for i, allergen in enumerate(detected_allergens):
+                        col_idx = i % 3
+                        with cols[col_idx]:
+                            st.markdown(f"""
+                            <div style="padding: 10px; border-radius: 5px; background-color: #ffeeee; margin-bottom: 10px;">
+                                <h4 style="color: #cc0000;">{allergen_descriptions[allergen]}</h4>
+                                <p>Tingkat kepercayaan: {confidence[allergen]*100:.1f}%</p>
+                            </div>
+                            """, unsafe_allow_html=True)
+                else:
+                    st.success("✅ Tidak ada alergen yang terdeteksi dalam resep ini.")
+                # Display detailed analysis
+                with st.expander("Lihat Analisis Detail"):
+                    st.markdown("### Tingkat Kepercayaan Per Alergen")
+                    for allergen in target_columns:
+                        conf_value = confidence[allergen]
+                        st.markdown(f"**{allergen_descriptions[allergen]}:** {conf_value*100:.1f}%")
+                        st.progress(conf_value)
             else:
+                st.error("Terjadi kesalahan dalam prediksi. Silakan coba lagi.")
+# Example recipe section
+with st.expander("Lihat Contoh Resep"):
+    st.markdown("""
+    **Gado-gado:**
+    1 bungkus Lontong homemade
+    2 butir Telur ayam
+    2 kotak kecil Tahu coklat
+    4 butir kecil Kentang
+    2 buah Tomat merah
+    1 buah Ketimun lalap
+    4 lembar Selada keriting
+    2 lembar Kol putih
+    2 porsi Saus kacang homemade
+    4 buah Kerupuk udang goreng
+    Secukupnya emping goreng
+    2 sdt Bawang goreng
+    Secukupnya Kecap manis
     """)
+    if st.button("Gunakan Contoh Ini"):
+        st.session_state.example_used = True
+        # Will be processed in next rerun
+# Handle example
+if 'example_used' in st.session_state and st.session_state.example_used:
+    example_recipe = """1 bungkus Lontong homemade
+2 butir Telur ayam
+2 kotak kecil Tahu coklat
+4 butir kecil Kentang
+2 buah Tomat merah
+1 buah Ketimun lalap
+4 lembar Selada keriting
+2 lembar Kol putih
+2 porsi Saus kacang homemade
+4 buah Kerupuk udang goreng
+Secukupnya emping goreng
+2 sdt Bawang goreng
+Secukupnya Kecap manis"""
+    st.session_state.example_used = False
+    st.text_area(
+        "Daftar Bahan (satu per baris atau dengan format yang umum digunakan)",
+        value=example_recipe,
+        height=150,
+        key="ingredients_example"
+    )
+# Footer
+st.markdown("---")
+st.markdown("*Aplikasi ini hanya untuk tujuan informasi. Silakan konsultasikan dengan ahli gizi untuk konfirmasi alergen dalam makanan.*")

requirements.txt CHANGED Viewed

@@ -1,8 +1,5 @@
-streamlit>=1.27.0
 torch>=2.0.0
-transformers>=4.35.0
-pandas>=2.0.0
-numpy>=1.24.0
-matplotlib>=3.7.0
-scikit-learn>=1.3.0
-regex>=20

+streamlit>=1.24.0
 torch>=2.0.0
+transformers>=4.30.0
+numpy>=1.22.0
+regex>=2022.1.18