Spaces:

ramadn
/

allergen_detector_bert

Running

App Files Files Community

rdsarjito committed 10 days ago

Commit

e88e274

1 Parent(s): c0cfde6

3 commit

Browse files

Files changed (2) hide show

app.py +47 -218
requirements.txt +9 -5

app.py CHANGED Viewed

@@ -1,249 +1,78 @@
 import streamlit as st
 import torch
 import torch.nn as nn
 import re
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import os
-import numpy as np
-# Set page config
-st.set_page_config(
-    page_title="Deteksi Alergen Resep",
-    page_icon="🍽️",
-    layout="wide"
-)
-# App title and description
-st.title("🍽️ Deteksi Alergen Resep Makanan")
-st.markdown("""
-Aplikasi ini dapat mendeteksi potensi alergen dalam resep makanan Indonesia.
-Masukkan daftar bahan-bahan resep Anda, dan sistem akan mengidentifikasi alergen yang mungkin terkandung.
-""")
-# Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Define target columns (allergens)
-target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
-allergen_descriptions = {
-    'susu': 'Produk susu (milk products)',
-    'kacang': 'Kacang-kacangan (nuts)',
-    'telur': 'Telur (eggs)',
-    'makanan_laut': 'Makanan laut (seafood)',
-    'gandum': 'Gandum/gluten (wheat/gluten)'
-}
-# Clean text function
-@st.cache_data
-def clean_text(text):
-    # Convert dashes to spaces for better tokenization
-    text = text.replace('--', ' ')
-    # Basic cleaning
-    text = re.sub(r"http\S+", "", text)
-    text = re.sub('\n', ' ', text)
-    text = re.sub("[^a-zA-Z0-9\s]", " ", text)
-    text = re.sub(" {2,}", " ", text)
-    text = text.strip()
-    text = text.lower()
-    return text
-# Define model for multilabel classification
 class MultilabelBertClassifier(nn.Module):
     def __init__(self, model_name, num_labels):
         super(MultilabelBertClassifier, self).__init__()
         self.bert = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
-        # Replace the classification head with our own for multilabel
         self.bert.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)
     def forward(self, input_ids, attention_mask):
         outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
         return outputs.logits
-@st.cache_resource
-def load_model_and_tokenizer():
-    try:
-        # Initialize tokenizer
-        tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
-        # Initialize model
-        model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))
-        # Check if model exists locally, otherwise download from Hugging Face
-        model_path = "alergen_model.pt"
-        if os.path.exists(model_path):
-            st.info("Loading model from local storage...")
-            checkpoint = torch.load(model_path, map_location=device)
-            model.load_state_dict(checkpoint['model_state_dict'])
-        else:
-            st.warning("Model file not found. Please upload your model file.")
-        model.to(device)
-        model.eval()
-        return model, tokenizer
-    except Exception as e:
-        st.error(f"Error loading model: {e}")
-        return None, None
-# Function to predict allergens in new recipes
-def predict_allergens(model, tokenizer, ingredients_text, max_length=128):
-    if not model or not tokenizer:
-        return None
-    # Clean the text
-    cleaned_text = clean_text(ingredients_text)
-    # Tokenize
     encoding = tokenizer.encode_plus(
-        cleaned_text,
         add_special_tokens=True,
-        max_length=max_length,
-        truncation=True,
         return_tensors='pt',
-        padding='max_length'
     )
     input_ids = encoding['input_ids'].to(device)
     attention_mask = encoding['attention_mask'].to(device)
-    with torch.no_grad():
-        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
-        predictions = torch.sigmoid(outputs)
-        predictions_np = predictions.cpu().numpy()[0]
-        binary_predictions = (predictions > 0.5).float().cpu().numpy()[0]
-    result = {}
-    confidence = {}
-    for i, target in enumerate(target_columns):
-        result[target] = bool(binary_predictions[i])
-        confidence[target] = float(predictions_np[i])
-    return result, confidence
-# Sidebar for model upload
-with st.sidebar:
-    st.header("Model Management")
-    uploaded_model = st.file_uploader("Upload model file (alergen_model.pt)", type=["pt"])
-    if uploaded_model is not None:
-        with open("alergen_model.pt", "wb") as f:
-            f.write(uploaded_model.getbuffer())
-        st.success("Model uploaded successfully!")
-        st.cache_resource.clear()
-    st.markdown("---")
-    st.markdown("### Tentang Aplikasi")
-    st.markdown("""
-    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi
-    potensi alergen dalam resep makanan. Model dilatih untuk mendeteksi lima jenis alergen
-    umum dalam makanan.
-    """)
-# Load model and tokenizer
-model, tokenizer = load_model_and_tokenizer()
-# Main content
-st.header("Masukkan Bahan-bahan Resep")
-# Text area for ingredients input
-ingredients = st.text_area(
-    "Daftar Bahan (satu per baris atau dengan format yang umum digunakan)",
-    height=150,
-    placeholder="Contoh:\n1 bungkus Lontong homemade\n2 butir Telur ayam\n2 kotak kecil Tahu coklat\n4 butir kecil Kentang\n..."
-)
-# Predict button
-if st.button("Deteksi Alergen", type="primary"):
-    if not ingredients:
-        st.warning("Silakan masukkan daftar bahan terlebih dahulu.")
-    elif not model:
-        st.error("Model belum tersedia. Silakan upload model terlebih dahulu.")
     else:
-        with st.spinner("Menganalisis resep..."):
-            results, confidence = predict_allergens(model, tokenizer, ingredients)
-            if results:
-                st.header("Hasil Deteksi Alergen")
-                # Display detected allergens
-                detected_allergens = [allergen for allergen, present in results.items() if present]
-                if detected_allergens:
-                    st.markdown("### ⚠️ Alergen Terdeteksi:")
-                    # Create columns for the allergen cards
-                    cols = st.columns(len(detected_allergens) if len(detected_allergens) < 3 else 3)
-                    for i, allergen in enumerate(detected_allergens):
-                        col_idx = i % 3
-                        with cols[col_idx]:
-                            st.markdown(f"""
-                            <div style="padding: 10px; border-radius: 5px; background-color: #ffeeee; margin-bottom: 10px;">
-                                <h4 style="color: #cc0000;">{allergen_descriptions[allergen]}</h4>
-                                <p>Tingkat kepercayaan: {confidence[allergen]*100:.1f}%</p>
-                            </div>
-                            """, unsafe_allow_html=True)
-                else:
-                    st.success("✅ Tidak ada alergen yang terdeteksi dalam resep ini.")
-                # Display detailed analysis
-                with st.expander("Lihat Analisis Detail"):
-                    st.markdown("### Tingkat Kepercayaan Per Alergen")
-                    for allergen in target_columns:
-                        conf_value = confidence[allergen]
-                        st.markdown(f"**{allergen_descriptions[allergen]}:** {conf_value*100:.1f}%")
-                        st.progress(conf_value)
-            else:
-                st.error("Terjadi kesalahan dalam prediksi. Silakan coba lagi.")
-# Example recipe section
-with st.expander("Lihat Contoh Resep"):
-    st.markdown("""
-    **Gado-gado:**
-    1 bungkus Lontong homemade
-    2 butir Telur ayam
-    2 kotak kecil Tahu coklat
-    4 butir kecil Kentang
-    2 buah Tomat merah
-    1 buah Ketimun lalap
-    4 lembar Selada keriting
-    2 lembar Kol putih
-    2 porsi Saus kacang homemade
-    4 buah Kerupuk udang goreng
-    Secukupnya emping goreng
-    2 sdt Bawang goreng
-    Secukupnya Kecap manis
-    """)
-    if st.button("Gunakan Contoh Ini"):
-        st.session_state.example_used = True
-        # Will be processed in next rerun
-# Handle example
-if 'example_used' in st.session_state and st.session_state.example_used:
-    example_recipe = """1 bungkus Lontong homemade
-2 butir Telur ayam
-2 kotak kecil Tahu coklat
-4 butir kecil Kentang
-2 buah Tomat merah
-1 buah Ketimun lalap
-4 lembar Selada keriting
-2 lembar Kol putih
-2 porsi Saus kacang homemade
-4 buah Kerupuk udang goreng
-Secukupnya emping goreng
-2 sdt Bawang goreng
-Secukupnya Kecap manis"""
-    st.session_state.example_used = False
-    st.text_area(
-        "Daftar Bahan (satu per baris atau dengan format yang umum digunakan)",
-        value=example_recipe,
-        height=150,
-        key="ingredients_example"
-    )
-# Footer
-st.markdown("---")
-st.markdown("*Aplikasi ini hanya untuk tujuan informasi. Silakan konsultasikan dengan ahli gizi untuk konfirmasi alergen dalam makanan.*")

 import streamlit as st
 import torch
 import torch.nn as nn
+import numpy as np
+import pandas as pd
 import re
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Model and tokenizer configuration
# Path of the fine-tuned weights that are loaded into the classifier.
MODEL_PATH = 'model/alergen_model.pt'
# Pretrained checkpoint used for both the tokenizer and the classifier backbone.
MODEL_NAME = 'indobenchmark/indobert-base-p1'
# Allergen labels (milk, nuts, eggs, seafood, wheat/gluten); one output per label.
# NOTE(review): the order presumably matches the order used at training time -- confirm.
TARGET_COLUMNS = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
# Token length every input is padded/truncated to before inference.
MAX_LEN = 128
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


@st.cache_resource
def _load_tokenizer():
    """Return the tokenizer for ``MODEL_NAME``, created once and then cached.

    Streamlit re-executes this script on every widget interaction; caching
    avoids re-instantiating the tokenizer on each rerun.
    """
    return AutoTokenizer.from_pretrained(MODEL_NAME)


tokenizer = _load_tokenizer()
 class MultilabelBertClassifier(nn.Module):
     """Pretrained sequence classifier whose forward pass returns raw logits.

     Wraps ``AutoModelForSequenceClassification`` and replaces its
     ``classifier`` attribute with a fresh ``nn.Linear(hidden_size, num_labels)``.
     """

     def __init__(self, model_name, num_labels):
         """Load the pretrained model and attach the linear head.

         Args:
             model_name: Checkpoint name or path passed to ``from_pretrained``.
             num_labels: Number of output labels (width of the linear head).
         """
         super().__init__()
         backbone = AutoModelForSequenceClassification.from_pretrained(
             model_name, num_labels=num_labels
         )
         backbone.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
         self.bert = backbone

     def forward(self, input_ids, attention_mask):
         """Run the wrapped model and return its ``logits`` (no activation applied)."""
         return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
@st.cache_resource
def _load_model():
    """Build the classifier, load its trained weights and return it in eval mode.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the model from being rebuilt and the checkpoint from being re-read
    on each rerun.

    Returns:
        The ``MultilabelBertClassifier`` moved to ``device`` and set to eval mode.
    """
    net = MultilabelBertClassifier(MODEL_NAME, len(TARGET_COLUMNS))
    # NOTE: torch.load unpickles the file -- only load checkpoints from a
    # trusted source.
    checkpoint = torch.load(MODEL_PATH, map_location=device)
    # Accept either a bare state_dict or a training checkpoint that nests the
    # weights under 'model_state_dict' (the layout the previous revision of
    # this app read).
    if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
        checkpoint = checkpoint['model_state_dict']
    net.load_state_dict(checkpoint)
    net.to(device)
    net.eval()
    return net


model = _load_model()
# Text preprocessing
def clean_text(text):
    """Normalize raw ingredient text before tokenization.

    Steps, in order: replace ``--`` with a space, remove URLs, turn newlines
    into spaces, replace every character that is not an ASCII letter, digit
    or whitespace with a space, collapse runs of spaces, strip both ends and
    lowercase the result.

    Args:
        text: Raw ingredient text.

    Returns:
        The cleaned, lowercased string.
    """
    text = text.replace('--', ' ')
    text = re.sub(r"http\S+", "", text)
    text = text.replace('\n', ' ')
    # Raw string: "\s" inside a normal literal is an invalid escape sequence
    # (DeprecationWarning, and a SyntaxWarning on Python 3.12+).
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    text = re.sub(r" {2,}", " ", text)
    return text.strip().lower()
# Prediction
def predict(text):
    """Return the predicted probability of each allergen label for ``text``.

    The text is cleaned with ``clean_text``, tokenized (padded/truncated to
    ``MAX_LEN`` tokens), run through ``model`` with gradients disabled, and
    the logits are passed through a sigmoid.

    Args:
        text: Raw ingredient text.

    Returns:
        A dict mapping each name in ``TARGET_COLUMNS`` to a float probability.
    """
    cleaned = clean_text(text)
    # Call the tokenizer directly; ``encode_plus`` is deprecated in favour of
    # ``__call__`` and takes the same arguments here.
    encoding = tokenizer(
        cleaned,
        add_special_tokens=True,
        max_length=MAX_LEN,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask)
    # A single string is encoded, so flattening yields one value per label.
    probs = torch.sigmoid(logits).cpu().flatten().tolist()
    return dict(zip(TARGET_COLUMNS, probs))
# Streamlit UI: title, input box, and a button that runs the prediction.
st.title("🔍 Deteksi Alergen dari Bahan Makanan")
st.markdown("Masukkan daftar bahan makanan, dan sistem akan memprediksi kemungkinan alergen.")
user_input = st.text_area("🧾 Bahan makanan (contoh: 2 butir telur, 1 gelas susu, kacang tanah...)")
if st.button("Prediksi Alergen"):
    if user_input.strip():
        with st.spinner("Memproses..."):
            predictions = predict(user_input)
            st.subheader("📊 Hasil Prediksi:")
            for allergen, score in predictions.items():
                # A label counts as detected when its probability exceeds 0.5.
                status = '✅ Terdeteksi' if score > 0.5 else '❌ Tidak terdeteksi'
                st.write(f"- **{allergen}**: {status} (Probabilitas: {score:.2f})")
    else:
        # Nothing but whitespace was entered: ask for input instead of predicting.
        st.warning("Silakan masukkan bahan makanan terlebih dahulu.")

requirements.txt CHANGED Viewed

@@ -1,5 +1,9 @@
-streamlit>=1.24.0
-torch>=2.0.0
-transformers>=4.30.0
-numpy>=1.22.0
-regex>=2022.1.18

+streamlit
+pandas
+numpy
+torch
+transformers
+scikit-learn
+tqdm
+matplotlib
+sentencepiece