Spaces:

ramadn
/

allergen_detector_bert

Running

App Files Files Community

rdsarjito committed 11 days ago

Commit

552cd20

1 Parent(s): 87227dc

1 commit

Browse files

Files changed (3) hide show

app.py +264 -0
model/alergen_model.pt +3 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,264 @@

+import streamlit as st
+import os
+import numpy as np
+import pandas as pd
+import re
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import matplotlib.pyplot as plt
+import warnings
+# Suppress all Python warnings for the whole process.
+warnings.filterwarnings("ignore")
+# Configure the Streamlit page: browser-tab title, tab icon, and the wide layout.
+st.set_page_config(
+    page_title="Deteksi Alergen dalam Resep",
+    page_icon="🍲",
+    layout="wide"
+)
+# Module-level torch device shared by model loading and inference:
+# CUDA when torch reports it as available, otherwise the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Clean text function
+def clean_text(text):
+    """Normalize raw ingredient text before it is tokenized.
+
+    Steps, in order: replace double dashes with a space, drop URLs, replace
+    every character that is not an ASCII letter, digit or whitespace with a
+    space, collapse every whitespace run (spaces, tabs, CR/LF) into a single
+    space, then trim and lowercase.
+
+    Args:
+        text: Raw ingredient list as typed or pasted by the user.
+
+    Returns:
+        The cleaned, lowercased, single-spaced string (possibly empty).
+    """
+    text = text.replace('--', ' ')
+    text = re.sub(r"http\S+", "", text)
+    # Raw string: "\s" in a normal string literal is an invalid escape sequence.
+    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
+    # Collapse *all* whitespace, not just spaces, so tabs and "\r\n" from
+    # pasted text do not leak into the model input.
+    text = re.sub(r"\s+", " ", text)
+    return text.strip().lower()
+# Define model for multilabel classification
+class MultilabelBertClassifier(nn.Module):
+    """Pretrained sequence-classification model with a fresh linear head.
+
+    The wrapped Hugging Face model is stored as ``self.bert``; that attribute
+    name is part of the parameter names in this module's ``state_dict``.
+    ``forward`` returns the raw logits, one per label, with no activation
+    applied.
+    """
+    def __init__(self, model_name, num_labels):
+        """Load ``model_name`` and attach a new ``num_labels``-wide classifier layer."""
+        super().__init__()
+        backbone = AutoModelForSequenceClassification.from_pretrained(
+            model_name,
+            num_labels=num_labels,
+        )
+        # Swap the stock classification layer for our own linear layer sized
+        # from the backbone's hidden size.
+        backbone.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
+        self.bert = backbone
+    def forward(self, input_ids, attention_mask):
+        """Run the wrapped model and return its ``logits`` tensor."""
+        return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
+# Build the tokenizer and classifier and restore the fine-tuned weights (cached by Streamlit)
+@st.cache_resource
+def load_model():
+    """Create the tokenizer and classifier and load the fine-tuned checkpoint.
+
+    The result is cached with ``st.cache_resource``, so the body only runs
+    again after the cache is cleared.
+
+    Loading the checkpoint is best-effort: on any failure an error and a
+    warning are shown in the app and the model is returned without the
+    fine-tuned weights, so its predictions are not meaningful in that case.
+
+    Returns:
+        A ``(model, tokenizer, target_columns)`` tuple. ``model`` is moved to
+        the module-level ``device`` and put in eval mode; ``target_columns``
+        lists the allergen labels in the order of the model outputs.
+    """
+    target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
+    # Use ONE base checkpoint for both tokenizer and encoder. The original code
+    # mixed 'indobert-base-p2' (tokenizer) with 'indobert-base-p1' (encoder).
+    # NOTE(review): confirm against the training script which one was used.
+    base_checkpoint = 'indobenchmark/indobert-base-p2'
+    tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
+    model = MultilabelBertClassifier(base_checkpoint, len(target_columns))
+    model_path = "model/alergen_model.pt"
+    try:
+        # SECURITY: torch.load unpickles the file, which can execute arbitrary
+        # code. Only load checkpoints from a trusted source.
+        checkpoint = torch.load(model_path, map_location=device)
+        # Accept both {'model_state_dict': ...} training checkpoints and files
+        # that are a plain state_dict.
+        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
+            state_dict = checkpoint['model_state_dict']
+        else:
+            state_dict = checkpoint
+        model.load_state_dict(state_dict)
+    except Exception as e:
+        # Deliberately broad: the app should still render and tell the user
+        # what went wrong instead of crashing on a missing or bad checkpoint.
+        st.error(f"Error loading model: {str(e)}")
+        st.warning("Model belum tersedia. Silakan latih model terlebih dahulu atau upload file model.")
+    else:
+        st.success("Model berhasil dimuat!")
+    model.to(device)
+    model.eval()
+    return model, tokenizer, target_columns
+def predict_allergens(ingredients_text, model, tokenizer, target_columns, max_length=128):
+    """Score one ingredient list against every allergen label.
+
+    The text is cleaned with ``clean_text``, tokenized (truncated and padded
+    to ``max_length``), run through ``model`` without gradient tracking, and
+    the logits are passed through a sigmoid. A label counts as present when
+    its probability is strictly greater than 0.5.
+
+    Args:
+        ingredients_text: Raw ingredient list.
+        model: Callable taking ``input_ids`` and ``attention_mask`` and
+            returning logits.
+        tokenizer: Tokenizer providing ``encode_plus``.
+        target_columns: Label names, in the order of the model outputs.
+        max_length: Token length used for truncation and padding.
+
+    Returns:
+        ``{label: {'present': bool, 'probability': float}}`` for every label.
+    """
+    encoded = tokenizer.encode_plus(
+        clean_text(ingredients_text),
+        add_special_tokens=True,
+        max_length=max_length,
+        truncation=True,
+        return_tensors='pt',
+        padding='max_length'
+    )
+    ids = encoded['input_ids'].to(device)
+    mask = encoded['attention_mask'].to(device)
+    with torch.no_grad():
+        probs = torch.sigmoid(model(input_ids=ids, attention_mask=mask))
+        # Index 0 selects the first (and only) row of the model output.
+        prob_row = probs.cpu().numpy()[0]
+        flag_row = (probs > 0.5).float().cpu().numpy()[0]
+    return {
+        label: {
+            'present': bool(flag_row[idx]),
+            'probability': float(prob_row[idx])
+        }
+        for idx, label in enumerate(target_columns)
+    }
+# Main application
+# Turn a target column name into a display label, e.g. 'makanan_laut' -> 'Makanan Laut'
+def _allergen_label(name):
+    return name.replace('_', ' ').title()
+# Save a newly uploaded checkpoint to the path load_model() reads and drop the cached model
+def _install_uploaded_model(uploaded_model, model_path="model/alergen_model.pt"):
+    # The uploader widget hands back the same file on every rerun, so remember
+    # which upload was already installed and only rewrite/reload for a new one.
+    upload_key = (uploaded_model.name, uploaded_model.size)
+    if st.session_state.get("installed_model_upload") == upload_key:
+        return
+    # SECURITY: load_model() passes this file to torch.load, which unpickles it;
+    # a malicious upload can execute arbitrary code. Only expose this uploader
+    # to trusted users.
+    model_dir = os.path.dirname(model_path)
+    if model_dir:
+        os.makedirs(model_dir, exist_ok=True)
+    with open(model_path, "wb") as f:
+        f.write(uploaded_model.getbuffer())
+    # load_model is wrapped by st.cache_resource; clear it so the next call
+    # re-reads the checkpoint instead of returning the previously cached model.
+    load_model.clear()
+    st.session_state["installed_model_upload"] = upload_key
+# Draw the per-allergen probability bar chart with the 0.5 decision threshold
+def _render_probability_chart(labels, probabilities, present):
+    fig, ax = plt.subplots(figsize=(10, 6))
+    try:
+        bars = ax.bar(
+            labels,
+            probabilities,
+            color=['red' if p else 'green' for p in present]
+        )
+        # Add threshold line
+        ax.axhline(y=0.5, color='black', linestyle='--', alpha=0.7)
+        ax.text(len(labels)-1, 0.51, 'Threshold (0.5)', ha='right', va='bottom')
+        # Customize the chart
+        ax.set_ylim(0, 1)
+        ax.set_ylabel('Probabilitas')
+        ax.set_title('Probabilitas Deteksi Alergen')
+        # Add values on top of bars
+        for bar in bars:
+            height = bar.get_height()
+            ax.annotate(f'{height:.2f}',
+                       xy=(bar.get_x() + bar.get_width() / 2, height),
+                       xytext=(0, 3),  # 3 points vertical offset
+                       textcoords="offset points",
+                       ha='center', va='bottom')
+        st.pyplot(fig)
+    finally:
+        # pyplot keeps every figure alive until it is closed; without this each
+        # button click would leak one figure in the long-running server process.
+        plt.close(fig)
+# Write the prose explanation listing which allergens were flagged
+def _render_explanation(results):
+    explanations = {
+        'susu': "- **Susu**: Resep mungkin mengandung susu atau produk turunannya",
+        'kacang': "- **Kacang**: Resep mungkin mengandung kacang atau produk turunannya",
+        'telur': "- **Telur**: Resep mungkin mengandung telur atau produk turunannya",
+        'makanan_laut': "- **Makanan Laut**: Resep mungkin mengandung ikan, udang, kerang, atau makanan laut lainnya",
+        'gandum': "- **Gandum**: Resep mungkin mengandung gandum atau produk turunannya (termasuk gluten)",
+    }
+    st.markdown("### Penjelasan Hasil:")
+    detected = [name for name, data in results.items() if data['present']]
+    if detected:
+        detected_labels = ', '.join(_allergen_label(name) for name in detected)
+        st.markdown(f"Resep ini kemungkinan mengandung alergen: **{detected_labels}**")
+        # Labels without a prepared explanation are listed above but get no bullet.
+        for name in detected:
+            if name in explanations:
+                st.markdown(explanations[name])
+    else:
+        st.markdown("Tidak terdeteksi alergen umum dalam resep ini.")
+    st.warning("Catatan: Prediksi ini hanya bersifat indikatif. Selalu verifikasi dengan informasi resmi untuk keamanan konsumsi.")
+def main():
+    """Render the allergen-detection page.
+
+    Layout: an intro, a sidebar uploader for a replacement checkpoint, a text
+    area for the ingredient list, a results table (left column) with a
+    probability chart (right column) and an explanation, a collapsible block
+    of example recipes, and two sidebar info sections.
+
+    A newly uploaded checkpoint is written to the path ``load_model`` reads
+    and the cached model is cleared, so the upload is actually used.
+    """
+    st.title("Deteksi Alergen dalam Resep")
+    st.markdown("""
+    Aplikasi ini menggunakan model IndoBERT untuk mendeteksi kemungkinan alergen dalam resep berdasarkan daftar bahan.
+    Alergen yang diidentifikasi meliputi:
+    - Susu
+    - Kacang
+    - Telur
+    - Makanan Laut
+    - Gandum
+    """)
+    # Sidebar for model upload
+    st.sidebar.header("Upload Model")
+    uploaded_model = st.sidebar.file_uploader("Upload model allergen (alergen_model.pt)", type=["pt"])
+    if uploaded_model is not None:
+        _install_uploaded_model(uploaded_model)
+        st.sidebar.success("Model telah diupload dan dimuat!")
+    # Load model (after the upload handling so a new checkpoint is picked up)
+    model, tokenizer, target_columns = load_model()
+    # Input area
+    st.header("Masukkan Daftar Bahan Resep")
+    ingredients = st.text_area("Bahan-bahan:", height=200,
+                              placeholder="Contoh: 1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat...")
+    col1, col2 = st.columns(2)
+    with col1:
+        if st.button("Deteksi Alergen", type="primary"):
+            # Whitespace-only input is treated as empty.
+            if ingredients and ingredients.strip():
+                with st.spinner("Menganalisis bahan-bahan..."):
+                    # Clean text for display
+                    cleaned_text = clean_text(ingredients)
+                    st.markdown("### Bahan yang diproses:")
+                    st.text(cleaned_text)
+                    # Get predictions
+                    results = predict_allergens(ingredients, model, tokenizer, target_columns)
+                    # Display results
+                    st.markdown("### Hasil Deteksi Alergen:")
+                    allergens = list(results)
+                    labels = [_allergen_label(a) for a in allergens]
+                    probabilities = [results[a]['probability'] for a in allergens]
+                    present = [results[a]['present'] for a in allergens]
+                    # Table of results
+                    result_df = pd.DataFrame({
+                        'Alergen': labels,
+                        'Terdeteksi': ['✅' if flag else '❌' for flag in present],
+                        'Probabilitas': [f"{p*100:.2f}%" for p in probabilities]
+                    })
+                    st.dataframe(result_df, use_container_width=True)
+                    # Display chart in the second column
+                    with col2:
+                        _render_probability_chart(labels, probabilities, present)
+                    # Show detailed explanation
+                    _render_explanation(results)
+            else:
+                st.error("Mohon masukkan daftar bahan terlebih dahulu.")
+    # Examples section
+    with st.expander("Contoh Resep"):
+        st.markdown("""
+        ### Contoh Resep 1 (Mengandung Beberapa Alergen)
+        ```
+        1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat, 4 butir kecil Kentang, 2 buah Tomat merah, 1 buah Ketimun lalap, 4 lembar Selada keriting, 2 lembar Kol putih, 2 porsi Saus kacang homemade, 4 buah Kerupuk udang goreng, Secukupnya emping goreng, 2 sdt Bawang goreng, Secukupnya Kecap manis
+        ```
+        ### Contoh Resep 2 (Mengandung Susu)
+        ```
+        250 ml susu full cream, 2 sdm tepung maizena, 3 sdm gula pasir, 1/2 sdt vanila ekstrak, secukupnya keju cheddar parut
+        ```
+        ### Contoh Resep 3 (Mengandung Makanan Laut)
+        ```
+        250 g udang segar, 150 g cumi-cumi, 2 sdm saus tiram, 3 siung bawang putih, 1 ruas jahe, 2 sdm minyak goreng, garam dan merica secukupnya
+        ```
+        """)
+    # About section
+    st.sidebar.markdown("---")
+    st.sidebar.header("Tentang")
+    st.sidebar.info("""
+    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi alergen dalam resep makanan.
+    Model ini dilatih untuk mengidentifikasi 5 jenis alergen umum dalam makanan berdasarkan daftar bahan resep.
+    """)
+    # Model information
+    st.sidebar.markdown("---")
+    st.sidebar.header("Informasi Model")
+    st.sidebar.markdown("""
+    - **Model Dasar**: IndoBERT
+    - **Jenis**: Multilabel Classification
+    - **Alergen yang Dideteksi**: Susu, Kacang, Telur, Makanan Laut, Gandum
+    """)
+# Call main() only when this module is executed as the entry point, not when it is imported.
+if __name__ == "__main__":
+    main()

model/alergen_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28df831b272894c11265ef5f4cf1ac2a2ca89e765b26bff928f34c388ff015d5
+size 497868974

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit>=1.27.0
+torch>=2.0.0
+transformers>=4.35.0
+pandas>=2.0.0
+numpy>=1.24.0
+matplotlib>=3.7.0
+scikit-learn>=1.3.0
+regex>=20