import re

import requests
import streamlit as st
import torch
import torch.nn as nn
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Page configuration
st.set_page_config(page_title="Aplikasi Deteksi Alergen", page_icon="🍲", layout="wide")

# Target labels (allergen classes)
target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer and backbone must come from the same checkpoint
MODEL_NAME = 'indobenchmark/indobert-base-p2'


# Text cleaning: strip URLs, newlines, and punctuation, collapse spaces, lowercase
def clean_text(text):
    text = text.replace('--', ' ')
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"\n", " ", text)
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    text = re.sub(r" {2,}", " ", text)
    return text.strip().lower()


# Multilabel BERT classifier: IndoBERT backbone with a fresh linear head
class MultilabelBertClassifier(nn.Module):
    def __init__(self, model_name, num_labels):
        super().__init__()
        self.bert = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        # Replace the classification head with a linear layer sized for our labels
        self.bert.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits


# Load tokenizer and fine-tuned weights (cached across Streamlit reruns)
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = MultilabelBertClassifier(MODEL_NAME, len(target_columns))
    try:
        state_dict = torch.load('model/alergen_model.pt', map_location=device)
        # The checkpoint may be saved as {'model_state_dict': ...} or as a bare state dict
        if 'model_state_dict' in state_dict:
            model_state_dict = state_dict['model_state_dict']
        else:
            model_state_dict = state_dict
        # Strip the 'module.' prefix left over from DataParallel training
        new_state_dict = {k[7:] if k.startswith('module.') else k: v
                          for k, v in model_state_dict.items()}
        model.load_state_dict(new_state_dict, strict=False)
        st.success("Model berhasil dimuat!")
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        st.info("Menggunakan model tanpa pre-trained weights.")
    model.to(device)
    model.eval()
    return tokenizer, model


def predict_alergens(ingredients_text, tokenizer, model, threshold=0.5, max_length=128):
    cleaned_text = clean_text(ingredients_text)
    encoding = tokenizer.encode_plus(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        return_tensors='pt',
        padding='max_length'
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        probs = torch.sigmoid(outputs).cpu().numpy()[0]  # sigmoid scores in (0, 1)

    results = []
    for i, label in enumerate(target_columns):
        results.append({
            'label': label,
            'present': bool(probs[i] > threshold),
            'probability': float(probs[i]) * 100
        })
    return results


# Scrape the ingredient list from a Cookpad recipe page
def scrape_ingredients_from_url(url):
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        ingredients_div = soup.find('div', id='ingredients')
        if not ingredients_div:
            return None
        items = ingredients_div.find_all(['li', 'span'])
        ingredients = [item.get_text(strip=True) for item in items if item.get_text(strip=True)]
        return '\n'.join(ingredients)
    except Exception as e:
        st.error(f"Gagal mengambil data dari URL: {e}")
        return None


# Main app
def main():
    st.title("Aplikasi Deteksi Alergen dalam Resep")
    st.markdown("""
    Aplikasi ini memprediksi alergen yang terkandung dalam resep makanan berdasarkan bahan-bahan.
    """)

    with st.spinner("Memuat model..."):
        tokenizer, model = load_model()

    col1, col2 = st.columns([3, 2])

    with col1:
        st.subheader("Masukkan URL Resep dari Cookpad")
        url = st.text_input("Contoh: https://cookpad.com/id/resep/24678703-gulai-telur-tahu-dan-kacang-panjang")
        threshold = st.slider(
            "Atur Threshold Deteksi Alergen",
            min_value=0.1,
            max_value=0.9,
            value=0.5,
            step=0.05,
            help="Semakin rendah threshold, semakin sensitif model terhadap kemungkinan adanya alergen."
        )

        if st.button("Deteksi Alergen", type="primary"):
            if url:
                with st.spinner("Mengambil bahan resep dari URL..."):
                    ingredients = scrape_ingredients_from_url(url)
                if ingredients:
                    st.text_area("Daftar Bahan", ingredients, height=200)
                    with st.spinner("Menganalisis bahan..."):
                        alergens = predict_alergens(ingredients, tokenizer, model, threshold=threshold)

                    with col2:
                        st.subheader("Hasil Deteksi")
                        emoji_map = {
                            'susu': '🥛',
                            'kacang': '🥜',
                            'telur': '🥚',
                            'makanan_laut': '🦐',
                            'gandum': '🌾'
                        }
                        detected = []
                        for result in alergens:
                            label = result['label']
                            name = label.replace('_', ' ').title()
                            prob = result['probability']
                            emoji = emoji_map.get(label, '')
                            if result['present']:
                                st.error(f"{emoji} {name}: Terdeteksi ⚠️ ({prob:.2f}%)")
                                detected.append(name)
                            else:
                                st.success(f"{emoji} {name}: Tidak Terdeteksi ✓ ({prob:.2f}%)")

                        if detected:
                            st.warning(f"Resep ini mengandung alergen: {', '.join(detected)}")
                        else:
                            st.success("Resep ini tidak mengandung alergen yang terdeteksi.")
                else:
                    st.warning("Gagal mengambil bahan dari halaman Cookpad. Pastikan URL valid.")
            else:
                st.warning("Silakan masukkan URL resep terlebih dahulu.")

    with st.expander("Tentang Aplikasi"):
        st.markdown("""
        Aplikasi ini menggunakan model IndoBERT untuk deteksi 5 jenis alergen dari bahan resep:
        - Susu 🥛
        - Kacang 🥜
        - Telur 🥚
        - Makanan Laut 🦐
        - Gandum 🌾
        """)


if __name__ == "__main__":
    main()