import streamlit as st
import torch
import torch.nn as nn
import re
import requests
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Set page configuration
st.set_page_config(page_title="Allergen Detection App", page_icon="🍲", layout="wide")
# Target labels
target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Text cleaning
def clean_text(text):
    text = text.replace('--', ' ')
    text = re.sub(r"http\S+", "", text)          # strip URLs
    text = re.sub(r"\n", " ", text)              # newlines to spaces
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)  # drop punctuation and symbols
    text = re.sub(r" {2,}", " ", text)           # collapse repeated spaces
    text = text.strip().lower()
    return text
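# Quick sanity check of the cleaning rules (hypothetical input):
#   clean_text("2 butir Telur\n100 ml Susu--UHT")
#   -> "2 butir telur 100 ml susu uht"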
# Multilabel BERT model
class MultilabelBertClassifier(nn.Module):
    def __init__(self, model_name, num_labels):
        super(MultilabelBertClassifier, self).__init__()
        self.bert = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        # Replace the default classification head with a fresh linear layer
        self.bert.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits
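# forward() returns raw logits of shape (batch_size, num_labels). Because this
# is a multi-label task (a recipe can contain several allergens at once), a
# per-label sigmoid is applied downstream instead of a softmax over labels.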
# Load model and tokenizer (cached so they are not reloaded on every rerun)
@st.cache_resource
def load_model():
    # The tokenizer and model should come from the same checkpoint; the
    # original code mixed indobert-base-p2 (tokenizer) with indobert-base-p1
    # (model). Unified to p2 here; adjust if the fine-tuned weights were
    # trained from p1.
    tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
    model = MultilabelBertClassifier('indobenchmark/indobert-base-p2', len(target_columns))
    try:
        state_dict = torch.load('model/alergen_model.pt', map_location=device)
        # Support both raw state dicts and full training checkpoints
        if 'model_state_dict' in state_dict:
            model_state_dict = state_dict['model_state_dict']
        else:
            model_state_dict = state_dict
        # Strip the 'module.' prefix left over from DataParallel training
        new_state_dict = {k[7:] if k.startswith('module.') else k: v for k, v in model_state_dict.items()}
        model.load_state_dict(new_state_dict, strict=False)
        st.success("Model loaded successfully!")
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        st.info("Using the model without pre-trained weights.")
    model.to(device)
    model.eval()
    return tokenizer, model
def predict_alergens(ingredients_text, tokenizer, model, threshold=0.5, max_length=128):
    cleaned_text = clean_text(ingredients_text)
    encoding = tokenizer.encode_plus(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        return_tensors='pt',
        padding='max_length'
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        probs = torch.sigmoid(outputs).cpu().numpy()[0]  # sigmoid scores in (0, 1)
    results = []
    for i, label in enumerate(target_columns):
        present = bool(probs[i] > threshold)  # cast numpy bool_ to plain bool
        percent = float(probs[i]) * 100
        results.append({
            'label': label,
            'present': present,
            'probability': percent
        })
    return results
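# Example call (return values are illustrative, not real model output):
#   predict_alergens("2 butir telur, 100 ml susu", tokenizer, model)
#   -> [{'label': 'susu', 'present': True, 'probability': 97.13}, ...]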
# Scrape Cookpad
def scrape_ingredients_from_url(url):
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)  # avoid hanging indefinitely
        response.raise_for_status()                                # surface HTTP errors
        soup = BeautifulSoup(response.text, 'html.parser')
        ingredients_div = soup.find('div', id='ingredients')
        if not ingredients_div:
            return None
        items = ingredients_div.find_all(['li', 'span'])
        ingredients = [item.get_text(strip=True) for item in items if item.get_text(strip=True)]
        return '\n'.join(ingredients)
    except Exception as e:
        st.error(f"Failed to fetch data from the URL: {e}")
        return None
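# Note: the 'ingredients' div id and the li/span lookup reflect Cookpad's
# markup at the time of writing; if Cookpad changes its page structure,
# this selector will need updating.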
# Main App
def main():
    st.title("Recipe Allergen Detection App")
    st.markdown("""
    This app predicts the allergens contained in a food recipe based on its ingredients.
    """)
    with st.spinner("Loading model..."):
        tokenizer, model = load_model()
    col1, col2 = st.columns([3, 2])
    with col1:
        st.subheader("Enter a Recipe URL from Cookpad")
        url = st.text_input("Example: https://cookpad.com/id/resep/24678703-gulai-telur-tahu-dan-kacang-panjang")
        threshold = st.slider(
            "Allergen Detection Threshold",
            min_value=0.1,
            max_value=0.9,
            value=0.5,
            step=0.05,
            help="The lower the threshold, the more sensitive the model is to possible allergens."
        )
        if st.button("Detect Allergens", type="primary"):
            if url:
                with st.spinner("Fetching recipe ingredients from the URL..."):
                    ingredients = scrape_ingredients_from_url(url)
                if ingredients:
                    st.text_area("Ingredient List", ingredients, height=200)
                    with st.spinner("Analyzing ingredients..."):
                        alergens = predict_alergens(ingredients, tokenizer, model, threshold=threshold)
                    with col2:
                        st.subheader("Detection Results")
                        emoji_map = {
                            'susu': '🥛',
                            'kacang': '🥜',
                            'telur': '🥚',
                            'makanan_laut': '🦐',
                            'gandum': '🌾'
                        }
                        detected = []
                        for result in alergens:
                            label = result['label']
                            name = label.replace('_', ' ').title()
                            prob = result['probability']
                            present = result['present']
                            emoji = emoji_map.get(label, '')
                            if present:
                                st.error(f"{emoji} {name}: Detected ⚠️ ({prob:.2f}%)")
                                detected.append(name)
                            else:
                                st.success(f"{emoji} {name}: Not Detected ✅ ({prob:.2f}%)")
                        if detected:
                            st.warning(f"This recipe contains allergens: {', '.join(detected)}")
                        else:
                            st.success("No allergens were detected in this recipe.")
                else:
                    st.warning("Failed to fetch ingredients from the Cookpad page. Make sure the URL is valid.")
            else:
                st.warning("Please enter a recipe URL first.")
    with st.expander("About This App"):
        st.markdown("""
        This app uses an IndoBERT model to detect 5 types of allergens from recipe ingredients:
        - Susu (milk) 🥛
        - Kacang (peanuts) 🥜
        - Telur (eggs) 🥚
        - Makanan Laut (seafood) 🦐
        - Gandum (wheat) 🌾
        """)
if __name__ == "__main__":
    main()
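# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py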