rdsarjito committed on
Commit
552cd20
·
1 Parent(s): 87227dc
Files changed (3) hide show
  1. app.py +264 -0
  2. model/alergen_model.pt +3 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import numpy as np
4
+ import pandas as pd
5
+ import re
6
+ import torch
7
+ import torch.nn as nn
8
+ from torch.utils.data import Dataset
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
+ import matplotlib.pyplot as plt
11
+ import warnings
12
+ warnings.filterwarnings("ignore")
13
+
14
# Configure the browser tab (title, icon) and use the wide page layout.
st.set_page_config(page_title="Deteksi Alergen dalam Resep", page_icon="🍲", layout="wide")

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
23
+
24
+ # Clean text function
25
def clean_text(text):
    """Normalize a raw ingredient string before tokenization.

    Steps, in order: replace double dashes with a space, remove URLs,
    replace every character that is not an ASCII letter, digit or
    whitespace with a space, collapse every run of whitespace (spaces,
    tabs, newlines, carriage returns) into a single space, strip the ends
    and lowercase.

    Args:
        text: Raw ingredient text. ``None`` or an empty string is accepted.

    Returns:
        The cleaned, lowercased string ("" for empty or ``None`` input).
    """
    if not text:
        return ""

    # Convert double dashes to spaces for better tokenization.
    text = text.replace('--', ' ')
    text = re.sub(r"http\S+", "", text)
    # Raw string on purpose: "\s" inside a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    # Collapse all whitespace, not only "\n" and runs of spaces, so tabs and
    # "\r" from pasted text do not survive into the displayed/cleaned output.
    text = re.sub(r"\s+", " ", text)
    return text.strip().lower()
36
+
37
+ # Define model for multilabel classification
38
class MultilabelBertClassifier(nn.Module):
    """Sequence-classification backbone with a linear head emitting one logit per label."""

    def __init__(self, model_name, num_labels):
        """Load the pretrained backbone and install a fresh linear classification head.

        Args:
            model_name: Name or path passed to ``from_pretrained``.
            num_labels: Number of output logits (one per label).
        """
        super().__init__()
        backbone = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels
        )
        # Swap the stock head for our own linear layer used for multilabel output.
        hidden_size = backbone.config.hidden_size
        backbone.classifier = nn.Linear(hidden_size, num_labels)
        self.bert = backbone

    def forward(self, input_ids, attention_mask):
        """Return the raw logits of the wrapped model (no sigmoid is applied here)."""
        return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
48
+
49
# Build the tokenizer and classifier and load the fine-tuned weights (cached by Streamlit)
@st.cache_resource
def load_model():
    """Create the tokenizer and multilabel classifier and load fine-tuned weights.

    The weights are read from ``model/alergen_model.pt``. Both a checkpoint
    dict holding the weights under ``'model_state_dict'`` and a bare state
    dict are accepted. If loading fails, the error is shown in the UI and
    the function still returns the model with whatever weights
    ``MultilabelBertClassifier`` initialised, so predictions are not
    meaningful in that case.

    Returns:
        tuple: ``(model, tokenizer, target_columns)`` where ``model`` has been
        moved to ``device`` and put in eval mode, and ``target_columns`` is
        the list of label names in output order.
    """
    # Target columns (order defines the order of the model outputs).
    target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']

    # NOTE(review): the tokenizer is loaded from the "-p2" checkpoint while the
    # model is built from "-p1". Confirm which one the fine-tuned weights were
    # trained with and align the two names if this is unintentional.
    tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
    model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))

    model_path = "model/alergen_model.pt"

    try:
        # SECURITY: torch.load unpickles the file. Only load checkpoints from a
        # trusted source; consider weights_only=True once the checkpoint
        # contents are confirmed to be plain tensors.
        checkpoint = torch.load(model_path, map_location=device)
        # Accept a training checkpoint ({'model_state_dict': ...}) as well as
        # a bare state dict saved with torch.save(model.state_dict(), ...).
        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        else:
            state_dict = checkpoint
        model.load_state_dict(state_dict)
        st.success("Model berhasil dimuat!")
    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing the app.
        st.error(f"Error loading model: {str(e)}")
        st.warning("Model belum tersedia. Silakan latih model terlebih dahulu atau upload file model.")

    model.to(device)
    model.eval()

    return model, tokenizer, target_columns
77
+
78
def predict_allergens(ingredients_text, model, tokenizer, target_columns, max_length=128, threshold=0.5):
    """Predict which allergens are present in an ingredient list.

    Args:
        ingredients_text: Raw ingredient text; it is passed through
            ``clean_text`` before tokenization.
        model: Callable taking ``input_ids`` and ``attention_mask`` and
            returning logits; the first row of the output is used.
        tokenizer: Hugging Face tokenizer used to encode the cleaned text.
        target_columns: Label names, in the order of the model outputs.
        max_length: Sequence length the input is truncated/padded to.
        threshold: Probability above which an allergen counts as present.

    Returns:
        dict: ``{label: {'present': bool, 'probability': float}}`` with one
        entry per label in ``target_columns``.
    """
    cleaned_text = clean_text(ingredients_text)

    # Call the tokenizer directly: encode_plus is deprecated in favour of
    # __call__, which takes the same arguments for a single string.
    encoding = tokenizer(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        # Independent sigmoid per label (multilabel), first (only) batch row.
        probabilities = torch.sigmoid(logits)[0].cpu().tolist()

    return {
        target: {'present': probability > threshold, 'probability': probability}
        for target, probability in zip(target_columns, probabilities)
    }
109
+
110
+ # Main application
111
# Path load_model() reads the fine-tuned weights from; an upload must land
# here (and the cache must be cleared) for it to take effect.
_MODEL_PATH = "model/alergen_model.pt"

# st.session_state key remembering (signature, error) of the last processed upload.
_UPLOAD_STATE_KEY = "_model_upload_state"

# Explanation shown for each detected allergen, keyed by the model's label name.
_ALLERGEN_EXPLANATIONS = {
    'susu': "- **Susu**: Resep mungkin mengandung susu atau produk turunannya",
    'kacang': "- **Kacang**: Resep mungkin mengandung kacang atau produk turunannya",
    'telur': "- **Telur**: Resep mungkin mengandung telur atau produk turunannya",
    'makanan_laut': "- **Makanan Laut**: Resep mungkin mengandung ikan, udang, kerang, atau makanan laut lainnya",
    'gandum': "- **Gandum**: Resep mungkin mengandung gandum atau produk turunannya (termasuk gluten)",
}


def _display_name(target):
    """Turn a label key such as 'makanan_laut' into the display form 'Makanan Laut'."""
    return target.replace('_', ' ').title()


def _install_uploaded_model(uploaded_model):
    """Validate an uploaded checkpoint and install it where load_model() reads it.

    The upload is processed once per distinct file (name and size); reruns
    with the same file reuse the stored outcome instead of rewriting and
    reloading the checkpoint.

    Args:
        uploaded_model: The object returned by ``st.file_uploader``.

    Returns:
        ``None`` when the file is installed, otherwise the error message.
    """
    signature = (uploaded_model.name, uploaded_model.size)
    previous = st.session_state.get(_UPLOAD_STATE_KEY)
    if previous is not None and previous[0] == signature:
        return previous[1]

    staging_path = _MODEL_PATH + ".upload"
    error = None
    try:
        os.makedirs(os.path.dirname(_MODEL_PATH), exist_ok=True)
        with open(staging_path, "wb") as f:
            f.write(uploaded_model.getbuffer())
        # SECURITY: the file comes from the user and torch.load unpickles it.
        # weights_only=True restricts unpickling to tensors and plain
        # containers, so a malicious pickle is rejected before installation.
        torch.load(staging_path, map_location="cpu", weights_only=True)
        # Replace the active checkpoint only after validation succeeded.
        os.replace(staging_path, _MODEL_PATH)
    except Exception as e:
        error = str(e)
        if os.path.exists(staging_path):
            os.remove(staging_path)
    else:
        # load_model() is cached; drop the cache so the new weights are read.
        load_model.clear()

    st.session_state[_UPLOAD_STATE_KEY] = (signature, error)
    return error


def _render_probability_chart(labels, probabilities, present):
    """Draw the per-allergen probability bar chart with the 0.5 threshold line."""
    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(
        labels,
        probabilities,
        color=['red' if p else 'green' for p in present],
    )

    # Threshold line
    ax.axhline(y=0.5, color='black', linestyle='--', alpha=0.7)
    ax.text(len(labels) - 1, 0.51, 'Threshold (0.5)', ha='right', va='bottom')

    ax.set_ylim(0, 1)
    ax.set_ylabel('Probabilitas')
    ax.set_title('Probabilitas Deteksi Alergen')

    # Value label on top of each bar
    for bar in bars:
        height = bar.get_height()
        ax.annotate(f'{height:.2f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')

    st.pyplot(fig)
    # Close the figure: pyplot keeps every figure alive until closed, so a
    # long-running Streamlit server would otherwise accumulate one per click.
    plt.close(fig)


def main():
    """Render the Streamlit page: model upload, ingredient input, results and info panels."""
    st.title("Deteksi Alergen dalam Resep")
    st.markdown("""
    Aplikasi ini menggunakan model IndoBERT untuk mendeteksi kemungkinan alergen dalam resep berdasarkan daftar bahan.
    Alergen yang diidentifikasi meliputi:
    - Susu
    - Kacang
    - Telur
    - Makanan Laut
    - Gandum
    """)

    # Sidebar for model upload
    st.sidebar.header("Upload Model")
    uploaded_model = st.sidebar.file_uploader("Upload model allergen (alergen_model.pt)", type=["pt"])

    if uploaded_model is not None:
        upload_error = _install_uploaded_model(uploaded_model)
        if upload_error is None:
            st.sidebar.success("Model telah diupload dan dimuat!")
        else:
            st.sidebar.error(f"Model yang diupload tidak dapat digunakan: {upload_error}")

    # Load model
    model, tokenizer, target_columns = load_model()

    # Input area
    st.header("Masukkan Daftar Bahan Resep")
    ingredients = st.text_area("Bahan-bahan:", height=200,
                               placeholder="Contoh: 1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat...")

    col1, col2 = st.columns(2)

    with col1:
        if st.button("Deteksi Alergen", type="primary"):
            # Treat whitespace-only input like empty input.
            if ingredients and ingredients.strip():
                with st.spinner("Menganalisis bahan-bahan..."):
                    # Clean text for display
                    cleaned_text = clean_text(ingredients)
                    st.markdown("### Bahan yang diproses:")
                    st.text(cleaned_text)

                    # Get predictions
                    results = predict_allergens(ingredients, model, tokenizer, target_columns)

                    # Display results
                    st.markdown("### Hasil Deteksi Alergen:")

                    allergens = list(results)
                    labels = [_display_name(a) for a in allergens]
                    probabilities = [results[a]['probability'] for a in allergens]
                    present = [results[a]['present'] for a in allergens]

                    result_df = pd.DataFrame({
                        'Alergen': labels,
                        'Terdeteksi': ['✅' if p else '❌' for p in present],
                        'Probabilitas': [f"{p * 100:.2f}%" for p in probabilities],
                    })

                    st.dataframe(result_df, use_container_width=True)

                    # Display chart in the second column
                    with col2:
                        _render_probability_chart(labels, probabilities, present)

                    # Show detailed explanation
                    st.markdown("### Penjelasan Hasil:")
                    detected = [a for a, is_present in zip(allergens, present) if is_present]

                    if detected:
                        detected_allergens = [_display_name(a) for a in detected]
                        st.markdown(f"Resep ini kemungkinan mengandung alergen: **{', '.join(detected_allergens)}**")

                        # Provide specific explanation for each detected allergen
                        for allergen in detected:
                            explanation = _ALLERGEN_EXPLANATIONS.get(allergen)
                            if explanation is not None:
                                st.markdown(explanation)
                    else:
                        st.markdown("Tidak terdeteksi alergen umum dalam resep ini.")

                    st.warning("Catatan: Prediksi ini hanya bersifat indikatif. Selalu verifikasi dengan informasi resmi untuk keamanan konsumsi.")
            else:
                st.error("Mohon masukkan daftar bahan terlebih dahulu.")

    # Examples section
    with st.expander("Contoh Resep"):
        st.markdown("""
        ### Contoh Resep 1 (Mengandung Beberapa Alergen)
        ```
        1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat, 4 butir kecil Kentang, 2 buah Tomat merah, 1 buah Ketimun lalap, 4 lembar Selada keriting, 2 lembar Kol putih, 2 porsi Saus kacang homemade, 4 buah Kerupuk udang goreng, Secukupnya emping goreng, 2 sdt Bawang goreng, Secukupnya Kecap manis
        ```

        ### Contoh Resep 2 (Mengandung Susu)
        ```
        250 ml susu full cream, 2 sdm tepung maizena, 3 sdm gula pasir, 1/2 sdt vanila ekstrak, secukupnya keju cheddar parut
        ```

        ### Contoh Resep 3 (Mengandung Makanan Laut)
        ```
        250 g udang segar, 150 g cumi-cumi, 2 sdm saus tiram, 3 siung bawang putih, 1 ruas jahe, 2 sdm minyak goreng, garam dan merica secukupnya
        ```
        """)

    # About section
    st.sidebar.markdown("---")
    st.sidebar.header("Tentang")
    st.sidebar.info("""
    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi alergen dalam resep makanan.

    Model ini dilatih untuk mengidentifikasi 5 jenis alergen umum dalam makanan berdasarkan daftar bahan resep.
    """)

    # Model information
    st.sidebar.markdown("---")
    st.sidebar.header("Informasi Model")
    st.sidebar.markdown("""
    - **Model Dasar**: IndoBERT
    - **Jenis**: Multilabel Classification
    - **Alergen yang Dideteksi**: Susu, Kacang, Telur, Makanan Laut, Gandum
    """)


if __name__ == "__main__":
    main()
model/alergen_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28df831b272894c11265ef5f4cf1ac2a2ca89e765b26bff928f34c388ff015d5
3
+ size 497868974
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit>=1.27.0
2
+ torch>=2.0.0
3
+ transformers>=4.35.0
4
+ pandas>=2.0.0
5
+ numpy>=1.24.0
6
+ matplotlib>=3.7.0
7
+ scikit-learn>=1.3.0
8
+ regex>=20