rdsarjito committed on
Commit
c0cfde6
·
1 Parent(s): 552cd20
Files changed (2) hide show
  1. app.py +171 -186
  2. requirements.txt +4 -7
app.py CHANGED
@@ -1,27 +1,40 @@
1
  import streamlit as st
2
- import os
3
- import numpy as np
4
- import pandas as pd
5
- import re
6
  import torch
7
  import torch.nn as nn
8
- from torch.utils.data import Dataset
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
- import matplotlib.pyplot as plt
11
- import warnings
12
- warnings.filterwarnings("ignore")
13
 
14
  # Set page config
15
  st.set_page_config(
16
- page_title="Deteksi Alergen dalam Resep",
17
- page_icon="🍲",
18
  layout="wide"
19
  )
20
 
 
 
 
 
 
 
 
21
  # Set device
22
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
 
 
 
 
 
 
 
 
 
 
 
24
  # Clean text function
 
25
  def clean_text(text):
26
  # Convert dashes to spaces for better tokenization
27
  text = text.replace('--', ' ')
@@ -46,36 +59,38 @@ class MultilabelBertClassifier(nn.Module):
46
  outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
47
  return outputs.logits
48
 
49
- # Function to predict allergens in new recipes
50
  @st.cache_resource
51
- def load_model():
52
- # Target columns
53
- target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
54
-
55
- # Initialize tokenizer
56
- tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
57
-
58
- # Initialize model
59
- model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))
60
-
61
- # Load model weights if available
62
- model_path = "model/alergen_model.pt"
63
-
64
  try:
65
- # Try to load the model
66
- checkpoint = torch.load(model_path, map_location=device)
67
- model.load_state_dict(checkpoint['model_state_dict'])
68
- st.success("Model berhasil dimuat!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  except Exception as e:
70
- st.error(f"Error loading model: {str(e)}")
71
- st.warning("Model belum tersedia. Silakan latih model terlebih dahulu atau upload file model.")
72
-
73
- model.to(device)
74
- model.eval()
75
-
76
- return model, tokenizer, target_columns
77
 
78
- def predict_allergens(ingredients_text, model, tokenizer, target_columns, max_length=128):
 
 
 
 
79
  # Clean the text
80
  cleaned_text = clean_text(ingredients_text)
81
 
@@ -95,170 +110,140 @@ def predict_allergens(ingredients_text, model, tokenizer, target_columns, max_le
95
  with torch.no_grad():
96
  outputs = model(input_ids=input_ids, attention_mask=attention_mask)
97
  predictions = torch.sigmoid(outputs)
98
- predictions_prob = predictions.cpu().numpy()[0]
99
- predictions_binary = (predictions > 0.5).float().cpu().numpy()[0]
100
 
101
  result = {}
 
102
  for i, target in enumerate(target_columns):
103
- result[target] = {
104
- 'present': bool(predictions_binary[i]),
105
- 'probability': float(predictions_prob[i])
106
- }
107
 
108
- return result
109
 
110
- # Main application
111
- def main():
112
- st.title("Deteksi Alergen dalam Resep")
113
- st.markdown("""
114
- Aplikasi ini menggunakan model IndoBERT untuk mendeteksi kemungkinan alergen dalam resep berdasarkan daftar bahan.
115
- Alergen yang diidentifikasi meliputi:
116
- - Susu
117
- - Kacang
118
- - Telur
119
- - Makanan Laut
120
- - Gandum
121
- """)
122
-
123
- # Sidebar for model upload
124
- st.sidebar.header("Upload Model")
125
- uploaded_model = st.sidebar.file_uploader("Upload model allergen (alergen_model.pt)", type=["pt"])
126
 
127
  if uploaded_model is not None:
128
  with open("alergen_model.pt", "wb") as f:
129
  f.write(uploaded_model.getbuffer())
130
- st.sidebar.success("Model telah diupload dan dimuat!")
 
131
 
132
- # Load model
133
- model, tokenizer, target_columns = load_model()
134
-
135
- # Input area
136
- st.header("Masukkan Daftar Bahan Resep")
137
- ingredients = st.text_area("Bahan-bahan:", height=200,
138
- placeholder="Contoh: 1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat...")
139
-
140
- col1, col2 = st.columns(2)
141
-
142
- with col1:
143
- if st.button("Deteksi Alergen", type="primary"):
144
- if ingredients:
145
- with st.spinner("Menganalisis bahan-bahan..."):
146
- # Clean text for display
147
- cleaned_text = clean_text(ingredients)
148
- st.markdown("### Bahan yang diproses:")
149
- st.text(cleaned_text)
150
-
151
- # Get predictions
152
- results = predict_allergens(ingredients, model, tokenizer, target_columns)
153
-
154
- # Display results
155
- st.markdown("### Hasil Deteksi Alergen:")
156
-
157
- # Create data for visualization
158
- allergens = list(results.keys())
159
- probabilities = [results[a]['probability'] for a in allergens]
160
- present = [results[a]['present'] for a in allergens]
161
-
162
- # Create a colorful table of results
163
- result_df = pd.DataFrame({
164
- 'Alergen': [a.title() for a in allergens],
165
- 'Terdeteksi': ['✅' if results[a]['present'] else '❌' for a in allergens],
166
- 'Probabilitas': [f"{results[a]['probability']*100:.2f}%" for a in allergens]
167
- })
168
-
169
- st.dataframe(result_df, use_container_width=True)
170
-
171
- # Display chart in the second column
172
- with col2:
173
- fig, ax = plt.subplots(figsize=(10, 6))
174
- bars = ax.bar(
175
- [a.title() for a in allergens],
176
- probabilities,
177
- color=['red' if p else 'green' for p in present]
178
- )
179
-
180
- # Add threshold line
181
- ax.axhline(y=0.5, color='black', linestyle='--', alpha=0.7)
182
- ax.text(len(allergens)-1, 0.51, 'Threshold (0.5)', ha='right', va='bottom')
183
-
184
- # Customize the chart
185
- ax.set_ylim(0, 1)
186
- ax.set_ylabel('Probabilitas')
187
- ax.set_title('Probabilitas Deteksi Alergen')
188
-
189
- # Add values on top of bars
190
- for bar in bars:
191
- height = bar.get_height()
192
- ax.annotate(f'{height:.2f}',
193
- xy=(bar.get_x() + bar.get_width() / 2, height),
194
- xytext=(0, 3), # 3 points vertical offset
195
- textcoords="offset points",
196
- ha='center', va='bottom')
197
-
198
- st.pyplot(fig)
199
-
200
- # Show detailed explanation
201
- st.markdown("### Penjelasan Hasil:")
202
- detected_allergens = [allergen.title() for allergen, data in results.items() if data['present']]
203
 
204
- if detected_allergens:
205
- st.markdown(f"Resep ini kemungkinan mengandung alergen: **{', '.join(detected_allergens)}**")
206
-
207
- # Provide specific explanation for each detected allergen
208
- for allergen in detected_allergens:
209
- if allergen.lower() == 'susu':
210
- st.markdown("- **Susu**: Resep mungkin mengandung susu atau produk turunannya")
211
- elif allergen.lower() == 'kacang':
212
- st.markdown("- **Kacang**: Resep mungkin mengandung kacang atau produk turunannya")
213
- elif allergen.lower() == 'telur':
214
- st.markdown("- **Telur**: Resep mungkin mengandung telur atau produk turunannya")
215
- elif allergen.lower() == 'makanan_laut':
216
- st.markdown("- **Makanan Laut**: Resep mungkin mengandung ikan, udang, kerang, atau makanan laut lainnya")
217
- elif allergen.lower() == 'gandum':
218
- st.markdown("- **Gandum**: Resep mungkin mengandung gandum atau produk turunannya (termasuk gluten)")
219
- else:
220
- st.markdown("Tidak terdeteksi alergen umum dalam resep ini.")
221
 
222
- st.warning("Catatan: Prediksi ini hanya bersifat indikatif. Selalu verifikasi dengan informasi resmi untuk keamanan konsumsi.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  else:
224
- st.error("Mohon masukkan daftar bahan terlebih dahulu.")
225
-
226
- # Examples section
227
- with st.expander("Contoh Resep"):
228
- st.markdown("""
229
- ### Contoh Resep 1 (Mengandung Beberapa Alergen)
230
- ```
231
- 1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat, 4 butir kecil Kentang, 2 buah Tomat merah, 1 buah Ketimun lalap, 4 lembar Selada keriting, 2 lembar Kol putih, 2 porsi Saus kacang homemade, 4 buah Kerupuk udang goreng, Secukupnya emping goreng, 2 sdt Bawang goreng, Secukupnya Kecap manis
232
- ```
233
-
234
- ### Contoh Resep 2 (Mengandung Susu)
235
- ```
236
- 250 ml susu full cream, 2 sdm tepung maizena, 3 sdm gula pasir, 1/2 sdt vanila ekstrak, secukupnya keju cheddar parut
237
- ```
238
-
239
- ### Contoh Resep 3 (Mengandung Makanan Laut)
240
- ```
241
- 250 g udang segar, 150 g cumi-cumi, 2 sdm saus tiram, 3 siung bawang putih, 1 ruas jahe, 2 sdm minyak goreng, garam dan merica secukupnya
242
- ```
243
- """)
244
 
245
- # About section
246
- st.sidebar.markdown("---")
247
- st.sidebar.header("Tentang")
248
- st.sidebar.info("""
249
- Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi alergen dalam resep makanan.
250
-
251
- Model ini dilatih untuk mengidentifikasi 5 jenis alergen umum dalam makanan berdasarkan daftar bahan resep.
 
 
 
 
 
 
 
 
 
 
252
  """)
253
 
254
- # Model information
255
- st.sidebar.markdown("---")
256
- st.sidebar.header("Informasi Model")
257
- st.sidebar.markdown("""
258
- - **Model Dasar**: IndoBERT
259
- - **Jenis**: Multilabel Classification
260
- - **Alergen yang Dideteksi**: Susu, Kacang, Telur, Makanan Laut, Gandum
261
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- if __name__ == "__main__":
264
- main()
 
 
1
  import streamlit as st
 
 
 
 
2
  import torch
3
  import torch.nn as nn
4
+ import re
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+ import os
7
+ import numpy as np
 
8
 
9
  # Set page config
10
  st.set_page_config(
11
+ page_title="Deteksi Alergen Resep",
12
+ page_icon="🍽️",
13
  layout="wide"
14
  )
15
 
16
+ # App title and description
17
+ st.title("🍽️ Deteksi Alergen Resep Makanan")
18
+ st.markdown("""
19
+ Aplikasi ini dapat mendeteksi potensi alergen dalam resep makanan Indonesia.
20
+ Masukkan daftar bahan-bahan resep Anda, dan sistem akan mengidentifikasi alergen yang mungkin terkandung.
21
+ """)
22
+
23
  # Set device
24
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
 
26
+ # Define target columns (allergens)
27
+ target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
28
+ allergen_descriptions = {
29
+ 'susu': 'Produk susu (milk products)',
30
+ 'kacang': 'Kacang-kacangan (nuts)',
31
+ 'telur': 'Telur (eggs)',
32
+ 'makanan_laut': 'Makanan laut (seafood)',
33
+ 'gandum': 'Gandum/gluten (wheat/gluten)'
34
+ }
35
+
36
  # Clean text function
37
+ @st.cache_data
38
  def clean_text(text):
39
  # Convert dashes to spaces for better tokenization
40
  text = text.replace('--', ' ')
 
59
  outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
60
  return outputs.logits
61
 
 
62
  @st.cache_resource
63
def load_model_and_tokenizer():
    """Build the IndoBERT allergen classifier and its tokenizer.

    The fine-tuned weights are read from ``alergen_model.pt`` in the working
    directory. The result is cached by the ``st.cache_resource`` decorator
    applied to this function, so the sidebar upload handler must clear that
    cache for a newly uploaded checkpoint to be picked up.

    Returns:
        ``(model, tokenizer)`` with the model moved to ``device`` and switched
        to eval mode, or ``(None, None)`` when the weights file is missing or
        anything fails while loading.
    """
    model_path = "alergen_model.pt"

    # Without the checkpoint the classifier would only carry the base
    # pre-trained weights, not the allergen fine-tuning. Report the model as
    # unavailable so the caller's "model not available" branch is taken
    # instead of serving predictions from an untrained model.
    if not os.path.exists(model_path):
        st.warning("Model file not found. Please upload your model file.")
        return None, None

    try:
        # Initialize tokenizer
        tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')

        # Initialize model
        # NOTE(review): the tokenizer comes from "-p2" while the backbone is
        # "-p1" -- confirm this matches how the checkpoint was trained.
        model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))

        st.info("Loading model from local storage...")
        # SECURITY: torch.load unpickles the file, and this file can come from
        # the sidebar uploader. Only load checkpoints from trusted sources.
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])

        model.to(device)
        model.eval()

        return model, tokenizer
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the whole Streamlit script.
        st.error(f"Error loading model: {e}")
        return None, None
 
 
 
 
 
88
 
89
+ # Function to predict allergens in new recipes
90
+ def predict_allergens(model, tokenizer, ingredients_text, max_length=128):
91
+ if not model or not tokenizer:
92
+ return None
93
+
94
  # Clean the text
95
  cleaned_text = clean_text(ingredients_text)
96
 
 
110
  with torch.no_grad():
111
  outputs = model(input_ids=input_ids, attention_mask=attention_mask)
112
  predictions = torch.sigmoid(outputs)
113
+ predictions_np = predictions.cpu().numpy()[0]
114
+ binary_predictions = (predictions > 0.5).float().cpu().numpy()[0]
115
 
116
  result = {}
117
+ confidence = {}
118
  for i, target in enumerate(target_columns):
119
+ result[target] = bool(binary_predictions[i])
120
+ confidence[target] = float(predictions_np[i])
 
 
121
 
122
+ return result, confidence
123
 
124
# Sidebar for model upload
def _store_uploaded_model():
    """Persist a newly uploaded checkpoint and invalidate the cached model.

    Registered as the uploader's ``on_change`` callback so the (large) file is
    written and the resource cache is cleared only when the upload actually
    changes, not on every script rerun while a file sits in the uploader.
    """
    uploaded = st.session_state.get("model_upload")
    if uploaded is None:
        # The user removed the file from the uploader; keep what is on disk.
        return
    with open("alergen_model.pt", "wb") as f:
        f.write(uploaded.getbuffer())
    # Drop cached resources so the next model load reads the new weights.
    st.cache_resource.clear()


with st.sidebar:
    st.header("Model Management")
    uploaded_model = st.file_uploader(
        "Upload model file (alergen_model.pt)",
        type=["pt"],
        key="model_upload",
        on_change=_store_uploaded_model,
    )

    if uploaded_model is not None:
        st.success("Model uploaded successfully!")

    st.markdown("---")
    st.markdown("### Tentang Aplikasi")
    # Built by string concatenation so no source indentation leaks into the
    # rendered markdown.
    st.markdown(
        "Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi "
        "potensi alergen dalam resep makanan. Model dilatih untuk mendeteksi lima jenis alergen "
        "umum dalam makanan."
    )
142
+
143
# Load model and tokenizer
model, tokenizer = load_model_and_tokenizer()

# Session-state key of the main ingredients text area. The example button
# writes to this key so the example lands in the input that is actually
# analysed.
INGREDIENTS_KEY = "ingredients_input"

# Example recipe (gado-gado), defined once and reused for display and input.
EXAMPLE_RECIPE = "\n".join([
    "1 bungkus Lontong homemade",
    "2 butir Telur ayam",
    "2 kotak kecil Tahu coklat",
    "4 butir kecil Kentang",
    "2 buah Tomat merah",
    "1 buah Ketimun lalap",
    "4 lembar Selada keriting",
    "2 lembar Kol putih",
    "2 porsi Saus kacang homemade",
    "4 buah Kerupuk udang goreng",
    "Secukupnya emping goreng",
    "2 sdt Bawang goreng",
    "Secukupnya Kecap manis",
])


def _use_example_recipe():
    """Copy the example recipe into the main ingredients text area.

    Runs as a button ``on_click`` callback, i.e. before the script reruns, so
    the widget value may be set through session state before the text area is
    created.
    """
    st.session_state[INGREDIENTS_KEY] = EXAMPLE_RECIPE


def _render_detected_allergens(detected_allergens, confidence):
    """Show one highlighted card per detected allergen, at most 3 per row."""
    st.markdown("### ⚠️ Alergen Terdeteksi:")
    cols = st.columns(min(len(detected_allergens), 3))
    for i, allergen in enumerate(detected_allergens):
        # Values interpolated into the HTML come from the module's own
        # description table and a float, never from user input.
        card_html = (
            '<div style="padding: 10px; border-radius: 5px; '
            'background-color: #ffeeee; margin-bottom: 10px;">'
            f'<h4 style="color: #cc0000;">{allergen_descriptions[allergen]}</h4>'
            f'<p>Tingkat kepercayaan: {confidence[allergen]*100:.1f}%</p>'
            '</div>'
        )
        with cols[i % 3]:
            st.markdown(card_html, unsafe_allow_html=True)


# Main content
st.header("Masukkan Bahan-bahan Resep")

# Text area for ingredients input
ingredients = st.text_area(
    "Daftar Bahan (satu per baris atau dengan format yang umum digunakan)",
    height=150,
    placeholder="Contoh:\n1 bungkus Lontong homemade\n2 butir Telur ayam\n2 kotak kecil Tahu coklat\n4 butir kecil Kentang\n...",
    key=INGREDIENTS_KEY,
)

# Predict button
if st.button("Deteksi Alergen", type="primary"):
    if not ingredients.strip():
        # Whitespace-only input counts as empty.
        st.warning("Silakan masukkan daftar bahan terlebih dahulu.")
    elif not model:
        st.error("Model belum tersedia. Silakan upload model terlebih dahulu.")
    else:
        with st.spinner("Menganalisis resep..."):
            prediction = predict_allergens(model, tokenizer, ingredients)

        # predict_allergens returns None when it cannot run; check before
        # unpacking so the user sees the error message instead of a TypeError.
        if prediction is None:
            st.error("Terjadi kesalahan dalam prediksi. Silakan coba lagi.")
        else:
            results, confidence = prediction
            st.header("Hasil Deteksi Alergen")

            # Display detected allergens
            detected_allergens = [allergen for allergen, present in results.items() if present]
            if detected_allergens:
                _render_detected_allergens(detected_allergens, confidence)
            else:
                st.success("✅ Tidak ada alergen yang terdeteksi dalam resep ini.")

            # Display detailed analysis
            with st.expander("Lihat Analisis Detail"):
                st.markdown("### Tingkat Kepercayaan Per Alergen")
                for allergen in target_columns:
                    conf_value = confidence[allergen]
                    st.markdown(f"**{allergen_descriptions[allergen]}:** {conf_value*100:.1f}%")
                    st.progress(conf_value)

# Example recipe section
with st.expander("Lihat Contoh Resep"):
    st.markdown("**Gado-gado:**")
    # st.text keeps the one-ingredient-per-line layout.
    st.text(EXAMPLE_RECIPE)
    st.button("Gunakan Contoh Ini", on_click=_use_example_recipe)

# Footer
st.markdown("---")
st.markdown("*Aplikasi ini hanya untuk tujuan informasi. Silakan konsultasikan dengan ahli gizi untuk konfirmasi alergen dalam makanan.*")
requirements.txt CHANGED
@@ -1,8 +1,5 @@
1
- streamlit>=1.27.0
2
  torch>=2.0.0
3
- transformers>=4.35.0
4
- pandas>=2.0.0
5
- numpy>=1.24.0
6
- matplotlib>=3.7.0
7
- scikit-learn>=1.3.0
8
- regex>=20
 
1
+ streamlit>=1.24.0
2
  torch>=2.0.0
3
+ transformers>=4.30.0
4
+ numpy>=1.22.0
5
+ regex>=2022.1.18