Alishbah committed on
Commit
f08423d
·
verified ·
1 Parent(s): 633c7f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -64
app.py CHANGED
@@ -177,76 +177,90 @@ def main():
177
  apply_theme(selected_theme)
178
 
179
  # --- Title and Welcome ---
180
- slider_value = st.slider("AI Plagiarism Detection Tool", min_value=0, max_value=100, value=50)
181
  st.markdown("<h1 class='welcome-text'>Welcome to AI & Plagiarism Detection</h1>", unsafe_allow_html=True)
182
 
183
- # --- Load Models ---
184
- ai_detection_model = load_ai_detection_model()
185
- tokenizer, plagiarism_model = load_plagiarism_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
- # --- File Upload ---
188
- uploaded_files = st.file_uploader("Upload files (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
189
-
190
- if uploaded_files:
191
- for uploaded_file in uploaded_files:
192
- file_size = len(uploaded_file.getvalue())
193
- if file_size > 1000000000:
194
- st.error(f"{uploaded_file.name}: File size exceeds the 1GB limit.")
195
- continue
196
-
197
- try:
198
- if uploaded_file.type == "application/pdf":
199
- raw_text = extract_text_from_pdf(uploaded_file)
200
- elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
201
- raw_text = extract_text_from_docx(uploaded_file)
202
- else:
203
- raw_text = None
204
- st.error(f"{uploaded_file.name}: Unsupported file type")
205
  continue
206
 
207
- except Exception as e:
208
- st.error(f"Error processing {uploaded_file.name}: {e}")
209
- continue
210
-
211
- if raw_text:
212
- # --- Split text into manageable chunks ---
213
- text_chunks = split_text_into_chunks(raw_text, tokenizer)
214
-
215
- # --- AI Detection ---
216
- ai_percentage_avg = None
217
- human_percentage = None
218
- if ai_detection_model:
219
- ai_percentages = detect_ai_content(text_chunks, ai_detection_model)
220
- if ai_percentages:
221
- ai_percentage_avg = sum(ai_percentages) / len(ai_percentages) * 100
222
- human_percentage = 100 - ai_percentage_avg
223
-
224
- # --- Plagiarism Check ---
225
- plagiarism_percentage = None
226
- if tokenizer and plagiarism_model:
227
- plagiarism_percentage = plagiarism_check(text_chunks, tokenizer, plagiarism_model)
228
-
229
- # --- Tiled Output ---
230
- with st.container():
231
- st.markdown(f"<div class='output-box'><h3>{uploaded_file.name}</h3></div>", unsafe_allow_html=True)
232
-
233
- col1, col2 = st.columns(2)
234
-
235
- with col1:
236
- st.markdown("<div class='output-box'><h4>AI Detection:</h4></div>", unsafe_allow_html=True)
237
- if ai_percentage_avg is not None:
238
- st.metric(label="AI Content", value=f"{ai_percentage_avg:.2f}%", delta="AI Generated")
239
- st.metric(label="Human Written", value=f"{human_percentage:.2f}%", delta="Humanized Text")
240
- else:
241
- st.write("AI Detection not available")
242
-
243
- with col2:
244
- st.markdown("<div class='output-box'><h4>Plagiarism Detection:</h4></div>", unsafe_allow_html=True)
245
- if plagiarism_percentage is not None:
246
- st.metric(label="Plagiarism", value=f"{plagiarism_percentage:.2f}%", delta="Plagiarized" if plagiarism_percentage > 0 else "Original")
247
- else:
248
- st.write("Plagiarism Detection not available")
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
  # --- Call Main ---
251
  if __name__ == "__main__":
 
252
  main()
 
177
  apply_theme(selected_theme)
178
 
179
  # --- Title and Welcome ---
 
180
  st.markdown("<h1 class='welcome-text'>Welcome to AI & Plagiarism Detection</h1>", unsafe_allow_html=True)
181
 
182
+ # --- Tabs for File Upload and Text Input ---
183
+ tab1, tab2 = st.tabs(["Upload File", "Enter Text"])
184
+
185
+ with tab1:
186
+ uploaded_files = st.file_uploader("Upload files (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
187
+ if uploaded_files:
188
+ for uploaded_file in uploaded_files:
189
+ file_size = len(uploaded_file.getvalue())
190
+ if file_size > 1000000000:
191
+ st.error(f"{uploaded_file.name}: File size exceeds the 1GB limit.")
192
+ continue
193
+
194
+ try:
195
+ if uploaded_file.type == "application/pdf":
196
+ raw_text = extract_text_from_pdf(uploaded_file)
197
+ elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
198
+ raw_text = extract_text_from_docx(uploaded_file)
199
+ else:
200
+ raw_text = None
201
+ st.error(f"{uploaded_file.name}: Unsupported file type")
202
+ continue
203
 
204
+ except Exception as e:
205
+ st.error(f"Error processing {uploaded_file.name}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  continue
207
 
208
+ if raw_text:
209
+ # Split text into manageable chunks
210
+ text_chunks = split_text_into_chunks(raw_text, tokenizer)
211
+ process_and_display(text_chunks, uploaded_file.name, ai_detection_model, tokenizer, plagiarism_model)
212
+
213
+ with tab2:
214
+ manual_text = st.text_area("Enter text here", "")
215
+ if manual_text:
216
+ text_chunks = split_text_into_chunks(manual_text, tokenizer)
217
+ process_and_display(text_chunks, "Manual Input", ai_detection_model, tokenizer, plagiarism_model)
218
+
219
+ # --- Helper function to process text and display results ---
220
def process_and_display(text_chunks, source_name, ai_detection_model, tokenizer, plagiarism_model):
    """Run AI detection and the plagiarism check on text chunks and render the results.

    Args:
        text_chunks: Chunks of text to analyse; passed unchanged to
            ``detect_ai_content`` and ``plagiarism_check``.
        source_name: Label shown as the heading of the result tile (callers
            pass an uploaded file's name or ``"Manual Input"``).
        ai_detection_model: Model handed to ``detect_ai_content``. When falsy,
            AI detection is skipped and reported as not available.
        tokenizer: Tokenizer handed to ``plagiarism_check``.
        plagiarism_model: Model handed to ``plagiarism_check``. The check runs
            only when both this and ``tokenizer`` are truthy.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Imported locally so this helper does not rely on a module-level import.
    import html

    # --- AI Detection ---
    ai_percentage_avg = None
    human_percentage = None
    if ai_detection_model:
        ai_percentages = detect_ai_content(text_chunks, ai_detection_model)
        if ai_percentages:  # also guards the division against an empty result
            # NOTE(review): presumably per-chunk scores are fractions in [0, 1],
            # hence the scaling by 100 -- confirm against detect_ai_content.
            ai_percentage_avg = sum(ai_percentages) / len(ai_percentages) * 100
            human_percentage = 100 - ai_percentage_avg

    # --- Plagiarism Check ---
    plagiarism_percentage = None
    if tokenizer and plagiarism_model:
        plagiarism_percentage = plagiarism_check(text_chunks, tokenizer, plagiarism_model)

    # --- Tiled Output ---
    with st.container():
        # source_name can be a user-supplied file name and the markup below is
        # rendered with unsafe_allow_html=True, so escape it to prevent HTML
        # injection through a crafted file name.
        safe_name = html.escape(str(source_name))
        st.markdown(f"<div class='output-box'><h3>{safe_name}</h3></div>", unsafe_allow_html=True)

        col1, col2 = st.columns(2)

        with col1:
            st.markdown("<div class='output-box'><h4>AI Detection:</h4></div>", unsafe_allow_html=True)
            if ai_percentage_avg is not None:
                st.metric(label="AI Content", value=f"{ai_percentage_avg:.2f}%", delta="AI Generated")
                st.metric(label="Human Written", value=f"{human_percentage:.2f}%", delta="Humanized Text")
            else:
                st.write("AI Detection not available")

        with col2:
            st.markdown("<div class='output-box'><h4>Plagiarism Detection:</h4></div>", unsafe_allow_html=True)
            if plagiarism_percentage is not None:
                st.metric(
                    label="Plagiarism",
                    value=f"{plagiarism_percentage:.2f}%",
                    delta="Plagiarized" if plagiarism_percentage > 0 else "Original",
                )
            else:
                st.write("Plagiarism Detection not available")
255
+
256
+ # --- Load models globally ---
257
@st.cache_resource
def load_models():
    """Load the AI-detection model and the plagiarism tokenizer/model pair.

    Wrapped in ``st.cache_resource``; the loading itself is delegated to
    ``load_ai_detection_model`` and ``load_plagiarism_model``.

    Returns:
        A 3-tuple ``(ai_detection_model, tokenizer, plagiarism_model)``.
    """
    detector = load_ai_detection_model()
    plagiarism_tokenizer, plagiarism_net = load_plagiarism_model()
    return detector, plagiarism_tokenizer, plagiarism_net
262
 
263
  # --- Call Main ---
264
  if __name__ == "__main__":
265
+ ai_detection_model, tokenizer, plagiarism_model = load_models() # Load models
266
  main()