Commit 7e5fff1
1 Parent(s): 5c3b44c

Made changes to the app, requirements, runtime and added Procfile

Files changed:
- Procfile          +1 -0
- app/app.py        +47 -13
- requirements.txt  +10 -14
- runtime.txt       +1 -1
Procfile
ADDED
@@ -0,0 +1 @@
+web: uvicorn app:app --host 0.0.0.0 --port $PORT
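Note: the FastAPI app lives at app/app.py, so whether uvicorn app:app resolves depends on the platform's working directory and on whether the app package re-exports the application object. If it does not, the start command would need the full module path; a hypothetical alternative (not part of this commit) would be:

web: uvicorn app.app:app --host 0.0.0.0 --port $PORT

$PORT is the port number the hosting platform injects at startup, which is why it is not hard-coded.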
app/app.py
CHANGED
@@ -8,6 +8,7 @@ from PyPDF2 import PdfReader
 from sklearn.metrics.pairwise import cosine_similarity
 import torch
 import os
+import numpy as np
 import shutil
 import uuid
 import tempfile
@@ -54,11 +55,6 @@ def load_models_impl():
     model_status["last_error"] = None

     try:
-        # Placeholder for the code that should be inside the try block
-        pass
-    except Exception as e:
-        logger.error(f"An error occurred: {e}")
-        raise HTTPException(status_code=500, detail="An internal error occurred.")
         # Check Hugging Face Hub connectivity
         response = requests.head("https://huggingface.co", timeout=5)
         if response.status_code == 200:
@@ -271,15 +267,53 @@ async def analyze_essay(file: UploadFile = File(...), background_tasks: BackgroundTasks
         # Run internal plagiarism detection
         chunks = chunk_text(essay_text)
         if len(chunks) < 2:
-
+            return JSONResponse(content={
+                "ai_detection": ai_result,
+                "internal_plagiarism": {
+                    "detected": False,
+                    "message": "Not enough text chunks to assess internal plagiarism."
+                }
+            })

         embeddings = embedder.encode(chunks)
-
-
-
-
+
+        # Calculate similarity matrix (excluding self-comparisons)
+        similarity_matrix = cosine_similarity(embeddings)
+        np.fill_diagonal(similarity_matrix, 0)  # Set diagonal to 0 to ignore self-similarity
+
+        # Find highest similarity pairs
+        plagiarism_threshold = 0.85  # Adjust as needed
+        high_similarities = []
+
+        for i in range(len(chunks)):
+            for j in range(i+1, len(chunks)):
+                similarity = similarity_matrix[i][j]
+                if similarity > plagiarism_threshold:
+                    high_similarities.append({
+                        "chunk1_index": i,
+                        "chunk2_index": j,
+                        "chunk1_text": chunks[i],
+                        "chunk2_text": chunks[j],
+                        "similarity": round(float(similarity), 2)
+                    })
+
+        # Sort by similarity (highest first)
+        high_similarities.sort(key=lambda x: x["similarity"], reverse=True)
+
+        return JSONResponse(content={
+            "ai_detection": ai_result,
+            "internal_plagiarism": {
+                "detected": len(high_similarities) > 0,
+                "similarity_pairs": high_similarities[:5],  # Return top 5 similar pairs
+                "total_similar_pairs": len(high_similarities)
+            }
+        })
+
     except Exception as e:
         logger.error(f"An error occurred during analysis: {e}")
-        raise HTTPException(status_code=500, detail="An error occurred during analysis")
-
-
+        raise HTTPException(status_code=500, detail=f"An error occurred during analysis: {str(e)}")
+
+@app.get("/")
+async def root():
+    """Root endpoint that redirects to docs"""
+    return {"message": "Essay Grader API is running. Visit /docs for the API documentation."}
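For readers who want to trace the new internal-plagiarism logic outside the API, here is a minimal standalone sketch. It assumes only numpy and scikit-learn; a TfidfVectorizer stands in for the app's SentenceTransformer embedder (embedder.encode), and the chunking and FastAPI plumbing are omitted:

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Toy "chunks"; the second is a verbatim repeat of the first, which is the
# internal-plagiarism case the endpoint is looking for.
chunks = [
    "Machine learning models can detect plagiarism in essays.",
    "Machine learning models can detect plagiarism in essays.",
    "The weather in Paris is pleasant in the spring.",
]

embeddings = TfidfVectorizer().fit_transform(chunks)  # stand-in for embedder.encode(chunks)

similarity_matrix = cosine_similarity(embeddings)
np.fill_diagonal(similarity_matrix, 0)  # zero out self-similarity, as in the commit

plagiarism_threshold = 0.85  # same threshold the commit uses
high_similarities = [
    {"chunk1_index": i, "chunk2_index": j,
     "similarity": round(float(similarity_matrix[i][j]), 2)}
    for i in range(len(chunks))
    for j in range(i + 1, len(chunks))
    if similarity_matrix[i][j] > plagiarism_threshold
]
high_similarities.sort(key=lambda x: x["similarity"], reverse=True)
print(high_similarities)  # -> [{'chunk1_index': 0, 'chunk2_index': 1, 'similarity': 1.0}]

With real sentence embeddings the scores are semantic rather than purely lexical, so paraphrased repeats can also cross the threshold, not just verbatim ones.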
requirements.txt
CHANGED
@@ -1,15 +1,11 @@
-fastapi==0.115.
+fastapi==0.115.0
 uvicorn==0.34.0
-transformers==4.
-
-
-
-
-
-
-
-
-aiohttp
-python-multipart
-python-dotenv
-requests
+transformers==4.39.3
+sentence-transformers==2.5.1
+torch==2.2.2
+scikit-learn==1.4.0
+PyPDF2==3.0.1
+numpy==1.26.4
+pandas==2.2.1
+requests==2.31.0
+python-multipart==0.0.9
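The commit replaces a partially unpinned list with exact pins, which makes builds reproducible. A small sketch for spotting drift between an installed environment and these pins; the pin dict is copied from the new requirements.txt, and importlib.metadata is standard library (Python 3.8+):

# Sketch: check installed package versions against the pins in requirements.txt.
from importlib.metadata import PackageNotFoundError, version

PINS = {
    "fastapi": "0.115.0", "uvicorn": "0.34.0", "transformers": "4.39.3",
    "sentence-transformers": "2.5.1", "torch": "2.2.2", "scikit-learn": "1.4.0",
    "PyPDF2": "3.0.1", "numpy": "1.26.4", "pandas": "2.2.1",
    "requests": "2.31.0", "python-multipart": "0.0.9",
}

for name, pinned in PINS.items():
    try:
        installed = version(name)  # distribution names are case-insensitive
    except PackageNotFoundError:
        print(f"MISSING  {name}=={pinned}")
        continue
    status = "OK" if installed == pinned else "DRIFT"
    print(f"{status:8}{name}: installed {installed}, pinned {pinned}")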
runtime.txt
CHANGED
@@ -1 +1 @@
-python-3.
+python-3.11.9
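On Heroku-style platforms, runtime.txt selects the interpreter, and the classic Python buildpack generally expects an exact patch release, which is presumably why the value is completed to python-3.11.9 here.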