Essay-Grader committed on
Commit
8a681a9
·
0 Parent(s):

Initial commit with Docker deployment

Browse files
Files changed (5) hide show
  1. Dockerfile +26 -0
  2. README.md +85 -0
  3. app/main.py +291 -0
  4. app_loader.py +3 -0
  5. requirements.txt +228 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Python 3.11 is the oldest interpreter that satisfies the pins in
# requirements.txt (for example ipython 9.x and scipy 1.15.x do not
# support Python 3.9).
FROM python:3.11-slim

WORKDIR /code

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir -r /code/requirements.txt

# Create model cache directory (resolved against WORKDIR, i.e. /code/model_cache)
RUN mkdir -p ./model_cache

# Pre-download models (this will take some time).
# This happens BEFORE the application code is copied so that editing the code
# does not invalidate these layers and force the models to be downloaded again.
RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('sentence-transformers/all-roberta-large-v1')"
RUN python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; tokenizer = AutoTokenizer.from_pretrained('ChrispamWrites/roberta-ai-detector-20250401_232702', cache_dir='./model_cache'); model = AutoModelForSequenceClassification.from_pretrained('ChrispamWrites/roberta-ai-detector-20250401_232702', cache_dir='./model_cache')"

# Copy application code
COPY . /code/

# Run the application on port 7860 (Hugging Face Space default port).
# The FastAPI instance lives in app/main.py, so the import path is app.main:app.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Essay Grader API
2
+
3
+ This API uses advanced AI models to evaluate essays for:
4
+ - AI-generated content detection (identifies if content was written by AI)
5
+ - Internal plagiarism detection (identifies repetitive patterns within the text)
6
+
7
+ ## Endpoints
8
+
9
+ ### `GET /health`
10
+ Checks the API health status and model loading state.
11
+
12
+ **Response:**
13
+ ```json
14
+ {
15
+ "model_loaded": true,
16
+ "hub_accessible": true,
17
+ "pdf_processing": true
18
+ }
19
+ ```
20
+
21
+ ### `POST /analyze`
22
+ Upload a PDF essay for comprehensive analysis.
23
+
24
+ **Request:**
25
+ - Content-Type: multipart/form-data
26
+ - Body: file (PDF document)
27
+
28
+ **Response:**
29
+ ```json
30
+ {
31
+ "ai_content_detection": {
32
+ "label": "Human-written",
33
+ "confidence": 92.5
34
+ },
35
+ "internal_plagiarism_score": 18.3,
36
+ "max_similarity_between_chunks": 45.2,
37
+ "chunks_analyzed": 12
38
+ }
39
+ ```
40
+
41
+ ### Narrowed Response
42
+
43
+ **Response:**
44
+ ```json
45
+ {
46
+ "ai_content_detection": {
47
+ "confidence": 92.5
48
+ },
49
+ "internal_plagiarism_score": 18.3
50
+ }
51
+ ```
52
+
53
+ ## Usage Examples
54
+
55
+ ### Using cURL:
56
+ ```bash
57
+ curl -X 'POST' \
58
+ 'https://yourusername-essay-grader-api.hf.space/analyze' \
59
+ -H 'accept: application/json' \
60
+ -H 'Content-Type: multipart/form-data' \
61
+ -F 'file=@your_essay.pdf'
62
+ ```
63
+
64
+ ### Using Python Requests:
65
+ ```python
66
+ import requests
67
+
68
+ url = "https://yourusername-essay-grader-api.hf.space/analyze"
69
+ files = {"file": open("your_essay.pdf", "rb")}
70
+
71
+ response = requests.post(url, files=files)
72
+ result = response.json()
73
+ print(result)
74
+ ```
75
+
76
+ ## Technical Details
77
+
78
+ This API uses:
79
+ - RoBERTa-based models for AI content detection
80
+ - Sentence transformers for semantic analysis
81
+ - PyPDF2 for PDF text extraction
82
+
83
+ The application is built with FastAPI and deployed on Hugging Face Spaces.
84
+
85
+ ```Created by: Christian Mpambira(BED-COM-22-20)```
app/main.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/main.py: API for AI content detection and internal plagiarism checking using FastAPI
2
+
3
+ from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
4
+ from fastapi.responses import JSONResponse
5
+ from sentence_transformers import SentenceTransformer
6
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
7
+ from PyPDF2 import PdfReader
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+ import torch
10
+ import os
11
+ import shutil
12
+ import uuid
13
+ import tempfile
14
+ import logging
15
+ import requests
16
+ import time
17
+ from typing import Dict, Any, List
18
+
19
# Logging: one process-wide configuration; each record shows the timestamp,
# logger name and level ahead of the message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# The ASGI application object that the route decorators below attach to.
app = FastAPI(
    title="Essay Grader API",
    description="API for AI content detection and internal plagiarism detection",
    version="1.0.0",
)

# Shared, mutable status record. It is updated by the model loader and
# returned verbatim (plus derived fields) by the /health endpoint.
model_status = dict(
    model_loaded=False,        # True once both models loaded and passed a smoke test
    hub_accessible=False,      # result of the last Hugging Face Hub probe
    pdf_processing=True,
    last_error=None,           # message of the most recent loading failure
    last_reload_attempt=None,  # time.time() of the most recent load attempt
)

# Model handles; they stay None until the loader has populated them.
embedder = ai_tokenizer = ai_model = None

# Maximum number of loading attempts made at startup
MAX_RETRIES = 3
# Minimum time between automatic reload attempts (in seconds): 5 minutes
RELOAD_INTERVAL = 5 * 60
47
+
48
def load_models_impl():
    """Load both models, smoke-test them, and publish them to the module globals.

    The function first probes the Hugging Face Hub. When the Hub cannot be
    reached, the AI-detection tokenizer/config/model are loaded with
    ``local_files_only=True`` so that only ``./model_cache`` is consulted.

    The freshly loaded objects are kept in local variables until a test
    sentence has been run through both of them; only then are ``embedder``,
    ``ai_tokenizer`` and ``ai_model`` replaced. A failed attempt therefore
    never leaves the globals pointing at a half-loaded set.

    Side effects:
        Updates ``model_status`` keys ``last_reload_attempt``, ``last_error``,
        ``hub_accessible`` and ``model_loaded``.

    Returns:
        bool: True when both models loaded and passed the smoke test,
        False otherwise (the error text is stored in
        ``model_status["last_error"]``).
    """
    global embedder, ai_tokenizer, ai_model

    # Track attempt time
    model_status["last_reload_attempt"] = time.time()
    model_status["last_error"] = None

    # Check Hugging Face Hub connectivity. The flag is recomputed on every
    # attempt so that a previous success cannot leave a stale True behind.
    hub_accessible = False
    try:
        response = requests.head("https://huggingface.co", timeout=5)
        if response.status_code == 200:
            hub_accessible = True
            logger.info("Successfully connected to Hugging Face Hub")
        else:
            logger.error(f"Failed to connect to Hugging Face Hub: {response.status_code}")
    except Exception as e:
        logger.error(f"Error checking Hugging Face Hub connectivity: {e}")
    model_status["hub_accessible"] = hub_accessible

    try:
        # Load SentenceTransformer model for embeddings
        logger.info("Loading SentenceTransformer model...")
        new_embedder = SentenceTransformer('sentence-transformers/all-roberta-large-v1')

        # Load AI detection model
        ai_model_name = "ChrispamWrites/roberta-ai-detector-20250401_232702"
        logger.info(f"Loading AI detection model: {ai_model_name}")

        # Use local cache only when the Hub is unreachable, otherwise allow downloads
        pretrained_kwargs = {
            "local_files_only": not hub_accessible,
            "cache_dir": "./model_cache",
        }

        new_tokenizer = AutoTokenizer.from_pretrained(ai_model_name, **pretrained_kwargs)

        # Load the config first and adjust it to the dimensions the checkpoint expects
        ai_config = AutoConfig.from_pretrained(ai_model_name, **pretrained_kwargs)
        ai_config.max_position_embeddings = 514
        ai_config.type_vocab_size = 1

        # from_pretrained raises on failure instead of returning None, so a
        # "model is None" fallback can never run; any failure is handled by
        # the except clause below.
        new_model = AutoModelForSequenceClassification.from_pretrained(
            ai_model_name,
            config=ai_config,
            **pretrained_kwargs,
        )

        # Verify models are usable by pushing one sentence through each of them
        test_sentence = "This is a test sentence to verify model loading."
        new_embedder.encode(test_sentence)
        inputs = new_tokenizer(test_sentence, return_tensors="pt", max_length=512, truncation=True)
        with torch.no_grad():
            new_model(**inputs)
    except Exception as e:
        error_msg = f"Error loading models: {str(e)}"
        # logger.exception keeps the traceback, which the plain message loses
        logger.exception(error_msg)
        model_status["model_loaded"] = False
        model_status["last_error"] = error_msg
        return False

    # Publish the verified objects together
    embedder, ai_tokenizer, ai_model = new_embedder, new_tokenizer, new_model
    model_status["model_loaded"] = True
    logger.info("Models loaded successfully!")
    return True
132
+
133
# Load models at application startup, retrying a bounded number of times
@app.on_event("startup")
async def load_models():
    """Initial model loading on startup with retry mechanism.

    Calls ``load_models_impl`` up to ``MAX_RETRIES`` times, pausing 5 seconds
    between attempts. The pause uses ``asyncio.sleep`` so the coroutine does
    not block the event loop, and no pause or "retrying" log is emitted after
    the final attempt. If every attempt fails, the API still starts and a
    warning is logged.
    """
    import asyncio  # local import: only this startup hook needs it

    for attempt in range(1, MAX_RETRIES + 1):
        if load_models_impl():
            return
        if attempt < MAX_RETRIES:
            logger.info(f"Retrying model loading ({attempt}/{MAX_RETRIES})...")
            await asyncio.sleep(5)  # Wait 5 seconds before retrying

    logger.warning(f"Failed to load models after {MAX_RETRIES} attempts. API will start, but analyze endpoint won't work.")
147
+
148
async def background_model_reload(background_tasks: BackgroundTasks):
    """Background task that reloads the models without stalling the server.

    ``load_models_impl`` is synchronous and long-running. Because this task is
    a coroutine, calling the loader directly would run it on the event loop
    and freeze every other request until loading finished. The loader is
    therefore executed in the default thread-pool executor and awaited.

    Args:
        background_tasks: Accepted for compatibility with existing callers;
            it is not used by the task itself.
    """
    import asyncio  # local import: only this task needs it

    loop = asyncio.get_running_loop()
    reloaded = await loop.run_in_executor(None, load_models_impl)
    if reloaded:
        logger.info("Successfully reloaded models in background task")
    else:
        logger.error("Failed to reload models in background task")
154
+
155
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``.

    Pages for which the reader yields no text contribute an empty string.
    Page texts are joined with a newline so that the last word of one page
    and the first word of the next are not fused into a single token.

    Args:
        pdf_path: Path (or file-like object) handed to ``PdfReader``.

    Returns:
        str: The concatenated page text (may be empty).

    Raises:
        RuntimeError: If the file cannot be opened or parsed; the original
            exception is attached as ``__cause__``.
    """
    try:
        reader = PdfReader(pdf_path)
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        logger.error(f"Failed to extract text from PDF: {e}")
        raise RuntimeError(f"Failed to extract text: {e}") from e
165
+
166
def chunk_text(text, chunk_size=5):
    """Split ``text`` into chunks of up to ``chunk_size`` sentences.

    Sentences are delimited by ``"."``. Fragments that contain only
    whitespace (produced by a trailing period, an ellipsis, or blank lines)
    are discarded *before* grouping so they do not use up slots in a chunk.
    Every chunk is stripped of surrounding whitespace and ends with a
    period, whether it is a full chunk or the final partial one.

    Args:
        text: The text to split.
        chunk_size: Maximum number of sentences per chunk; must be >= 1.

    Returns:
        list[str]: The non-empty chunks in document order ([] for blank text).

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    sentences = [fragment for fragment in text.split(".") if fragment.strip()]
    # Re-join with the bare "." that split() removed: this keeps the original
    # spacing inside a chunk (e.g. "3.14" stays "3.14").
    return [
        ".".join(sentences[start:start + chunk_size]).strip() + "."
        for start in range(0, len(sentences), chunk_size)
    ]
170
+
171
def detect_ai_generated(text):
    """Classify ``text`` with the AI-detection model.

    The text is tokenized with truncation at 512 tokens, so for a long
    document only the leading portion reaches the model. The logits are
    turned into probabilities with a softmax and the most probable class
    is reported; class index 1 is labelled "AI-generated", any other index
    "Human-written".

    Returns:
        dict: ``{"label": str, "confidence": float}`` where ``confidence``
        is the winning class probability as a percentage rounded to two
        decimals.
    """
    encoded = ai_tokenizer(
        text,
        truncation=True,
        padding=True,
        return_tensors="pt",
        max_length=512,
    )

    # Inference only: no gradients are needed
    with torch.no_grad():
        logits = ai_model(**encoded).logits

    class_probs = torch.softmax(logits, dim=1).squeeze()
    top_class = torch.argmax(class_probs).item()
    top_prob = class_probs[top_class].item()

    if top_class == 1:
        label = "AI-generated"
    else:
        label = "Human-written"

    return {"label": label, "confidence": round(top_prob * 100, 2)}
184
+
185
@app.get("/health")
async def health_check() -> Dict[str, Any]:
    """Report API and model status.

    The response contains every entry of ``model_status`` plus two derived
    fields:

    * ``reload_needed`` - True when the models are not loaded and either no
      load was ever attempted or the last attempt is older than
      ``RELOAD_INTERVAL`` seconds.
    * ``last_reload_attempt_time`` - the last attempt formatted as local
      time ``YYYY-MM-DD HH:MM:SS``, or None when there was no attempt.
    """
    now = time.time()
    last_attempt = model_status["last_reload_attempt"]

    if model_status["model_loaded"]:
        reload_needed = False
    else:
        reload_needed = last_attempt is None or now - last_attempt > RELOAD_INTERVAL

    if last_attempt:
        attempt_stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_attempt))
    else:
        attempt_stamp = None

    # Copy the status record so the derived fields are not written back into it
    report = dict(model_status)
    report["reload_needed"] = reload_needed
    report["last_reload_attempt_time"] = attempt_stamp
    return report
203
+
204
@app.post("/reload-models")
async def reload_models(background_tasks: BackgroundTasks):
    """Endpoint to manually trigger model reloading.

    Reloads are limited to one per minute. When the previous attempt is less
    than 60 seconds old, a 429 response is returned with the number of
    seconds left to wait. Otherwise the reload is queued as a background task.

    The attempt timestamp is written here, at queue time, and not only when
    the background job starts. Without that, several requests arriving before
    the job begins would all pass the cooldown check and queue duplicate
    reloads.
    """
    cooldown_seconds = 60  # Prevent reloading more than once per minute

    current_time = time.time()
    last_attempt = model_status["last_reload_attempt"]
    if last_attempt is not None and current_time - last_attempt < cooldown_seconds:
        return JSONResponse(content={
            "message": "Too many reload attempts. Please wait before trying again.",
            "seconds_until_next_attempt": cooldown_seconds - int(current_time - last_attempt)
        }, status_code=429)

    # Close the cooldown window immediately; the loader overwrites this value
    # with its own start time once it runs.
    model_status["last_reload_attempt"] = current_time

    background_tasks.add_task(background_model_reload, background_tasks)
    return {"message": "Model reload initiated in background"}
218
+
219
@app.post("/analyze")
async def analyze_essay(file: UploadFile = File(...), background_tasks: BackgroundTasks = None):
    """Analyze an uploaded PDF essay.

    Steps: verify that the models are available, validate the upload,
    extract the text, run AI-content detection on the text, and compute an
    internal-similarity score as the mean pairwise cosine similarity between
    the embeddings of the text chunks.

    Args:
        file: The uploaded document; its filename must end in ``.pdf``
            (case-insensitive).
        background_tasks: Injected by FastAPI; used to schedule a model
            reload when the models are not loaded.

    Returns:
        JSONResponse with ``ai_content_confidence``,
        ``internal_plagiarism_score`` and ``debug_note``.

    Raises:
        HTTPException: 503 when models are unavailable, 400 for a non-PDF
            upload, a PDF without extractable text, or too little text to
            compare, and 500 when extraction or analysis fails.
    """
    # Check if models are loaded
    if not model_status["model_loaded"]:
        last_attempt = model_status["last_reload_attempt"]
        reload_needed = (
            last_attempt is None
            or time.time() - last_attempt > RELOAD_INTERVAL
        )

        if reload_needed and background_tasks is not None:
            # Start a background reload. The 503 is *returned* as a response
            # carrying the task list: tasks queued on the injected
            # BackgroundTasks are attached to the returned response, and a
            # raised HTTPException bypasses that path, so the reload would
            # not be started. The body has the same {"detail": ...} shape an
            # HTTPException produces.
            background_tasks.add_task(background_model_reload, background_tasks)
            return JSONResponse(
                status_code=503,
                content={"detail": "Models are being reloaded in the background. Please try again in a few minutes."},
                background=background_tasks,
            )

        raise HTTPException(
            status_code=503,
            detail="Model not loaded. Check /health endpoint for details or try /reload-models endpoint.",
        )

    # Check if models are actually initialized
    if embedder is None or ai_tokenizer is None or ai_model is None:
        logger.error("Models appear loaded but variables are None")
        raise HTTPException(status_code=503, detail="Model initialization incomplete. Please try again later.")

    # filename may be missing, and extensions such as ".PDF" are valid too
    if not (file.filename or "").lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    # Spool the upload to a temporary file that is removed once the text is extracted
    with tempfile.TemporaryDirectory() as tmpdir:
        file_path = os.path.join(tmpdir, f"{uuid.uuid4()}.pdf")
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        try:
            essay_text = extract_text_from_pdf(file_path)
        except RuntimeError as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    if not essay_text.strip():
        raise HTTPException(status_code=400, detail="The PDF seems to contain no extractable text.")

    # Validate the chunk count outside the try block below: HTTPException is
    # an Exception subclass, so raising it inside would turn this 400 into a
    # 500 "Analysis failed" response.
    chunks = chunk_text(essay_text)
    if len(chunks) < 2:
        raise HTTPException(status_code=400, detail="Not enough text chunks to assess internal plagiarism.")

    try:
        # Run AI content detection
        ai_result = detect_ai_generated(essay_text)

        # Run internal plagiarism detection: one similarity matrix for all
        # chunks instead of one library call per pair.
        embeddings = embedder.encode(chunks)
        similarity_matrix = cosine_similarity(embeddings)
        pair_scores = [
            # Convert to a built-in float so the score stays JSON-serializable
            # whatever NumPy scalar type the matrix holds.
            float(similarity_matrix[i][j])
            for i in range(len(chunks))
            for j in range(i + 1, len(chunks))
        ]

        # At least two chunks exist, so there is at least one pair
        avg_similarity = sum(pair_scores) / len(pair_scores)
        internal_score = round(avg_similarity * 100, 2)
    except Exception as e:
        logger.exception(f"Error during analysis: {e}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}") from e

    return JSONResponse(content={
        "ai_content_confidence": ai_result["confidence"],
        "internal_plagiarism_score": internal_score,
        "debug_note": "Processed with fixed model configuration"
    })
290
+
291
+
app_loader.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# This file ensures the app is imported correctly by the Hugging Face Spaces
# environment. The FastAPI instance is defined in app/main.py, so it has to be
# imported from the app.main module; the bare "app" package does not expose it.
from app.main import app
requirements.txt ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.2.1
2
+ accelerate==1.5.2
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.11.14
5
+ aiosignal==1.3.2
6
+ annotated-types==0.7.0
7
+ anyio==4.9.0
8
+ argcomplete==1.10.0
9
+ argon2-cffi==23.1.0
10
+ argon2-cffi-bindings==21.2.0
11
+ arrow==1.3.0
12
+ asttokens==3.0.0
13
+ astunparse==1.6.3
14
+ async-lru==2.0.5
15
+ attrs==25.3.0
16
+ babel==2.17.0
17
+ beautifulsoup4==4.8.0
18
+ bleach==6.2.0
19
+ CacheControl==0.14.2
20
+ cachetools==5.5.2
21
+ certifi==2025.1.31
22
+ cffi==1.17.1
23
+ chardet==3.0.4
24
+ charset-normalizer==3.4.1
25
+ click==8.1.8
26
+ colorama==0.4.6
27
+ comm==0.2.2
28
+ contourpy==1.3.1
29
+ cryptography==44.0.2
30
+ cycler==0.12.1
31
+ datasets==3.5.0
32
+ debugpy==1.8.13
33
+ decorator==5.2.1
34
+ defusedxml==0.7.1
35
+ dill==0.3.8
36
+ docx2txt==0.8
37
+ EbookLib==0.17.1
38
+ evaluate==0.4.3
39
+ executing==2.2.0
40
+ extract-msg==0.23.1
41
+ fastapi==0.115.12
42
+ fastjsonschema==2.21.1
43
+ filelock==3.18.0
44
+ firebase-admin==6.7.0
45
+ flatbuffers==25.2.10
46
+ fonttools==4.56.0
47
+ fqdn==1.5.1
48
+ frozenlist==1.5.0
49
+ fsspec==2024.12.0
50
+ gast==0.6.0
51
+ google-api-core==2.24.2
52
+ google-api-python-client==2.166.0
53
+ google-auth==2.38.0
54
+ google-auth-httplib2==0.2.0
55
+ google-auth-oauthlib==1.2.1
56
+ google-cloud-core==2.4.3
57
+ google-cloud-firestore==2.20.1
58
+ google-cloud-storage==3.1.0
59
+ google-crc32c==1.7.1
60
+ google-pasta==0.2.0
61
+ google-resumable-media==2.7.2
62
+ googleapis-common-protos==1.69.2
63
+ grpcio==1.71.0
64
+ grpcio-status==1.71.0
65
+ gunicorn==23.0.0
66
+ h11==0.14.0
67
+ h5py==3.13.0
68
+ httpcore==1.0.7
69
+ httplib2==0.22.0
70
+ httpx==0.28.1
71
+ huggingface-hub==0.29.3
72
+ idna==3.10
73
+ IMAPClient==2.1.0
74
+ ipykernel==6.29.5
75
+ ipython==9.0.2
76
+ ipython_pygments_lexers==1.1.1
77
+ ipywidgets==8.1.5
78
+ isoduration==20.11.0
79
+ jedi==0.19.2
80
+ Jinja2==3.1.6
81
+ joblib==1.4.2
82
+ json5==0.10.0
83
+ jsonpointer==3.0.0
84
+ jsonschema==4.23.0
85
+ jsonschema-specifications==2024.10.1
86
+ jupyter==1.1.1
87
+ jupyter-console==6.6.3
88
+ jupyter-events==0.12.0
89
+ jupyter-lsp==2.2.5
90
+ jupyter_client==8.6.3
91
+ jupyter_core==5.7.2
92
+ jupyter_server==2.15.0
93
+ jupyter_server_terminals==0.5.3
94
+ jupyterlab==4.3.6
95
+ jupyterlab_pygments==0.3.0
96
+ jupyterlab_server==2.27.3
97
+ jupyterlab_widgets==3.0.13
98
+ keras==3.9.1
99
+ kiwisolver==1.4.8
100
+ libclang==18.1.1
101
+ lxml==5.3.2
102
+ Markdown==3.7
103
+ markdown-it-py==3.0.0
104
+ MarkupSafe==3.0.2
105
+ matplotlib==3.10.1
106
+ matplotlib-inline==0.1.7
107
+ mdurl==0.1.2
108
+ mistune==3.1.3
109
+ ml_dtypes==0.5.1
110
+ mpmath==1.3.0
111
+ msgpack==1.1.0
112
+ multidict==6.2.0
113
+ multiprocess==0.70.16
114
+ namex==0.0.8
115
+ nbclient==0.10.2
116
+ nbconvert==7.16.6
117
+ nbformat==5.10.4
118
+ nest-asyncio==1.6.0
119
+ networkx==3.4.2
120
+ nltk==3.9.1
121
+ notebook==7.3.3
122
+ notebook_shim==0.2.4
123
+ numpy==1.26.4
124
+ oauthlib==3.2.2
125
+ olefile==0.46
126
+ opt_einsum==3.4.0
127
+ optree==0.14.1
128
+ overrides==7.7.0
129
+ packaging==24.2
130
+ pandas==2.2.3
131
+ pandocfilters==1.5.1
132
+ parso==0.8.4
133
+ pdfminer.six==20181108
134
+ pillow==11.1.0
135
+ platformdirs==4.3.7
136
+ prometheus_client==0.21.1
137
+ prompt_toolkit==3.0.50
138
+ propcache==0.3.1
139
+ proto-plus==1.26.1
140
+ protobuf==5.29.4
141
+ psutil==7.0.0
142
+ pure_eval==0.2.3
143
+ pyarrow==19.0.1
144
+ pyasn1==0.6.1
145
+ pyasn1_modules==0.4.1
146
+ pycparser==2.22
147
+ pycryptodome==3.22.0
148
+ pydantic==2.11.1
149
+ pydantic_core==2.33.0
150
+ Pygments==2.19.1
151
+ PyJWT==2.10.1
152
+ pyparsing==3.2.3
153
+ PyPDF2==3.0.1
154
+ python-dateutil==2.9.0.post0
155
+ python-docx==1.1.2
156
+ python-dotenv==1.1.0
157
+ python-json-logger==3.3.0
158
+ python-multipart==0.0.20
159
+ python-pptx==0.6.18
160
+ pytz==2025.2
161
+ pywin32==310; sys_platform == "win32"
162
+ pywinpty==2.0.15; sys_platform == "win32"
163
+ PyYAML==6.0.2
164
+ pyzmq==26.3.0
165
+ referencing==0.36.2
166
+ regex==2024.11.6
167
+ requests==2.32.3
168
+ requests-oauthlib==2.0.0
169
+ rfc3339-validator==0.1.4
170
+ rfc3986-validator==0.1.1
171
+ rich==13.9.4
172
+ rpds-py==0.24.0
173
+ rsa==4.9
174
+ safetensors==0.5.3
175
+ scikit-learn==1.6.1
176
+ scipy==1.15.2
177
+ seaborn==0.13.2
178
+ Send2Trash==1.8.3
179
+ sentence-transformers==4.1.0
180
+ six==1.12.0
181
+ sniffio==1.3.1
182
+ sortedcontainers==2.4.0
183
+ soupsieve==2.6
184
+ SpeechRecognition==3.8.1
185
+ stack-data==0.6.3
186
+ starlette==0.46.1
187
+ sympy==1.13.1
188
+ tensorboard==2.19.0
189
+ tensorboard-data-server==0.7.2
190
+ tensorflow==2.19.0
191
+ tensorflow-estimator==2.15.0
192
+ tensorflow-intel==2.15.1; sys_platform == "win32"
193
+ tensorflow-io-gcs-filesystem==0.31.0
194
+ termcolor==2.5.0
195
+ terminado==0.18.1
196
+ textblob==0.19.0
197
+ textract==1.6.3
198
+ tf_keras==2.19.0
199
+ threadpoolctl==3.6.0
200
+ tinycss2==1.4.0
201
+ tokenizers==0.21.1
202
+ torch==2.6.0
203
+ torchaudio==2.6.0
204
+ torchvision==0.21.0
205
+ tornado==6.4.2
206
+ tqdm==4.67.1
207
+ traitlets==5.14.3
208
+ transformers==4.50.2
209
+ types-python-dateutil==2.9.0.20241206
210
+ typing-inspection==0.4.0
211
+ typing_extensions==4.13.0
212
+ tzdata==2025.2
213
+ tzlocal==1.5.1
214
+ uri-template==1.3.0
215
+ uritemplate==4.1.1
216
+ urllib3==2.3.0
217
+ uvicorn==0.34.0
218
+ wcwidth==0.2.13
219
+ webcolors==24.11.1
220
+ webencodings==0.5.1
221
+ websocket-client==1.8.0
222
+ Werkzeug==3.1.3
223
+ widgetsnbextension==4.0.13
224
+ wrapt==1.14.1
225
+ xlrd==1.2.0
226
+ XlsxWriter==3.2.2
227
+ xxhash==3.5.0
228
+ yarl==1.18.3