sachin committed on
Commit
c514e0c
·
1 Parent(s): 04e8f49

custom-prompt-pdf-kannada

Browse files
Files changed (1) hide show
  1. src/server/main.py +110 -0
src/server/main.py CHANGED
@@ -1167,6 +1167,116 @@ async def indic_custom_prompt_pdf(
1167
  logger.error(f"Invalid JSON response from external API: {str(e)}")
1168
  raise HTTPException(status_code=500, detail="Invalid response format from external API")
1169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1170
 
1171
 
1172
  if __name__ == "__main__":
 
1167
  logger.error(f"Invalid JSON response from external API: {str(e)}")
1168
  raise HTTPException(status_code=500, detail="Invalid response format from external API")
1169
 
1170
@app.post("/v1/indic-custom-prompt-kannada-pdf",
          summary="Generate Kannada PDF with Custom Prompt",
          description="Process a PDF with a custom prompt and generate a new PDF in Kannada using an external API.",
          tags=["PDF"],
          responses={
              200: {"description": "Generated Kannada PDF file", "content": {"application/pdf": {"example": "Binary PDF data"}}},
              400: {"description": "Invalid PDF, page number, prompt, or language"},
              500: {"description": "External API error"},
              504: {"description": "External API timeout"}
          })
async def indic_custom_prompt_kannada_pdf(
    request: Request,
    file: UploadFile = File(..., description="PDF file to process"),
    page_number: int = Form(..., description="Page number to process (1-based indexing)"),
    prompt: str = Form(..., description="Custom prompt to process the page content (e.g., 'list key points')"),
    src_lang: str = Form(..., description="Source language code (e.g., eng_Latn)"),
    # NOTE(review): the default instance only exists to satisfy Python's
    # "no non-default parameter after a default one" rule; FastAPI is expected
    # to inject the per-request BackgroundTasks based on the annotation.
    background_tasks: BackgroundTasks = BackgroundTasks()
):
    """Forward one PDF page plus a custom prompt to the external API and return the Kannada PDF.

    The uploaded file is validated, posted to
    ``$EXTERNAL_PDF_API_BASE_URL/indic-custom-prompt-kannada-pdf/`` and the
    returned body is streamed into a temporary file that is served as a
    download. The temporary file is deleted by a background task after the
    response has been sent, or immediately if the upstream call fails.

    Args:
        request: Incoming request; only used to log the client address.
        file: Uploaded document; its filename must end in ``.pdf``.
        page_number: 1-based page to process; must be >= 1.
        prompt: Non-blank instruction applied to the page content.
        src_lang: Source language code; must be one of the supported codes.
        background_tasks: Task queue used to delete the temporary file.

    Returns:
        FileResponse streaming ``generated_kannada.pdf``.

    Raises:
        HTTPException: 400 for invalid input, 500 when the external API is
            not configured or returns an error, 504 when it times out.
    """
    # Function-scope import so this block does not depend on a new top-level
    # import; fastapi.concurrency re-exports Starlette's run_in_threadpool.
    from fastapi.concurrency import run_in_threadpool

    # Validate file (UploadFile.filename may be missing, so guard against None)
    upload_name = file.filename or ""
    if not upload_name.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="File must be a PDF")

    # Validate page number
    if page_number < 1:
        raise HTTPException(status_code=400, detail="Page number must be at least 1")

    # Validate prompt
    if not prompt.strip():
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    # Validate source language
    supported_languages = ["eng_Latn", "hin_Deva", "kan_Knda", "tam_Taml", "mal_Mlym", "tel_Telu"]
    if src_lang not in supported_languages:
        raise HTTPException(status_code=400, detail=f"Unsupported source language: {src_lang}. Must be one of {supported_languages}")

    logger.info("Processing Kannada PDF generation request", extra={
        "endpoint": "/v1/indic-custom-prompt-kannada-pdf",
        "file_name": upload_name,
        "page_number": page_number,
        "prompt": prompt,
        "src_lang": src_lang,
        # request.client is None for some transports (e.g. certain test clients)
        "client_ip": request.client.host if request.client else None
    })

    # Fail fast instead of posting to the literal URL "None/...".
    base_url = os.getenv('EXTERNAL_PDF_API_BASE_URL')
    if not base_url:
        logger.error("EXTERNAL_PDF_API_BASE_URL is not configured")
        raise HTTPException(status_code=500, detail="External API error: service is not configured")

    external_url = f"{base_url}/indic-custom-prompt-kannada-pdf/"
    start_time = time()

    file_content = await file.read()
    files = {"file": (upload_name, file_content, "application/pdf")}
    data = {
        "page_number": page_number,
        "prompt": prompt,
        "src_lang": src_lang
    }

    # Reserve a path for the generated PDF. Only the name is needed, so the
    # handle is released right away; the download helper reopens the path.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    temp_file_path = temp_file.name
    temp_file.close()

    download_ok = False
    try:
        # requests is synchronous; run it in a worker thread so a slow
        # upstream (up to the 60 s timeout) does not stall the event loop.
        await run_in_threadpool(
            _download_kannada_pdf, external_url, files, data, temp_file_path
        )
        download_ok = True
    except requests.Timeout as exc:
        logger.error("External Kannada PDF API timed out")
        raise HTTPException(status_code=504, detail="External API timeout") from exc
    except requests.RequestException as exc:
        logger.error(f"External Kannada PDF API error: {str(exc)}")
        # NOTE(review): str(exc) can include the upstream URL; kept as-is for
        # backward compatibility with existing clients.
        raise HTTPException(status_code=500, detail=f"External API error: {str(exc)}") from exc
    finally:
        # On any failure nothing will ever serve this file, so remove it now
        # rather than leaking it on disk.
        if not download_ok:
            _remove_temp_file(temp_file_path)

    # Prepare headers for the response
    headers = {
        "Content-Disposition": "attachment; filename=\"generated_kannada.pdf\"",
        "Cache-Control": "no-cache",
    }

    # Delete the temporary file once the response has been sent.
    background_tasks.add_task(_remove_temp_file, temp_file_path)

    logger.info(f"Kannada PDF generation completed in {time() - start_time:.2f} seconds")
    return FileResponse(
        path=temp_file_path,
        filename="generated_kannada.pdf",
        media_type="application/pdf",
        headers=headers
    )


def _download_kannada_pdf(url: str, files: dict, data: dict, dest_path: str) -> None:
    """POST the upload to the external API and stream the response body into dest_path."""
    # The context manager closes the streamed response so the underlying
    # connection is released even if writing the file fails part-way.
    with requests.post(
        url,
        files=files,
        data=data,
        headers={"accept": "application/json"},
        stream=True,
        timeout=60
    ) as response:
        response.raise_for_status()
        with open(dest_path, "wb") as out:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    out.write(chunk)


def _remove_temp_file(file_path: str) -> None:
    """Best-effort removal of a temporary file; failures are logged, never raised."""
    try:
        if os.path.exists(file_path):
            os.unlink(file_path)
            logger.info(f"Deleted temporary file: {file_path}")
    except Exception as e:
        logger.error(f"Failed to delete temporary file {file_path}: {str(e)}")
1280
 
1281
 
1282
  if __name__ == "__main__":