Spaces:
Running
Running
sachin
committed on
Commit
·
c514e0c
1
Parent(s):
04e8f49
custom-prompt-pdf-kannada
Browse files- src/server/main.py +110 -0
src/server/main.py
CHANGED
@@ -1167,6 +1167,116 @@ async def indic_custom_prompt_pdf(
|
|
1167 |
logger.error(f"Invalid JSON response from external API: {str(e)}")
|
1168 |
raise HTTPException(status_code=500, detail="Invalid response format from external API")
|
1169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1170 |
|
1171 |
|
1172 |
if __name__ == "__main__":
|
|
|
1167 |
logger.error(f"Invalid JSON response from external API: {str(e)}")
|
1168 |
raise HTTPException(status_code=500, detail="Invalid response format from external API")
|
1169 |
|
1170 |
+
@app.post("/v1/indic-custom-prompt-kannada-pdf",
          summary="Generate Kannada PDF with Custom Prompt",
          description="Process a PDF with a custom prompt and generate a new PDF in Kannada using an external API.",
          tags=["PDF"],
          responses={
              200: {"description": "Generated Kannada PDF file", "content": {"application/pdf": {"example": "Binary PDF data"}}},
              400: {"description": "Invalid PDF, page number, prompt, or language"},
              500: {"description": "External API error"},
              504: {"description": "External API timeout"}
          })
async def indic_custom_prompt_kannada_pdf(
    request: Request,
    file: UploadFile = File(..., description="PDF file to process"),
    page_number: int = Form(..., description="Page number to process (1-based indexing)"),
    prompt: str = Form(..., description="Custom prompt to process the page content (e.g., 'list key points')"),
    src_lang: str = Form(..., description="Source language code (e.g., eng_Latn)"),
    # NOTE(review): the default instance is kept only for signature
    # compatibility; FastAPI is expected to inject a per-request
    # BackgroundTasks object based on the annotation -- confirm.
    background_tasks: BackgroundTasks = BackgroundTasks()
):
    """Forward one page of an uploaded PDF to the external Kannada PDF API.

    The upload, page number, prompt and source language are validated, then
    posted as multipart form data to
    ``$EXTERNAL_PDF_API_BASE_URL/indic-custom-prompt-kannada-pdf/``. The PDF
    streamed back is spooled to a temporary file and returned as an
    attachment named ``generated_kannada.pdf``. The temporary file is removed
    by a background task on success, and immediately on any failure.

    Args:
        request: Incoming request; only the client address is read, for logging.
        file: Uploaded file; its name must end in ``.pdf`` (case-insensitive).
        page_number: 1-based page number; values below 1 are rejected.
        prompt: Custom prompt; must contain non-whitespace characters.
        src_lang: Source language code; must be one of the supported codes.
        background_tasks: Task queue used to delete the temporary file.

    Returns:
        A ``FileResponse`` serving the generated PDF.

    Raises:
        HTTPException: 400 for invalid input, 504 when the external API times
            out, 500 for any other ``requests`` error (including non-2xx
            status codes from the external API).
    """
    # Validate file (filename can be missing on some uploads; treat as invalid
    # instead of crashing with AttributeError)
    if not file.filename or not file.filename.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="File must be a PDF")

    # Validate page number
    if page_number < 1:
        raise HTTPException(status_code=400, detail="Page number must be at least 1")

    # Validate prompt
    if not prompt.strip():
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    # Validate source language
    supported_languages = ["eng_Latn", "hin_Deva", "kan_Knda", "tam_Taml", "mal_Mlym", "tel_Telu"]
    if src_lang not in supported_languages:
        raise HTTPException(status_code=400, detail=f"Unsupported source language: {src_lang}. Must be one of {supported_languages}")

    logger.info("Processing Kannada PDF generation request", extra={
        "endpoint": "/v1/indic-custom-prompt-kannada-pdf",
        "file_name": file.filename,
        "page_number": page_number,
        "prompt": prompt,
        "src_lang": src_lang,
        # request.client may be None (e.g. some test clients / proxies)
        "client_ip": request.client.host if request.client else None
    })

    external_url = f"{os.getenv('EXTERNAL_PDF_API_BASE_URL')}/indic-custom-prompt-kannada-pdf/"
    start_time = time()

    def cleanup_file(file_path: str):
        """Best-effort removal of the temporary PDF; failures are only logged."""
        try:
            if os.path.exists(file_path):
                os.unlink(file_path)
                logger.info(f"Deleted temporary file: {file_path}")
        except Exception as e:
            logger.error(f"Failed to delete temporary file {file_path}: {str(e)}")

    temp_file_path = None
    handed_off = False  # True once deletion has been delegated to background_tasks

    try:
        file_content = await file.read()
        files = {"file": (file.filename, file_content, "application/pdf")}
        data = {
            "page_number": page_number,
            "prompt": prompt,
            "src_lang": src_lang
        }

        # NOTE(review): requests is synchronous, so this call blocks the event
        # loop for up to the timeout; consider moving it to a worker thread.
        # The context manager releases the streamed connection in every case.
        with requests.post(
            external_url,
            files=files,
            data=data,
            headers={"accept": "application/json"},
            stream=True,
            timeout=60
        ) as response:
            response.raise_for_status()

            # Write the PDF content through a single handle that is closed
            # (and therefore flushed) before the file is served.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                temp_file_path = temp_file.name
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        temp_file.write(chunk)

        # Prepare headers for the response
        headers = {
            "Content-Disposition": "attachment; filename=\"generated_kannada.pdf\"",
            "Cache-Control": "no-cache",
        }

        pdf_response = FileResponse(
            path=temp_file_path,
            filename="generated_kannada.pdf",
            media_type="application/pdf",
            headers=headers
        )

        # Schedule file cleanup as a background task
        background_tasks.add_task(cleanup_file, temp_file_path)
        handed_off = True

        logger.info(f"Kannada PDF generation completed in {time() - start_time:.2f} seconds")
        return pdf_response

    except requests.Timeout as e:
        logger.error("External Kannada PDF API timed out")
        raise HTTPException(status_code=504, detail="External API timeout") from e
    except requests.RequestException as e:
        logger.error(f"External Kannada PDF API error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"External API error: {str(e)}") from e
    finally:
        # On any failure path nobody else will delete the temp file, so do it
        # here to avoid leaking one file per failed request.
        if temp_file_path is not None and not handed_off:
            cleanup_file(temp_file_path)
|
1280 |
|
1281 |
|
1282 |
if __name__ == "__main__":
|