marcosremar2 committed on
Commit
78bc6bc
·
1 Parent(s): 9618468

Fix: Replace magic_pdf.api import with direct magic_pdf usage

Browse files
Files changed (1) hide show
  1. app.py +33 -3
app.py CHANGED
@@ -1,6 +1,9 @@
1
  from fastapi import FastAPI, UploadFile, File
2
  from fastapi.responses import JSONResponse
3
- from magic_pdf.api.magic_pdf_api import process_pdf
 
 
 
4
 
5
  app = FastAPI()
6
 
@@ -8,7 +11,34 @@ app = FastAPI()
8
  async def extract(file: UploadFile = File(...)):
9
  content = await file.read()
10
  try:
11
- result = process_pdf(pdf_bytes=content, config_path="/root/magic-pdf.json")
12
- return {"result": result}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  except Exception as e:
14
  return JSONResponse(status_code=500, content={"error": str(e)})
 
1
  from fastapi import FastAPI, UploadFile, File
2
  from fastapi.responses import JSONResponse
3
+ import magic_pdf
4
+ import tempfile
5
+ import os
6
+ import json
7
 
8
  app = FastAPI()
9
 
 
11
  async def extract(file: UploadFile = File(...)):
12
  content = await file.read()
13
  try:
14
+ # Save the uploaded PDF to a temporary file
15
+ with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
16
+ temp_pdf.write(content)
17
+ temp_pdf_path = temp_pdf.name
18
+
19
+ # Process the PDF using magic_pdf.PDF class
20
+ result = magic_pdf.PDF(temp_pdf_path).parse()
21
+
22
+ # Convert result to dictionary
23
+ output = {
24
+ "pages": []
25
+ }
26
+
27
+ for page in result.pages:
28
+ page_data = {
29
+ "page_num": page.page_num,
30
+ "text": "\n".join([block.text for block in page.text_blocks]),
31
+ "tables": []
32
+ }
33
+
34
+ for table in page.tables:
35
+ page_data["tables"].append(table.to_markdown())
36
+
37
+ output["pages"].append(page_data)
38
+
39
+ # Clean up the temporary file
40
+ os.unlink(temp_pdf_path)
41
+
42
+ return {"result": output}
43
  except Exception as e:
44
  return JSONResponse(status_code=500, content={"error": str(e)})