Deadmon committed on
Commit
ab9d843
·
verified ·
1 Parent(s): c38a729

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -2
app.py CHANGED
@@ -1,10 +1,134 @@
1
- import gradio as gr
2
- from script import process_pdf # Assuming the above script is saved as script.py
3
  from pathlib import Path
 
 
 
 
 
 
 
4
 
 
5
  OUTPUT_DIR = Path("outputs")
6
  OUTPUT_DIR.mkdir(exist_ok=True)
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def process_uploaded_pdf(pdf_file):
9
  if pdf_file is None:
10
  return "Please upload a PDF file."
 
1
+ import os
 
2
  from pathlib import Path
3
+ import fitz # PyMuPDF for PDF handling
4
+ from PIL import Image
5
+ import pytesseract # For OCR
6
+ from transformers import BlipProcessor, BlipForConditionalGeneration # For image captioning
7
+ import io
8
+ import torch
9
+ import gradio as gr
10
 
11
# Directory that receives the rendered page images (see pdf_to_images).
# It is created at import time; exist_ok=True makes this a no-op when the
# directory is already present.
OUTPUT_DIR = Path("outputs")
OUTPUT_DIR.mkdir(exist_ok=True)
14
 
15
def pdf_to_images(pdf_path):
    """
    Render every page of a PDF to a PNG file and return the rendered images.

    Each page is scaled so that its longest side is approximately 2000
    pixels, saved as ``page_<n>.png`` (1-based) inside ``OUTPUT_DIR`` and
    also kept in memory as a PIL image.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        A list of ``(image_path, image)`` tuples in page order, where
        ``image_path`` is the path of the saved PNG and ``image`` is the
        corresponding PIL image. If any step fails, the error is printed
        and an empty list is returned.
    """
    # Target size, in pixels, of the longest side of each rendered page.
    target_long_side = 2000

    try:
        images = []

        # Using the document as a context manager guarantees it is closed
        # even when rendering or saving a page raises; an explicit close()
        # at the end of the loop would be skipped in that case.
        with fitz.open(pdf_path) as pdf_document:
            for page_num, page in enumerate(pdf_document, start=1):
                # Derive the zoom factor from the page's own dimensions so
                # that small and large pages end up at a similar resolution.
                rect = page.rect
                zoom = target_long_side / max(rect.width, rect.height)

                # Render the page through a uniform scaling matrix.
                pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

                # Wrap the raw pixmap samples in a PIL image.
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

                image_path = OUTPUT_DIR / f"page_{page_num}.png"
                img.save(image_path, "PNG")
                images.append((image_path, img))

        return images
    except Exception as e:
        # Best-effort boundary: callers treat an empty list as "conversion failed".
        print(f"Error converting PDF to images: {str(e)}")
        return []
55
+
56
def extract_text_from_image(image):
    """
    Run OCR over an image and return the recognised text.

    Args:
        image: The image handed to ``pytesseract.image_to_string``.

    Returns:
        The recognised text with leading and trailing whitespace removed,
        or an empty string when OCR raises (the error is printed).
    """
    try:
        return pytesseract.image_to_string(image).strip()
    except Exception as ocr_error:
        print(f"Error during OCR: {str(ocr_error)}")
    # Only reached when OCR failed above.
    return ""
66
+
67
# Checkpoint used for image captioning.
_BLIP_MODEL_NAME = "Salesforce/blip-image-captioning-base"

# Holds the loaded (processor, model) pair so it is created at most once per
# process instead of once per analysed page.
_blip_cache = {}


def _load_blip():
    """Return the BLIP ``(processor, model)`` pair, loading it on first use."""
    if "pair" not in _blip_cache:
        processor = BlipProcessor.from_pretrained(_BLIP_MODEL_NAME)
        model = BlipForConditionalGeneration.from_pretrained(_BLIP_MODEL_NAME)
        _blip_cache["pair"] = (processor, model)
    return _blip_cache["pair"]


def analyze_image(image_path):
    """
    Generate a caption for an image file using the BLIP captioning model.

    The processor and model are loaded once and reused on later calls, so
    captioning a multi-page document does not reload the checkpoint for
    every page.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The generated caption, or the string
        ``"Image content could not be analyzed."`` when loading the model,
        opening the image or generating the caption raises (the error is
        printed).
    """
    try:
        processor, model = _load_blip()

        # Open inside a ``with`` block so the underlying file is closed as
        # soon as the pixel data has been converted to RGB.
        with Image.open(image_path) as source:
            image = source.convert('RGB')
        inputs = processor(image, return_tensors="pt")

        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model.generate(**inputs)

        return processor.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Error during image analysis: {str(e)}")
        return "Image content could not be analyzed."
89
+
90
def process_pdf(pdf_path, output_txt_path):
    """
    Convert a PDF to page images and write a per-page text report.

    For every page the report contains the OCR text (or a note that none
    was found) followed by a generated description of the page image.

    Args:
        pdf_path: Path of the PDF file to analyse.
        output_txt_path: Path of the UTF-8 text file to write the report to.

    Returns:
        None. When no page images could be produced, a message is printed
        and the report file is not written.
    """
    print("Converting PDF to images...")
    rendered_pages = pdf_to_images(pdf_path)

    # Guard clause: nothing to report on without rendered pages.
    if not rendered_pages:
        print("No images were generated from the PDF.")
        return

    section_rule = "=" * 50
    page_rule = "-" * 30

    with open(output_txt_path, 'w', encoding='utf-8') as report:
        # Report header.
        report.writelines((
            f"Analysis of {os.path.basename(pdf_path)}\n",
            section_rule + "\n\n",
        ))

        for page_num, (image_path, image) in enumerate(rendered_pages, start=1):
            print(f"Processing page {page_num}...")

            # Page header.
            report.writelines((
                f"Page {page_num}\n",
                page_rule + "\n\n",
            ))

            # OCR section.
            text = extract_text_from_image(image)
            if not text:
                report.write("No text could be extracted from this page.\n\n")
            else:
                report.writelines(("Extracted Text:\n", text, "\n\n"))

            # Caption section, followed by the separator that ends the page.
            description = analyze_image(image_path)
            report.writelines((
                "Image Description:\n",
                f"{description}\n",
                "\n" + section_rule + "\n\n",
            ))

    print(f"Processing complete. Results saved to {output_txt_path}")
131
+
132
  def process_uploaded_pdf(pdf_file):
133
  if pdf_file is None:
134
  return "Please upload a PDF file."