Spaces:
Running
on
Zero
Running
on
Zero
Liam Dyer
committed on
fix metadata
Browse files
app.py
CHANGED
@@ -24,7 +24,16 @@ def convert(pdf_file):
|
|
24 |
for idx, page in enumerate(reader.pages):
|
25 |
full_text += f"\n\n---- Page {idx} ----\n\n" + page.extract_text()
|
26 |
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
|
30 |
gr.Interface(
|
|
|
24 |
for idx, page in enumerate(reader.pages):
|
25 |
full_text += f"\n\n---- Page {idx} ----\n\n" + page.extract_text()
|
26 |
|
27 |
+
# Extract metadata
|
28 |
+
metadata = {
|
29 |
+
"author": reader.metadata.author,
|
30 |
+
"creator": reader.metadata.creator,
|
31 |
+
"producer": reader.metadata.producer,
|
32 |
+
"subject": reader.metadata.subject,
|
33 |
+
"title": reader.metadata.title,
|
34 |
+
}
|
35 |
+
|
36 |
+
return full_text, metadata
|
37 |
|
38 |
|
39 |
gr.Interface(
|