Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,279 +1,24 @@
|
|
1 |
"""
|
2 |
-
|
3 |
-
|
4 |
-
This module implements the Gradio-based web interface for the OpenRouter
|
5 |
-
provider version of DescribePDF.
|
6 |
"""
|
7 |
|
8 |
import gradio as gr
|
9 |
import os
|
10 |
-
import
|
11 |
-
import logging
|
12 |
-
import secrets
|
13 |
-
from typing import Tuple, Optional, Dict, Any, List
|
14 |
-
|
15 |
-
from describepdf import config
|
16 |
-
from describepdf import core
|
17 |
|
18 |
-
|
19 |
-
primary_hue="red",
|
20 |
-
secondary_hue="rose",
|
21 |
-
spacing_size="lg",
|
22 |
-
)
|
23 |
|
24 |
-
def
|
25 |
-
|
26 |
-
ui_api_key: str,
|
27 |
-
ui_vlm_model: str,
|
28 |
-
ui_lang: str,
|
29 |
-
ui_use_md: bool,
|
30 |
-
ui_use_sum: bool,
|
31 |
-
ui_sum_model: str,
|
32 |
-
progress: gr.Progress = gr.Progress(track_tqdm=True)
|
33 |
-
) -> Tuple[str, gr.update, Optional[str]]:
|
34 |
-
"""
|
35 |
-
Wrapper function to call the core conversion process and handle the Gradio UI.
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
ui_api_key: OpenRouter API key from UI
|
40 |
-
ui_vlm_model: VLM model name from UI
|
41 |
-
ui_lang: Output language from UI
|
42 |
-
ui_use_md: Whether to use Markitdown from UI
|
43 |
-
ui_use_sum: Whether to generate a summary from UI
|
44 |
-
ui_sum_model: Summary model name from UI
|
45 |
-
progress: Gradio progress tracker
|
46 |
-
|
47 |
-
Returns:
|
48 |
-
Tuple containing:
|
49 |
-
- str: Status message
|
50 |
-
- gr.update: Download button update
|
51 |
-
- Optional[str]: Markdown result content
|
52 |
-
"""
|
53 |
-
# Validate input file
|
54 |
-
if pdf_file_obj is None:
|
55 |
-
return "Please upload a PDF file.", gr.update(value=None, visible=False), None
|
56 |
-
|
57 |
-
# Load environment config
|
58 |
-
env_config = config.get_config()
|
59 |
-
|
60 |
-
# Prepare configuration for this run
|
61 |
-
api_key = ui_api_key.strip() if ui_api_key.strip() else env_config.get("openrouter_api_key")
|
62 |
-
|
63 |
-
current_run_config: Dict[str, Any] = {
|
64 |
-
"provider": "openrouter",
|
65 |
-
"openrouter_api_key": api_key,
|
66 |
-
"vlm_model": ui_vlm_model,
|
67 |
-
"output_language": ui_lang,
|
68 |
-
"use_markitdown": ui_use_md,
|
69 |
-
"use_summary": ui_use_sum,
|
70 |
-
"summary_llm_model": ui_sum_model if ui_sum_model else env_config.get("or_summary_model")
|
71 |
-
}
|
72 |
-
|
73 |
-
# Validate API key
|
74 |
-
if not current_run_config.get("openrouter_api_key"):
|
75 |
-
error_msg = "Error: OpenRouter API Key is missing. Provide it in the UI or set OPENROUTER_API_KEY in the .env file."
|
76 |
-
logging.error(error_msg)
|
77 |
-
return error_msg, gr.update(value=None, visible=False), None
|
78 |
-
|
79 |
-
# Create progress callback for Gradio
|
80 |
-
def progress_callback_gradio(progress_value: float, status: str) -> None:
|
81 |
-
"""
|
82 |
-
Update Gradio progress bar with current progress and status message.
|
83 |
-
|
84 |
-
Args:
|
85 |
-
progress_value (float): Progress value between 0.0 and 1.0
|
86 |
-
status (str): Current status message to display
|
87 |
-
"""
|
88 |
-
clamped_progress = max(0.0, min(1.0, progress_value))
|
89 |
-
progress(clamped_progress, desc=status)
|
90 |
-
logging.info(f"Progress: {status} ({clamped_progress*100:.1f}%)")
|
91 |
-
|
92 |
-
# Run the conversion
|
93 |
-
status_message, result_markdown = core.convert_pdf_to_markdown(
|
94 |
-
pdf_file_obj.name,
|
95 |
-
current_run_config,
|
96 |
-
progress_callback_gradio
|
97 |
-
)
|
98 |
-
|
99 |
-
# Handle the download file
|
100 |
-
if result_markdown:
|
101 |
-
try:
|
102 |
-
# Get base filename from the uploaded PDF
|
103 |
-
base_name = os.path.splitext(os.path.basename(pdf_file_obj.name))[0]
|
104 |
-
download_filename = f"{base_name}_description.md"
|
105 |
-
|
106 |
-
# Create a temporary file with a random component to avoid collisions
|
107 |
-
random_suffix = secrets.token_hex(4)
|
108 |
-
temp_dir = tempfile.gettempdir()
|
109 |
-
download_filepath = os.path.join(temp_dir, f"{base_name}_{random_suffix}.md")
|
110 |
-
|
111 |
-
# Write markdown result to the temporary file
|
112 |
-
with open(download_filepath, "w", encoding="utf-8") as md_file:
|
113 |
-
md_file.write(result_markdown)
|
114 |
-
|
115 |
-
logging.info(f"Markdown result saved to temporary file for download: {download_filepath}")
|
116 |
-
download_button_update = gr.update(value=download_filepath, visible=True, label=f"Download '{download_filename}'")
|
117 |
-
|
118 |
-
except Exception as e:
|
119 |
-
logging.error(f"Error creating temporary file for download: {e}")
|
120 |
-
status_message += " (Error creating download file)"
|
121 |
-
download_button_update = gr.update(value=None, visible=False)
|
122 |
-
else:
|
123 |
-
download_button_update = gr.update(value=None, visible=False)
|
124 |
-
|
125 |
-
return (
|
126 |
-
status_message,
|
127 |
-
download_button_update,
|
128 |
-
result_markdown if result_markdown else ""
|
129 |
-
)
|
130 |
-
|
131 |
-
def create_ui() -> gr.Blocks:
|
132 |
-
"""
|
133 |
-
Create and return the Gradio interface for OpenRouter.
|
134 |
|
135 |
-
|
136 |
-
and configuration. It loads initial settings from the environment config
|
137 |
-
and provides UI components for adjusting settings for each conversion run.
|
138 |
-
|
139 |
-
Returns:
|
140 |
-
gr.Blocks: Configured Gradio interface ready to be launched
|
141 |
-
"""
|
142 |
-
# Load initial config from environment
|
143 |
-
initial_env_config = config.get_config()
|
144 |
-
|
145 |
-
# Define suggested model lists and languages
|
146 |
-
suggested_vlms: List[str] = [
|
147 |
-
"qwen/qwen2.5-vl-72b-instruct",
|
148 |
-
"google/gemini-2.5-pro-preview-03-25",
|
149 |
-
"openai/chatgpt-4o-latest"
|
150 |
-
]
|
151 |
-
|
152 |
-
suggested_llms: List[str] = [
|
153 |
-
"google/gemini-2.5-flash-preview",
|
154 |
-
"openai/chatgpt-4o-latest",
|
155 |
-
"anthropic/claude-3.5-sonnet"
|
156 |
-
]
|
157 |
-
|
158 |
-
suggested_languages: List[str] = [
|
159 |
-
"English", "Spanish", "French", "German",
|
160 |
-
"Chinese", "Japanese", "Italian",
|
161 |
-
"Portuguese", "Russian", "Korean"
|
162 |
-
]
|
163 |
-
|
164 |
-
# Set initial values from config
|
165 |
-
initial_vlm = initial_env_config.get("or_vlm_model")
|
166 |
-
initial_llm = initial_env_config.get("or_summary_model")
|
167 |
-
initial_lang = initial_env_config.get("output_language")
|
168 |
-
initial_use_md = initial_env_config.get("use_markitdown")
|
169 |
-
initial_use_sum = initial_env_config.get("use_summary")
|
170 |
-
|
171 |
-
has_env_api_key = bool(initial_env_config.get("openrouter_api_key"))
|
172 |
-
|
173 |
-
# Create the Gradio interface
|
174 |
-
with gr.Blocks(title="DescribePDF", theme=theme) as iface:
|
175 |
-
gr.Markdown("<center><img src='https://davidlms.github.io/DescribePDF/assets/poster.png' alt='Describe PDF Logo' width='600px'/></center>")
|
176 |
-
gr.Markdown(
|
177 |
-
"""<div style="display: flex;align-items: center;justify-content: center">
|
178 |
-
[<a href="https://davidlms.github.io/DescribePDF/">Project Page</a>] | [<a href="https://github.com/DavidLMS/describepdf">Github</a>]</div>
|
179 |
-
"""
|
180 |
-
)
|
181 |
-
gr.Markdown(
|
182 |
-
"DescribePDF is an open-source tool designed to convert PDF files into detailed page-by-page descriptions in Markdown format using Vision-Language Models (VLMs). Unlike traditional PDF extraction tools that focus on replicating the text layout, DescribePDF generates rich, contextual descriptions of each page's content, making it perfect for visually complex documents like catalogs, scanned documents, and presentations."
|
183 |
-
"\n\n"
|
184 |
-
"Upload a PDF, adjust settings, and click 'Describe'. "
|
185 |
-
)
|
186 |
-
|
187 |
-
with gr.Tabs():
|
188 |
-
# Generate tab
|
189 |
-
with gr.TabItem("Generate", id=0):
|
190 |
-
with gr.Row():
|
191 |
-
with gr.Column(scale=1):
|
192 |
-
pdf_input = gr.File(
|
193 |
-
label="Upload PDF",
|
194 |
-
file_types=['.pdf'],
|
195 |
-
type="filepath"
|
196 |
-
)
|
197 |
-
convert_button = gr.Button(
|
198 |
-
"Describe",
|
199 |
-
variant="primary"
|
200 |
-
)
|
201 |
-
progress_output = gr.Textbox(
|
202 |
-
label="Progress",
|
203 |
-
interactive=False,
|
204 |
-
lines=2
|
205 |
-
)
|
206 |
-
download_button = gr.File(
|
207 |
-
label="Download Markdown",
|
208 |
-
visible=False,
|
209 |
-
interactive=False
|
210 |
-
)
|
211 |
-
|
212 |
-
with gr.Column(scale=2):
|
213 |
-
markdown_output = gr.Markdown(label="Result (Markdown)")
|
214 |
-
|
215 |
-
# Configuration tab
|
216 |
-
with gr.TabItem("Settings", id=1):
|
217 |
-
gr.Markdown(
|
218 |
-
"Adjust settings for the *next* generation. These settings are **not** saved. "
|
219 |
-
"Defaults are controlled by the `.env` file."
|
220 |
-
)
|
221 |
-
api_key_input = gr.Textbox(
|
222 |
-
label="OpenRouter API Key" + (" (set in .env)" if has_env_api_key else ""),
|
223 |
-
type="password",
|
224 |
-
placeholder="Enter an API key here to override the one in .env" if has_env_api_key else "Enter your OpenRouter API key",
|
225 |
-
value=""
|
226 |
-
)
|
227 |
-
vlm_model_input = gr.Dropdown(
|
228 |
-
label="VLM Model",
|
229 |
-
choices=suggested_vlms,
|
230 |
-
value=initial_vlm,
|
231 |
-
allow_custom_value=True,
|
232 |
-
info="Select or type the OpenRouter VLM model name"
|
233 |
-
)
|
234 |
-
output_language_input = gr.Dropdown(
|
235 |
-
label="Output Language",
|
236 |
-
choices=suggested_languages,
|
237 |
-
value=initial_lang,
|
238 |
-
allow_custom_value=True,
|
239 |
-
info="Select or type the desired output language (e.g., English, Spanish)"
|
240 |
-
)
|
241 |
-
with gr.Row():
|
242 |
-
use_markitdown_checkbox = gr.Checkbox(
|
243 |
-
label="Use Markitdown for extra text context",
|
244 |
-
value=initial_use_md
|
245 |
-
)
|
246 |
-
use_summary_checkbox = gr.Checkbox(
|
247 |
-
label="Use PDF summary for augmented context (requires extra LLM call)",
|
248 |
-
value=initial_use_sum
|
249 |
-
)
|
250 |
-
summary_llm_model_input = gr.Dropdown(
|
251 |
-
label="LLM Model for Summary",
|
252 |
-
choices=suggested_llms,
|
253 |
-
value=initial_llm,
|
254 |
-
allow_custom_value=True,
|
255 |
-
info="Select or type the OpenRouter LLM model name for summaries"
|
256 |
-
)
|
257 |
-
|
258 |
-
# Connect UI components
|
259 |
-
conversion_inputs = [
|
260 |
-
pdf_input, api_key_input, vlm_model_input, output_language_input,
|
261 |
-
use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input
|
262 |
-
]
|
263 |
-
conversion_outputs = [
|
264 |
-
progress_output, download_button, markdown_output
|
265 |
-
]
|
266 |
-
convert_button.click(
|
267 |
-
fn=generate,
|
268 |
-
inputs=conversion_inputs,
|
269 |
-
outputs=conversion_outputs
|
270 |
-
)
|
271 |
-
|
272 |
-
return iface
|
273 |
|
274 |
-
#
|
275 |
-
app =
|
276 |
|
277 |
-
#
|
278 |
if __name__ == "__main__":
|
279 |
-
app.launch()
|
|
|
1 |
"""
|
2 |
+
Entry point for DescribePDF Hugging Face Space.
|
|
|
|
|
|
|
3 |
"""
|
4 |
|
5 |
import gradio as gr
|
6 |
import os
|
7 |
+
import sys
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# Put the directory containing this file at the front of the module search
# path before any project import runs.
# NOTE(review): presumably this is so the `describepdf` package sitting next
# to this file resolves ahead of any installed copy - confirm the repo layout.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
def create_interface():
    """Build and return the DescribePDF user interface.

    The UI factory is imported inside the function, so the
    ``describepdf.ui`` module is only loaded when this function is called
    rather than when this file's import statements run.

    Returns:
        Whatever ``describepdf.ui.create_ui()`` returns. The caller in this
        file invokes ``.launch()`` on it, so it is presumably a Gradio
        ``Blocks`` object - confirm against ``describepdf.ui``.
    """
    from describepdf.ui import create_ui

    return create_ui()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
# Create the interface at import time so `app` is available as a
# module-level attribute, not only when this file is executed as a script.
app = create_interface()

# For Hugging Face Spaces: start the server when this file is run directly.
if __name__ == "__main__":
    # Gradio's launch() names this option `ssr_mode`; passing `ssr` is
    # rejected as an unexpected keyword argument. False disables
    # server-side rendering.
    app.launch(ssr_mode=False)
|