davidlms committed on
Commit
12eacd9
·
verified ·
1 Parent(s): 4d54a95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -267
app.py CHANGED
@@ -1,279 +1,24 @@
1
  """
2
- Web UI module for DescribePDF with OpenRouter.
3
-
4
- This module implements the Gradio-based web interface for the OpenRouter
5
- provider version of DescribePDF.
6
  """
7
 
8
  import gradio as gr
9
  import os
10
- import tempfile
11
- import logging
12
- import secrets
13
- from typing import Tuple, Optional, Dict, Any, List
14
-
15
- from describepdf import config
16
- from describepdf import core
17
 
18
- theme = gr.themes.Soft(
19
- primary_hue="red",
20
- secondary_hue="rose",
21
- spacing_size="lg",
22
- )
23
 
24
- def generate(
25
- pdf_file_obj: Optional[gr.File],
26
- ui_api_key: str,
27
- ui_vlm_model: str,
28
- ui_lang: str,
29
- ui_use_md: bool,
30
- ui_use_sum: bool,
31
- ui_sum_model: str,
32
- progress: gr.Progress = gr.Progress(track_tqdm=True)
33
- ) -> Tuple[str, gr.update, Optional[str]]:
34
- """
35
- Wrapper function to call the core conversion process and handle the Gradio UI.
36
 
37
- Args:
38
- pdf_file_obj: Gradio File object for the uploaded PDF
39
- ui_api_key: OpenRouter API key from UI
40
- ui_vlm_model: VLM model name from UI
41
- ui_lang: Output language from UI
42
- ui_use_md: Whether to use Markitdown from UI
43
- ui_use_sum: Whether to generate a summary from UI
44
- ui_sum_model: Summary model name from UI
45
- progress: Gradio progress tracker
46
-
47
- Returns:
48
- Tuple containing:
49
- - str: Status message
50
- - gr.update: Download button update
51
- - Optional[str]: Markdown result content
52
- """
53
- # Validate input file
54
- if pdf_file_obj is None:
55
- return "Please upload a PDF file.", gr.update(value=None, visible=False), None
56
-
57
- # Load environment config
58
- env_config = config.get_config()
59
-
60
- # Prepare configuration for this run
61
- api_key = ui_api_key.strip() if ui_api_key.strip() else env_config.get("openrouter_api_key")
62
-
63
- current_run_config: Dict[str, Any] = {
64
- "provider": "openrouter",
65
- "openrouter_api_key": api_key,
66
- "vlm_model": ui_vlm_model,
67
- "output_language": ui_lang,
68
- "use_markitdown": ui_use_md,
69
- "use_summary": ui_use_sum,
70
- "summary_llm_model": ui_sum_model if ui_sum_model else env_config.get("or_summary_model")
71
- }
72
-
73
- # Validate API key
74
- if not current_run_config.get("openrouter_api_key"):
75
- error_msg = "Error: OpenRouter API Key is missing. Provide it in the UI or set OPENROUTER_API_KEY in the .env file."
76
- logging.error(error_msg)
77
- return error_msg, gr.update(value=None, visible=False), None
78
-
79
- # Create progress callback for Gradio
80
- def progress_callback_gradio(progress_value: float, status: str) -> None:
81
- """
82
- Update Gradio progress bar with current progress and status message.
83
-
84
- Args:
85
- progress_value (float): Progress value between 0.0 and 1.0
86
- status (str): Current status message to display
87
- """
88
- clamped_progress = max(0.0, min(1.0, progress_value))
89
- progress(clamped_progress, desc=status)
90
- logging.info(f"Progress: {status} ({clamped_progress*100:.1f}%)")
91
-
92
- # Run the conversion
93
- status_message, result_markdown = core.convert_pdf_to_markdown(
94
- pdf_file_obj.name,
95
- current_run_config,
96
- progress_callback_gradio
97
- )
98
-
99
- # Handle the download file
100
- if result_markdown:
101
- try:
102
- # Get base filename from the uploaded PDF
103
- base_name = os.path.splitext(os.path.basename(pdf_file_obj.name))[0]
104
- download_filename = f"{base_name}_description.md"
105
-
106
- # Create a temporary file with a random component to avoid collisions
107
- random_suffix = secrets.token_hex(4)
108
- temp_dir = tempfile.gettempdir()
109
- download_filepath = os.path.join(temp_dir, f"{base_name}_{random_suffix}.md")
110
-
111
- # Write markdown result to the temporary file
112
- with open(download_filepath, "w", encoding="utf-8") as md_file:
113
- md_file.write(result_markdown)
114
-
115
- logging.info(f"Markdown result saved to temporary file for download: {download_filepath}")
116
- download_button_update = gr.update(value=download_filepath, visible=True, label=f"Download '{download_filename}'")
117
-
118
- except Exception as e:
119
- logging.error(f"Error creating temporary file for download: {e}")
120
- status_message += " (Error creating download file)"
121
- download_button_update = gr.update(value=None, visible=False)
122
- else:
123
- download_button_update = gr.update(value=None, visible=False)
124
-
125
- return (
126
- status_message,
127
- download_button_update,
128
- result_markdown if result_markdown else ""
129
- )
130
-
131
- def create_ui() -> gr.Blocks:
132
- """
133
- Create and return the Gradio interface for OpenRouter.
134
 
135
- This function sets up a Gradio web interface with tabs for PDF conversion
136
- and configuration. It loads initial settings from the environment config
137
- and provides UI components for adjusting settings for each conversion run.
138
-
139
- Returns:
140
- gr.Blocks: Configured Gradio interface ready to be launched
141
- """
142
- # Load initial config from environment
143
- initial_env_config = config.get_config()
144
-
145
- # Define suggested model lists and languages
146
- suggested_vlms: List[str] = [
147
- "qwen/qwen2.5-vl-72b-instruct",
148
- "google/gemini-2.5-pro-preview-03-25",
149
- "openai/chatgpt-4o-latest"
150
- ]
151
-
152
- suggested_llms: List[str] = [
153
- "google/gemini-2.5-flash-preview",
154
- "openai/chatgpt-4o-latest",
155
- "anthropic/claude-3.5-sonnet"
156
- ]
157
-
158
- suggested_languages: List[str] = [
159
- "English", "Spanish", "French", "German",
160
- "Chinese", "Japanese", "Italian",
161
- "Portuguese", "Russian", "Korean"
162
- ]
163
-
164
- # Set initial values from config
165
- initial_vlm = initial_env_config.get("or_vlm_model")
166
- initial_llm = initial_env_config.get("or_summary_model")
167
- initial_lang = initial_env_config.get("output_language")
168
- initial_use_md = initial_env_config.get("use_markitdown")
169
- initial_use_sum = initial_env_config.get("use_summary")
170
-
171
- has_env_api_key = bool(initial_env_config.get("openrouter_api_key"))
172
-
173
- # Create the Gradio interface
174
- with gr.Blocks(title="DescribePDF", theme=theme) as iface:
175
- gr.Markdown("<center><img src='https://davidlms.github.io/DescribePDF/assets/poster.png' alt='Describe PDF Logo' width='600px'/></center>")
176
- gr.Markdown(
177
- """<div style="display: flex;align-items: center;justify-content: center">
178
- [<a href="https://davidlms.github.io/DescribePDF/">Project Page</a>] | [<a href="https://github.com/DavidLMS/describepdf">Github</a>]</div>
179
- """
180
- )
181
- gr.Markdown(
182
- "DescribePDF is an open-source tool designed to convert PDF files into detailed page-by-page descriptions in Markdown format using Vision-Language Models (VLMs). Unlike traditional PDF extraction tools that focus on replicating the text layout, DescribePDF generates rich, contextual descriptions of each page's content, making it perfect for visually complex documents like catalogs, scanned documents, and presentations."
183
- "\n\n"
184
- "Upload a PDF, adjust settings, and click 'Describe'. "
185
- )
186
-
187
- with gr.Tabs():
188
- # Generate tab
189
- with gr.TabItem("Generate", id=0):
190
- with gr.Row():
191
- with gr.Column(scale=1):
192
- pdf_input = gr.File(
193
- label="Upload PDF",
194
- file_types=['.pdf'],
195
- type="filepath"
196
- )
197
- convert_button = gr.Button(
198
- "Describe",
199
- variant="primary"
200
- )
201
- progress_output = gr.Textbox(
202
- label="Progress",
203
- interactive=False,
204
- lines=2
205
- )
206
- download_button = gr.File(
207
- label="Download Markdown",
208
- visible=False,
209
- interactive=False
210
- )
211
-
212
- with gr.Column(scale=2):
213
- markdown_output = gr.Markdown(label="Result (Markdown)")
214
-
215
- # Configuration tab
216
- with gr.TabItem("Settings", id=1):
217
- gr.Markdown(
218
- "Adjust settings for the *next* generation. These settings are **not** saved. "
219
- "Defaults are controlled by the `.env` file."
220
- )
221
- api_key_input = gr.Textbox(
222
- label="OpenRouter API Key" + (" (set in .env)" if has_env_api_key else ""),
223
- type="password",
224
- placeholder="Enter an API key here to override the one in .env" if has_env_api_key else "Enter your OpenRouter API key",
225
- value=""
226
- )
227
- vlm_model_input = gr.Dropdown(
228
- label="VLM Model",
229
- choices=suggested_vlms,
230
- value=initial_vlm,
231
- allow_custom_value=True,
232
- info="Select or type the OpenRouter VLM model name"
233
- )
234
- output_language_input = gr.Dropdown(
235
- label="Output Language",
236
- choices=suggested_languages,
237
- value=initial_lang,
238
- allow_custom_value=True,
239
- info="Select or type the desired output language (e.g., English, Spanish)"
240
- )
241
- with gr.Row():
242
- use_markitdown_checkbox = gr.Checkbox(
243
- label="Use Markitdown for extra text context",
244
- value=initial_use_md
245
- )
246
- use_summary_checkbox = gr.Checkbox(
247
- label="Use PDF summary for augmented context (requires extra LLM call)",
248
- value=initial_use_sum
249
- )
250
- summary_llm_model_input = gr.Dropdown(
251
- label="LLM Model for Summary",
252
- choices=suggested_llms,
253
- value=initial_llm,
254
- allow_custom_value=True,
255
- info="Select or type the OpenRouter LLM model name for summaries"
256
- )
257
-
258
- # Connect UI components
259
- conversion_inputs = [
260
- pdf_input, api_key_input, vlm_model_input, output_language_input,
261
- use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input
262
- ]
263
- conversion_outputs = [
264
- progress_output, download_button, markdown_output
265
- ]
266
- convert_button.click(
267
- fn=generate,
268
- inputs=conversion_inputs,
269
- outputs=conversion_outputs
270
- )
271
-
272
- return iface
273
 
274
- # Create the Gradio interface
275
- app = create_ui()
276
 
277
- # This will be used by Gradio when deployed
278
  if __name__ == "__main__":
279
- app.launch()
 
1
  """
2
+ Entry point for DescribePDF Hugging Face Space.
 
 
 
3
  """
4
 
5
  import gradio as gr
6
  import os
7
+ import sys
 
 
 
 
 
 
8
 
9
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 
 
 
 
10
 
11
+ def create_interface():
12
+ from describepdf.ui import create_ui
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # Crear la interfaz
15
+ interface = create_ui()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ return interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ # Crear la interfaz
20
+ app = create_interface()
21
 
22
+ # Para Hugging Face Spaces
23
  if __name__ == "__main__":
24
+ app.launch(ssr=False)