avans06 committed
Commit 0c0ff58 · 1 Parent(s): a347e11

Add application file

Files changed (5):
  1. .gitignore +3 -0
  2. README.md +2 -2
  3. app.py +432 -0
  4. requirements.txt +5 -0
  5. webui.bat +73 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
+ .vs
+ venv
+ tmp
README.md CHANGED
@@ -1,7 +1,7 @@
  ---
  title: HtmlToMarkdown
- emoji: 🏆
- colorFrom: red
+ emoji: 📝
+ colorFrom: green
  colorTo: indigo
  sdk: gradio
  sdk_version: 5.29.0
app.py ADDED
@@ -0,0 +1,432 @@
+ import gradio as gr
+ import os
+ import shutil
+ import requests
+ from bs4 import BeautifulSoup
+ from urllib.parse import urljoin, urlparse
+ from queue import Queue
+ import time
+ import zipfile
+ import tempfile
+ import sys
+ import logging
+ import traceback
+ import pypandoc
+
+ # --- Configuration & Logging ---
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
+ REQUEST_TIMEOUT = 20 # seconds
+ POLITENESS_DELAY = 0.3 # seconds between requests
+
+ # --- Pandoc Check ---
+ def check_pandoc_available():
+     """Checks if pypandoc can find a pandoc executable."""
+     try:
+         pandoc_path = pypandoc.get_pandoc_path()
+         logging.info(f"pypandoc found Pandoc executable at: {pandoc_path}")
+         return True
+     except OSError:
+         logging.error("pypandoc could not find Pandoc executable.")
+         logging.error("Please ensure Pandoc is installed OR install 'pypandoc_binary' (`pip install pypandoc_binary`)")
+         return False
+     except ImportError:
+         logging.error("pypandoc library not found. Please install it (`pip install pypandoc_binary`).")
+         return False
+
+ # --- Core Functions ---
+ def fetch_html(url):
+     """Fetches HTML content from a given URL."""
+     try:
+         headers = {'User-Agent': USER_AGENT}
+         response = requests.get(url, timeout=REQUEST_TIMEOUT, headers=headers)
+         response.raise_for_status()
+         response.encoding = response.apparent_encoding if response.apparent_encoding else 'utf-8'
+         logging.info(f"Successfully fetched: {url}")
+         return response.text
+     except requests.exceptions.Timeout:
+         logging.error(f"Timeout fetching URL: {url}")
+         return None
+     except requests.exceptions.RequestException as e:
+         logging.error(f"Error fetching URL {url}: {e}")
+         return None
+     except Exception as e:
+         logging.error(f"Unexpected error fetching {url}: {e}")
+         return None
+
+ def convert_html_to_md(html_content, output_md_path, pandoc_output_format, pandoc_extra_args):
+     """
+     Converts HTML content string to a Markdown file using pypandoc
+     with specified format and arguments.
+     """
+     if not html_content:
+         logging.warning(f"Empty HTML content for {output_md_path}. Conversion skipped.")
+         return False
+     # Using html+smart enables better handling of typographic characters in source HTML
+     input_format = 'html+smart' # Keep input format consistent
+
+     try:
+         logging.debug(f"pypandoc converting to {pandoc_output_format} with args: {pandoc_extra_args}")
+         # Use pypandoc.convert_text to convert the HTML string
+         # Specify input format ('html'), output format ('gfm'), and output file
+         # pypandoc handles invoking pandoc correctly with the string input
+         output = pypandoc.convert_text(
+             source=html_content,
+             to=pandoc_output_format,
+             format=input_format,
+             outputfile=output_md_path,
+             extra_args=pandoc_extra_args,
+             encoding='utf-8'
+         )
+
+         # When using outputfile, convert_text returns an empty string on success
+         if output == "":
+             logging.info(f"Successfully converted using pypandoc -> {os.path.basename(output_md_path)}")
+             return True
+         else:
+             logging.error(f"pypandoc conversion to {output_md_path} returned unexpected non-empty output.")
+             if os.path.exists(output_md_path) and os.path.getsize(output_md_path) == 0:
+                 logging.warning(f"Output file {output_md_path} was created but is empty.")
+             return False
+
+     except Exception as e:
+         logging.error(f"Error during pypandoc conversion for {output_md_path}: {e}")
+         logging.error(traceback.format_exc())
+         if os.path.exists(output_md_path) and os.path.getsize(output_md_path) == 0:
+             try:
+                 os.remove(output_md_path)
+                 logging.info(f"Removed empty/failed output file: {os.path.basename(output_md_path)}")
+             except OSError as remove_err:
+                 logging.warning(f"Could not remove empty/failed output file {output_md_path}: {remove_err}")
+         return False
+
+
+ def create_zip_archive(source_dir, output_zip_path):
+     """Creates a ZIP archive from the contents of source_dir."""
+     try:
+         with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+             for root, _, files in os.walk(source_dir):
+                 for file in files:
+                     file_path = os.path.join(root, file)
+                     # Arcname is the path inside the zip file (relative to source_dir)
+                     arcname = os.path.relpath(file_path, source_dir)
+                     zipf.write(file_path, arcname)
+         logging.info(f"Successfully created ZIP archive: {output_zip_path}")
+         return True
+     except Exception as e:
+         logging.error(f"Failed to create ZIP archive {output_zip_path}: {e}")
+         return False
+
+ # --- Main Gradio Function ---
+ def process_conversion_request(start_url_str, restrict_path, use_aggressive_conversion, progress=gr.Progress(track_tqdm=True)):
+     """The main function triggered by the Gradio interface."""
+
+     # --- 0. Check Pandoc via pypandoc ---
+     if not check_pandoc_available():
+         yield "Error: pypandoc could not find a Pandoc executable. Please ensure Pandoc is installed or install `pypandoc_binary`.", None
+         return
+
+     # --- 1. Validate URL and Determine Restriction Path ---
+     start_url_str = start_url_str.strip()
+     start_path_dir_for_restriction = None # Initialize restriction path base
+
+     if not start_url_str:
+         yield "Error: Starting URL cannot be empty.", None
+         return
+     try:
+         parsed_start_url = urlparse(start_url_str)
+         if not parsed_start_url.scheme or not parsed_start_url.netloc:
+             raise ValueError("Invalid URL format (missing scheme or domain).")
+         base_netloc = parsed_start_url.netloc
+         base_scheme = parsed_start_url.scheme
+
+         # Calculate the base directory path for comparison if restriction is enabled
+         start_path_cleaned = parsed_start_url.path.strip('/')
+         if start_path_cleaned: # If not root path
+             # Use os.path.dirname to get the directory part:
+             #   dirname('main/index.html') -> 'main'
+             #   dirname('main') -> '' (needs correction if the start URL is like /main/)
+             # Adjust: if there is no '/', it is either a first-level directory or a root file
+             if '/' not in start_path_cleaned and '.' not in start_path_cleaned:
+                 start_path_dir_for_restriction = start_path_cleaned # e.g. 'main'
+             else:
+                 start_path_dir_for_restriction = os.path.dirname(start_path_cleaned) # e.g. 'main' from main/index.html, or '' from /index.html
+                 if start_path_dir_for_restriction == '': # Handle case like /index.html correctly
+                     start_path_dir_for_restriction = None # Treat like root, don't restrict path based on this
+
+     except ValueError as e:
+         yield f"Error: Invalid starting URL '{start_url_str}': {e}", None
+         return
+
+     # Log restriction status
+     restriction_msg = f"Path restriction enabled: limiting to paths starting like '{start_path_dir_for_restriction}/'." if restrict_path and start_path_dir_for_restriction else "Path restriction disabled or starting from root."
+
+     # --- Determine Pandoc Settings based on Checkbox ---
+     # wrap=none, Prevent auto-wrapping lines
+     if use_aggressive_conversion:
+         pandoc_format_to_use = 'gfm-raw_html+hard_line_breaks'
+         pandoc_args_to_use = ['--wrap=none', '--markdown-headings=atx']
+         conversion_mode_msg = "Using aggressive Markdown conversion (less raw HTML, ATX headers)."
+     else:
+         # Using gfm+hard_line_breaks ensures GitHub compatibility and respects single newlines
+         pandoc_format_to_use = 'gfm+hard_line_breaks'
+         pandoc_args_to_use = ['--wrap=none']
+         conversion_mode_msg = "Using standard Markdown conversion (may preserve more raw HTML)."
+
+     logging.info(conversion_mode_msg) # Log the mode
+
+     # --- 2. Setup Temporary Directory & Crawler ---
+     staging_dir = tempfile.mkdtemp(prefix="md_convert_")
+     logging.info(f"Created temporary staging directory: {staging_dir}")
+     output_zip_file = None
+
+     urls_to_process = Queue()
+     processed_urls = set() # Still needed to avoid duplicates
+     failed_urls = set()
+     converted_count = 0
+     url_count_estimate = 1 # Total unique URLs discovered so far (starts with the first one)
+     dequeued_count = 0
+
+     urls_to_process.put(start_url_str)
+     processed_urls.add(start_url_str) # Add start URL here
+
+     log_messages = ["Process started...", restriction_msg, conversion_mode_msg]
+
+     try:
+         # --- 3. Crawl and Convert Loop ---
+         while not urls_to_process.empty():
+             # --- Get URL and Increment Dequeued Count ---
+             current_url = urls_to_process.get()
+             dequeued_count += 1 # Increment when an item is taken for processing
+
+             # --- Update Progress Bar ---
+             # Calculate progress based on dequeued items vs. total discovered
+             # Denominator is the total number of unique URLs added to processed_urls/queue so far
+             denominator = max(1, url_count_estimate) # url_count_estimate increases when new links are found
+             current_progress_value = dequeued_count / denominator
+
+             # Update Gradio progress - use dequeued_count for user display
+             # Display: Processed X / Total_Discovered Y
+             progress(current_progress_value, desc=f"Processing {dequeued_count}/{url_count_estimate}. Queue: {urls_to_process.qsize()}")
+
+             # --- Process the current URL ---
+             log_message = f"\nProcessing ({dequeued_count}/{url_count_estimate}): {current_url}"
+             logging.info(log_message)
+             log_messages.append(log_message)
+
+             # --- 3a. Fetch HTML ---
+             time.sleep(POLITENESS_DELAY)
+             html_content = fetch_html(current_url)
+             if not html_content:
+                 failed_urls.add(current_url)
+                 log_message = f" -> Failed to fetch content."
+                 logging.warning(log_message)
+                 log_messages.append(log_message)
+                 continue
+
+             # --- 3b. Determine Output Path ---
+             parsed_current_url = urlparse(current_url)
+             # Get the path part of the URL, removing leading/trailing slashes
+             url_path_segment = parsed_current_url.path.strip('/') # e.g., "main/index.html", "HEAD/index.html", ""
+             # If the path is empty (domain root like https://example.com/), use 'index' as the base name
+             if not url_path_segment:
+                 path_in_zip_base = 'index'
+             else:
+                 path_in_zip_base = url_path_segment # e.g., "main/index.html", "HEAD/index.html"
+
+             # Now, determine the final .md filename based on the path base
+             if path_in_zip_base.lower().endswith('.html'):
+                 relative_md_filename = os.path.splitext(path_in_zip_base)[0] + ".md"
+             elif path_in_zip_base.endswith('/'): # Should not happen often with strip('/') but handle defensively
+                 # If URL was like /docs/, path_in_zip_base would be 'docs' after strip.
+                 # This case is less likely needed now, but safe to keep.
+                 relative_md_filename = os.path.join(path_in_zip_base, "index.md")
+             else:
+                 # If it's not empty and doesn't end with .html, assume it's a directory path
+                 # Append 'index.md' to treat it like accessing a directory index
+                 # e.g., if URL path was /main, url_path_segment is 'main', output becomes 'main/index.md'
+                 # If URL path was /path/to/file (no .html), output becomes 'path/to/file.md' if '.' in basename, else 'path/to/file/index.md'
+                 basename = os.path.basename(path_in_zip_base)
+                 if '.' in basename: # Check if it looks like a file without .html extension
+                     relative_md_filename = path_in_zip_base + ".md"
+                 else: # Assume it's a directory reference
+                     relative_md_filename = os.path.join(path_in_zip_base, "index.md")
+
+             # Construct full path within the temporary staging directory
+             output_md_full_path = os.path.join(staging_dir, relative_md_filename)
+             output_md_dir = os.path.dirname(output_md_full_path)
+
+             # Create directories if they don't exist (check if output_md_dir is not empty)
+             try:
+                 if output_md_dir and not os.path.exists(output_md_dir):
+                     os.makedirs(output_md_dir)
+             except OSError as e:
+                 log_message = f" -> Error creating directory {output_md_dir}: {e}. Skipping conversion for this URL."
+                 logging.error(log_message)
+                 log_messages.append(log_message)
+                 failed_urls.add(current_url)
+                 continue # Skip to next URL
+
+             # --- 3c. Convert HTML to Markdown ---
+             if convert_html_to_md(html_content, output_md_full_path, pandoc_format_to_use, pandoc_args_to_use):
+                 converted_count += 1
+                 log_message = f" -> Converted successfully to {os.path.relpath(output_md_full_path, staging_dir)}"
+                 logging.info(log_message)
+                 log_messages.append(log_message)
+             else:
+                 failed_urls.add(current_url)
+                 log_message = f" -> Conversion failed."
+                 logging.warning(log_message)
+                 log_messages.append(log_message)
+
+             # --- 3d. Find and Add New Links ---
+             try:
+                 soup = BeautifulSoup(html_content, 'lxml')
+                 links_found_this_page = 0
+                 links_skipped_due_to_path = 0
+                 for link in soup.find_all('a', href=True):
+                     href = link['href']
+                     absolute_url = urljoin(current_url, href)
+                     absolute_url = urlparse(absolute_url)._replace(fragment="").geturl()
+                     parsed_absolute_url = urlparse(absolute_url)
+
+                     # Basic Filtering (scheme, domain, looks like html)
+                     is_valid_target = (
+                         parsed_absolute_url.scheme == base_scheme and
+                         parsed_absolute_url.netloc == base_netloc and
+                         (not parsed_absolute_url.path or
+                          parsed_absolute_url.path == '/' or
+                          parsed_absolute_url.path.lower().endswith('.html') or
+                          '.' not in os.path.basename(parsed_absolute_url.path.rstrip('/')) # Include directory links
+                         )
+                     )
+
+                     if not is_valid_target:
+                         continue # Skip invalid links early
+
+                     # --- Path Restriction Check ---
+                     path_restricted = False
+                     # Only apply if checkbox is checked AND we derived a non-root restriction path
+                     if restrict_path and start_path_dir_for_restriction is not None:
+                         candidate_path_clean = parsed_absolute_url.path.strip('/')
+                         # Check if the cleaned candidate path starts with the restriction dir + '/'
+                         # OR if the candidate path is exactly the restriction dir (e.g. /main matching main)
+                         if not (candidate_path_clean.startswith(start_path_dir_for_restriction + '/') or \
+                                 candidate_path_clean == start_path_dir_for_restriction):
+                             path_restricted = True
+                             links_skipped_due_to_path += 1
+                     # --- End Path Restriction Check ---
+
+                     # Add to queue only if NOT restricted and NOT already processed
+                     if not path_restricted and absolute_url not in processed_urls:
+                         processed_urls.add(absolute_url) # Add to set immediately
+                         urls_to_process.put(absolute_url)
+                         links_found_this_page += 1
+                         url_count_estimate += 1
+
+                 # Log link discovery summary for the page
+                 log_links_msg = f" -> Found {links_found_this_page} new link(s) to process."
+                 if links_skipped_due_to_path > 0:
+                     log_links_msg += f" Skipped {links_skipped_due_to_path} link(s) due to path restriction."
+                 logging.info(log_links_msg)
+                 log_messages.append(log_links_msg)
+             except Exception as e:
+                 log_message = f" -> Error parsing links on {current_url}: {e}"
+                 logging.error(log_message)
+                 log_messages.append(log_message)
+
+         # --- 4. Create ZIP Archive ---
+         progress(1.0, desc="Zipping files...")
+         log_messages.append("\nCrawling complete. Creating ZIP file...")
+         yield "\n".join(log_messages), None
+
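+         # Create a named temp file to reserve the ZIP path; delete=False keeps it on disk
+         # after the 'with' block so the file can be handed to the download component.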
+         with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as temp_zip:
+             output_zip_path = temp_zip.name
+
+         if create_zip_archive(staging_dir, output_zip_path):
+             log_messages.append(f"\nProcess finished successfully!")
+             log_messages.append(f"Converted {converted_count} pages using {'aggressive' if use_aggressive_conversion else 'standard'} mode.") # Inform user of mode used
+             if failed_urls:
+                 log_messages.append(f"Failed to process {len(failed_urls)} URLs (check logs).")
+             log_messages.append(f"ZIP file ready: {os.path.basename(output_zip_path)}")
+             yield "\n".join(log_messages), output_zip_path
+         else:
+             log_messages.append("\nError: Failed to create the final ZIP archive.")
+             yield "\n".join(log_messages), None
+
+     except KeyboardInterrupt:
+         log_messages.append("\nProcess interrupted by user.")
+         yield "\n".join(log_messages), None
+     except Exception as e:
+         log_messages.append(f"\nAn unexpected error occurred: {e}")
+         logging.error("Unhandled exception in process_conversion_request:")
+         logging.error(traceback.format_exc())
+         yield "\n".join(log_messages), None
+     finally:
+         # --- 5. Cleanup ---
+         if os.path.exists(staging_dir):
+             try:
+                 shutil.rmtree(staging_dir)
+                 logging.info(f"Cleaned up temporary directory: {staging_dir}")
+             except Exception as e:
+                 logging.error(f"Error cleaning up temporary directory {staging_dir}: {e}")
+
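+ # Custom CSS: give multi-row textboxes (such as the progress log) a thin, usable scrollbar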
+ css = """
+ textarea[rows]:not([rows="1"]) {
+     overflow-y: auto !important;
+     scrollbar-width: thin !important;
+ }
+ textarea[rows]:not([rows="1"])::-webkit-scrollbar {
+     all: initial !important;
+     background: #f1f1f1 !important;
+ }
+ textarea[rows]:not([rows="1"])::-webkit-scrollbar-thumb {
+     all: initial !important;
+     background: #a8a8a8 !important;
+ }
+ """
+
+ # --- Gradio UI Definition ---
+ with gr.Blocks(title="HTML Docs to Markdown Converter", css=css) as demo:
+     gr.Markdown(
+         """
+         # HTML Documentation to Markdown Converter (via pypandoc)
+         Enter the starting `index.html` URL of an online documentation site.
+         The script will crawl internal HTML links, convert pages to Markdown, and package results into a ZIP file.
+         **Requires `pip install pypandoc_binary`**.
+         """
+     )
+
+     with gr.Row():
+         url_input = gr.Textbox(
+             label="Starting Index HTML URL",
+             placeholder="e.g., https://dghs-imgutils.deepghs.org/main/index.html"
+         )
+
+     with gr.Row():
+         restrict_path_checkbox = gr.Checkbox(
+             label="Restrict crawl to starting path structure (e.g., if start is '/main/index.html', only crawl '/main/...' URLs)",
+             value=True # Default to restricting path
+         )
+         aggressive_md_checkbox = gr.Checkbox(
+             label="Aggressive Markdown conversion (disable raw HTML, use ATX headers)",
+             value=True # Default to aggressive conversion
+         )
+
+     with gr.Row():
+         start_button = gr.Button("Start Conversion", variant="primary")
+
+     with gr.Row():
+         log_output = gr.Textbox(label="Progress Logs", lines=15, interactive=False, show_copy_button=True)
+
+     with gr.Row():
+         zip_output = gr.File(label="Download Markdown ZIP")
+
+     start_button.click(
+         fn=process_conversion_request,
+         inputs=[url_input, restrict_path_checkbox, aggressive_md_checkbox],
+         outputs=[log_output, zip_output],
+         show_progress="full"
+     )
+
+ # --- Launch App ---
+ if __name__ == "__main__":
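+     # Enable the request queue so progress updates and yielded outputs stream to the UI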
+     demo.queue()
+     demo.launch(inbrowser=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio==5.29.0
+ requests
+ beautifulsoup4
+ lxml
+ pypandoc_binary
webui.bat ADDED
@@ -0,0 +1,73 @@
+ @echo off
+
+ :: The source of the webui.bat file is stable-diffusion-webui
+ :: set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --save_downloaded_files --autolaunch
+
+ if not defined PYTHON (set PYTHON=python)
+ if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
+
+ mkdir tmp 2>NUL
+
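+ :: Quick check that the Python interpreter can be launched at all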
+ %PYTHON% -c "" >tmp/stdout.txt 2>tmp/stderr.txt
+ if %ERRORLEVEL% == 0 goto :check_pip
+ echo Couldn't launch python
+ goto :show_stdout_stderr
+
+ :check_pip
+ %PYTHON% -mpip --help >tmp/stdout.txt 2>tmp/stderr.txt
+ if %ERRORLEVEL% == 0 goto :start_venv
+ if "%PIP_INSTALLER_LOCATION%" == "" goto :show_stdout_stderr
+ %PYTHON% "%PIP_INSTALLER_LOCATION%" >tmp/stdout.txt 2>tmp/stderr.txt
+ if %ERRORLEVEL% == 0 goto :start_venv
+ echo Couldn't install pip
+ goto :show_stdout_stderr
+
+ :start_venv
+ if ["%VENV_DIR%"] == ["-"] goto :skip_venv
+ if ["%SKIP_VENV%"] == ["1"] goto :skip_venv
+
+ dir "%VENV_DIR%\Scripts\Python.exe" >tmp/stdout.txt 2>tmp/stderr.txt
+ if %ERRORLEVEL% == 0 goto :activate_venv
+
+ for /f "delims=" %%i in ('CALL %PYTHON% -c "import sys; print(sys.executable)"') do set PYTHON_FULLNAME="%%i"
+ echo Creating venv in directory %VENV_DIR% using python %PYTHON_FULLNAME%
+ %PYTHON_FULLNAME% -m venv "%VENV_DIR%" >tmp/stdout.txt 2>tmp/stderr.txt
+ if %ERRORLEVEL% == 0 goto :activate_venv
+ echo Unable to create venv in directory "%VENV_DIR%"
+ goto :show_stdout_stderr
+
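+ :: From here on, use the venv's interpreter to launch the app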
+ :activate_venv
+ set PYTHON="%VENV_DIR%\Scripts\Python.exe"
+ echo venv %PYTHON%
+
+ :skip_venv
+ goto :launch
+
+ :launch
+ %PYTHON% app.py %COMMANDLINE_ARGS% %*
+ pause
+ exit /b
+
+ :show_stdout_stderr
+
+ echo.
+ echo exit code: %errorlevel%
+
+ for /f %%i in ("tmp\stdout.txt") do set size=%%~zi
+ if %size% equ 0 goto :show_stderr
+ echo.
+ echo stdout:
+ type tmp\stdout.txt
+
+ :show_stderr
+ for /f %%i in ("tmp\stderr.txt") do set size=%%~zi
+ if %size% equ 0 goto :endofscript
+ echo.
+ echo stderr:
+ type tmp\stderr.txt
+
+ :endofscript
+
+ echo.
+ echo Launch unsuccessful. Exiting.
+ pause