prithivMLmods committed on
Commit
ba108dd
·
verified ·
1 Parent(s): 6d19175

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -115
app.py CHANGED
@@ -1,115 +1,115 @@
1
- import os
2
- import requests
3
- from bs4 import BeautifulSoup
4
- from urllib.parse import urljoin, urlparse
5
- from zipfile import ZipFile
6
- from io import BytesIO
7
- import gradio as gr
8
-
9
- def download_file(url, session):
10
- """Download a file and return its content."""
11
- try:
12
- response = session.get(url)
13
- response.raise_for_status()
14
- return response.content
15
- except requests.exceptions.RequestException as e:
16
- print(f"Error downloading {url}: {e}")
17
- return None
18
-
19
- def save_webpage_as_zip(url):
20
- """Save a webpage and its assets as a ZIP file."""
21
- session = requests.Session()
22
- response = session.get(url)
23
- response.raise_for_status()
24
-
25
- soup = BeautifulSoup(response.content, 'html.parser')
26
- temp_dir = 'temp_webpage'
27
- if not os.path.exists(temp_dir):
28
- os.makedirs(temp_dir)
29
-
30
- main_html_path = os.path.join(temp_dir, 'index.html')
31
- with open(main_html_path, 'wb') as f:
32
- f.write(response.content)
33
-
34
- assets = []
35
- for tag in soup.find_all(['img', 'link', 'script']):
36
- if tag.name == 'img' and tag.get('src'):
37
- assets.append(tag['src'])
38
- elif tag.name == 'link' and tag.get('href'):
39
- assets.append(tag['href'])
40
- elif tag.name == 'script' and tag.get('src'):
41
- assets.append(tag['src'])
42
-
43
- for asset in assets:
44
- asset_url = urljoin(url, asset)
45
- asset_path = urlparse(asset_url).path.lstrip('/')
46
- asset_full_path = os.path.join(temp_dir, asset_path)
47
-
48
- if asset_path.endswith('/'):
49
- print(f"Skipping directory {asset_full_path}")
50
- continue
51
-
52
- os.makedirs(os.path.dirname(asset_full_path), exist_ok=True)
53
-
54
- content = download_file(asset_url, session)
55
- if content:
56
- if os.path.isdir(asset_full_path):
57
- print(f"Skipping directory {asset_full_path}")
58
- continue
59
- with open(asset_full_path, 'wb') as f:
60
- f.write(content)
61
-
62
- zip_buffer = BytesIO()
63
- with ZipFile(zip_buffer, 'w') as zipf:
64
- for root, _, files in os.walk(temp_dir):
65
- for file in files:
66
- file_path = os.path.join(root, file)
67
- zipf.write(file_path, os.path.relpath(file_path, temp_dir))
68
-
69
- for root, _, files in os.walk(temp_dir, topdown=False):
70
- for file in files:
71
- os.remove(os.path.join(root, file))
72
- os.rmdir(root)
73
- zip_buffer.seek(0)
74
- return zip_buffer
75
-
76
- def generate_zip_file(url):
77
- """Generate ZIP file from a webpage URL."""
78
- zip_buffer = save_webpage_as_zip(url)
79
- temp_zip_path = "webpage.zip"
80
- with open(temp_zip_path, 'wb') as f:
81
- f.write(zip_buffer.read())
82
- return temp_zip_path
83
-
84
- examples = [
85
- "https://www.bmw.com/en/index.html",
86
- "https://www.ferrari.com/en-EN",
87
- "https://streamlit.io/"
88
- ]
89
-
90
- DESCRIPTION = """
91
-
92
- ## Webpage to ZIP Downloader πŸ”—
93
- """
94
-
95
- with gr.Blocks(theme="bethecloud/storj_theme") as demo:
96
- gr.Markdown(DESCRIPTION)
97
- gr.Markdown("Enter a URL to download the webpage and its assets as a ZIP file.")
98
-
99
- url_input = gr.Textbox(label="Website URL", placeholder="Enter a URL (e.g., https://www.example.com)")
100
-
101
- download_button = gr.Button("Download as ZIP")
102
- output_file = gr.File(label="Download")
103
-
104
- def set_example_url(url):
105
- url_input.value = url
106
-
107
- download_button.click(fn=generate_zip_file, inputs=url_input, outputs=output_file)
108
-
109
- gr.Examples(
110
- examples=examples,
111
- inputs=url_input,
112
- outputs=output_file,
113
- fn=generate_zip_file
114
- )
115
- demo.launch()
 
1
+ import os
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from urllib.parse import urljoin, urlparse
5
+ from zipfile import ZipFile
6
+ from io import BytesIO
7
+ import gradio as gr
8
+
9
def download_file(url, session, timeout=30):
    """Fetch *url* through the shared requests session and return the raw bytes.

    Parameters:
        url: Absolute URL of the asset to download.
        session: A ``requests.Session`` reused across downloads (connection pooling).
        timeout: Seconds to wait for the server; without this the scraper could
            hang forever on an unresponsive host (the original had no timeout).

    Returns:
        The response body as bytes, or ``None`` when the request fails for any
        reason (network error, HTTP error status, timeout).
    """
    try:
        response = session.get(url, timeout=timeout)
        response.raise_for_status()
        return response.content
    except requests.exceptions.RequestException as e:
        # Best effort: log and signal failure rather than abort the whole page save.
        print(f"Error downloading {url}: {e}")
        return None
18
+
19
def save_webpage_as_zip(url):
    """Download a webpage plus its img/link/script assets into an in-memory ZIP.

    The page itself is stored as ``index.html``; each asset is stored under a
    path mirroring its URL path. The scratch directory is always removed, even
    when an asset write fails partway through.

    Parameters:
        url: The page URL to scrape.

    Returns:
        A ``BytesIO`` containing the ZIP archive, seeked to offset 0.

    Raises:
        requests.exceptions.RequestException: if the main page cannot be fetched.
    """
    import shutil  # local import so the file-level import block stays untouched

    session = requests.Session()
    # Timeout so a dead host cannot hang the whole request (was missing).
    response = session.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    temp_dir = 'temp_webpage'
    os.makedirs(temp_dir, exist_ok=True)

    try:
        main_html_path = os.path.join(temp_dir, 'index.html')
        with open(main_html_path, 'wb') as f:
            f.write(response.content)

        # Collect asset references; dict.fromkeys deduplicates while keeping order,
        # so the same file is not downloaded twice.
        assets = []
        for tag in soup.find_all(['img', 'link', 'script']):
            ref = tag.get('href') if tag.name == 'link' else tag.get('src')
            if ref:
                assets.append(ref)
        assets = list(dict.fromkeys(assets))

        for asset in assets:
            asset_url = urljoin(url, asset)
            asset_path = urlparse(asset_url).path.lstrip('/')
            # An empty path (e.g. href="https://host/") or a trailing slash names
            # a directory, not a file -- nothing to save (an empty path would have
            # made open() hit the temp dir itself and crash).
            if not asset_path or asset_path.endswith('/'):
                print(f"Skipping directory {os.path.join(temp_dir, asset_path)}")
                continue

            asset_full_path = os.path.join(temp_dir, asset_path)
            os.makedirs(os.path.dirname(asset_full_path), exist_ok=True)

            content = download_file(asset_url, session)
            # 'is not None' so legitimately empty (0-byte) assets are still written;
            # the original 'if content:' silently dropped them.
            if content is not None:
                if os.path.isdir(asset_full_path):
                    print(f"Skipping directory {asset_full_path}")
                    continue
                with open(asset_full_path, 'wb') as f:
                    f.write(content)

        zip_buffer = BytesIO()
        with ZipFile(zip_buffer, 'w') as zipf:
            for root, _, files in os.walk(temp_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    zipf.write(file_path, os.path.relpath(file_path, temp_dir))
    finally:
        # Always clean up the scratch directory -- the original's manual
        # walk/remove ran only on success and leaked the directory on error.
        shutil.rmtree(temp_dir, ignore_errors=True)

    zip_buffer.seek(0)
    return zip_buffer
75
+
76
def generate_zip_file(url):
    """Scrape *url* into a ZIP archive on disk and return the archive's path."""
    archive = save_webpage_as_zip(url)
    output_path = "webpage.zip"
    with open(output_path, 'wb') as zip_out:
        zip_out.write(archive.read())
    return output_path
83
+
84
# Example URLs offered in the UI below.
examples = [
    "https://www.bmw.com/en/index.html",
    "https://www.ferrari.com/en-EN",
    "https://streamlit.io/"
]

DESCRIPTION = """

## Webpage to ZIP Downloader 🔗
"""

# Gradio UI: one URL textbox, a button that scrapes the page into a ZIP,
# and a File component offering the resulting archive for download.
with gr.Blocks(theme="bethecloud/storj_theme") as demo:
    gr.Markdown(DESCRIPTION)
    gr.Markdown("Enter a URL to download the webpage and its assets as a ZIP file.")

    url_input = gr.Textbox(label="Website URL", placeholder="Enter a URL (e.g., https://www.example.com)")

    download_button = gr.Button("Download as ZIP")
    output_file = gr.File(label="Download")

    # NOTE(review): the original defined an unused set_example_url() helper that
    # was never wired to any event (and mutating url_input.value would not have
    # updated the UI) -- removed as dead code.
    download_button.click(fn=generate_zip_file, inputs=url_input, outputs=output_file)

    gr.Examples(
        examples=examples,
        inputs=url_input,
        outputs=output_file,
        fn=generate_zip_file,
        # Explicitly disable caching: pre-running the fn at startup would hit
        # live third-party sites on every app launch.
        cache_examples=False
    )

demo.launch(ssr_mode=False)