Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -128,6 +128,9 @@ async def download_and_save_pdf(session, title, paper_info, directory):
|
|
128 |
async with session.get(pdf_url) as response:
|
129 |
pdf_content = await response.read()
|
130 |
|
|
|
|
|
|
|
131 |
safe_title = safe_filename(title)
|
132 |
filename = f"{safe_title}.pdf"
|
133 |
filepath = os.path.join(directory, filename)
|
@@ -155,14 +158,16 @@ async def process_papers(data, directory, progress=gr.Progress()):
|
|
155 |
return "\n".join(results)
|
156 |
|
157 |
def zip_directory(directory):
|
158 |
-
"""Zip the entire directory."""
|
159 |
zip_filename = f"{directory}.zip"
|
160 |
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
|
161 |
for root, _, files in os.walk(directory):
|
162 |
for file in files:
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
|
|
166 |
return zip_filename
|
167 |
|
168 |
def get_base64_download_link(file_path):
|
@@ -196,7 +201,10 @@ def download_all_papers(progress=gr.Progress()):
|
|
196 |
|
197 |
existing_links = get_existing_zip_links()
|
198 |
|
199 |
-
|
|
|
|
|
|
|
200 |
|
201 |
with gr.Blocks() as demo:
|
202 |
gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")
|
|
|
128 |
async with session.get(pdf_url) as response:
|
129 |
pdf_content = await response.read()
|
130 |
|
131 |
+
if len(pdf_content) < 2048: # Check if the PDF is less than 2KB
|
132 |
+
return f"Downloaded PDF for {title} is too small (less than 2KB). Skipping."
|
133 |
+
|
134 |
safe_title = safe_filename(title)
|
135 |
filename = f"{safe_title}.pdf"
|
136 |
filepath = os.path.join(directory, filename)
|
|
|
158 |
return "\n".join(results)
|
159 |
|
160 |
def zip_directory(directory, min_size=2048):
    """Zip the entire directory, excluding files smaller than ``min_size`` bytes.

    The archive is written next to ``directory`` as ``<directory>.zip``.
    Entries are stored relative to the directory's parent, so every entry
    name starts with the directory's own name.

    Args:
        directory: Path of the directory to archive. A trailing path
            separator is ignored.
        min_size: Minimum file size in bytes for a file to be included.
            Defaults to 2048 (2KB).

    Returns:
        The path of the zip file that was written.
    """
    # Drop trailing separators: "papers/" would otherwise produce
    # "papers/.zip", i.e. an archive located inside the tree being walked.
    separators = os.sep + (os.altsep or "")
    directory = directory.rstrip(separators) or directory

    zip_filename = f"{directory}.zip"
    archive_path = os.path.abspath(zip_filename)
    # Arcnames are computed against the parent so the top-level folder name
    # is preserved inside the archive.
    parent = os.path.join(directory, '..')

    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                # Never add the archive to itself (possible when the
                # directory is "." and the zip lands inside the walked tree).
                if os.path.abspath(file_path) == archive_path:
                    continue
                if os.path.getsize(file_path) >= min_size:  # Only include files min_size or larger
                    zipf.write(file_path, os.path.relpath(file_path, parent))
    return zip_filename
|
172 |
|
173 |
def get_base64_download_link(file_path):
|
|
|
201 |
|
202 |
existing_links = get_existing_zip_links()
|
203 |
|
204 |
+
# Count successful downloads
|
205 |
+
successful_downloads = sum(1 for result in results.split('\n') if result.startswith("Successfully saved:"))
|
206 |
+
|
207 |
+
return f"Papers downloaded: {successful_downloads} out of {len(all_data)}\nAll papers have been processed and saved in {zip_file}\n\n{results}", f"{download_link}<br><br>Previous downloads:<br>{existing_links}"
|
208 |
|
209 |
with gr.Blocks() as demo:
|
210 |
gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")
|