awacke1 committed on
Commit
6727a04
·
verified ·
1 Parent(s): 09acd5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -128,6 +128,9 @@ async def download_and_save_pdf(session, title, paper_info, directory):
128
  async with session.get(pdf_url) as response:
129
  pdf_content = await response.read()
130
 
 
 
 
131
  safe_title = safe_filename(title)
132
  filename = f"{safe_title}.pdf"
133
  filepath = os.path.join(directory, filename)
@@ -155,14 +158,16 @@ async def process_papers(data, directory, progress=gr.Progress()):
155
  return "\n".join(results)
156
 
157
  def zip_directory(directory):
158
- """Zip the entire directory."""
159
  zip_filename = f"{directory}.zip"
160
  with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
161
  for root, _, files in os.walk(directory):
162
  for file in files:
163
- zipf.write(os.path.join(root, file),
164
- os.path.relpath(os.path.join(root, file),
165
- os.path.join(directory, '..')))
 
 
166
  return zip_filename
167
 
168
  def get_base64_download_link(file_path):
@@ -196,7 +201,10 @@ def download_all_papers(progress=gr.Progress()):
196
 
197
  existing_links = get_existing_zip_links()
198
 
199
- return f"All papers have been downloaded and saved in {zip_file}\n\n{results}", f"{download_link}<br><br>Previous downloads:<br>{existing_links}"
 
 
 
200
 
201
  with gr.Blocks() as demo:
202
  gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")
 
128
  async with session.get(pdf_url) as response:
129
  pdf_content = await response.read()
130
 
131
+ if len(pdf_content) < 2048: # Check if the PDF is less than 2KB
132
+ return f"Downloaded PDF for {title} is too small (less than 2KB). Skipping."
133
+
134
  safe_title = safe_filename(title)
135
  filename = f"{safe_title}.pdf"
136
  filepath = os.path.join(directory, filename)
 
158
  return "\n".join(results)
159
 
160
  def zip_directory(directory):
161
+ """Zip the entire directory, excluding files smaller than 2KB."""
162
  zip_filename = f"{directory}.zip"
163
  with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
164
  for root, _, files in os.walk(directory):
165
  for file in files:
166
+ file_path = os.path.join(root, file)
167
+ if os.path.getsize(file_path) >= 2048: # Only include files 2KB or larger
168
+ zipf.write(file_path,
169
+ os.path.relpath(file_path,
170
+ os.path.join(directory, '..')))
171
  return zip_filename
172
 
173
  def get_base64_download_link(file_path):
 
201
 
202
  existing_links = get_existing_zip_links()
203
 
204
+ # Count successful downloads
205
+ successful_downloads = sum(1 for result in results.split('\n') if result.startswith("Successfully saved:"))
206
+
207
+ return f"Papers downloaded: {successful_downloads} out of {len(all_data)}\nAll papers have been processed and saved in {zip_file}\n\n{results}", f"{download_link}<br><br>Previous downloads:<br>{existing_links}"
208
 
209
  with gr.Blocks() as demo:
210
  gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")