Spaces:
Sleeping
Sleeping
File size: 3,439 Bytes
f2c0706 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
import time
import os
import json
import PyPDF2
import io
import markdown
def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
# ... (existing code remains the same)
def load_cached_data(cache_file):
# ... (existing code remains the same)
def save_cached_data(data, cache_file):
# ... (existing code remains the same)
def format_dataframe(data):
# ... (existing code remains the same)
def load_and_cache_data(url, cache_file):
# ... (existing code remains the same)
def update_display(category):
# ... (existing code remains the same)
def load_all_data():
# ... (existing code remains the same)
def download_and_convert_pdfs(data, timeout=30):
    """Download each paper's PDF and merge the extracted text into one Markdown string.

    Args:
        data: Mapping of paper title to a dict of paper details; the PDF
            location is read from that dict's ``'pdf_link'`` entry.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            single unresponsive server cannot stall the whole run.

    Returns:
        One string containing a ``# <title>`` section per successfully
        processed paper, each followed by a ``---`` separator. Papers without
        a PDF link, or whose download or parsing fails, are left out.
    """
    sections = []
    for title, paper_info in data.items():
        # A record without a 'pdf_link' key is skipped like one with an empty
        # link, rather than aborting every remaining paper with a KeyError.
        pdf_url = paper_info.get('pdf_link')
        if not pdf_url:
            continue
        try:
            response = requests.get(pdf_url, timeout=timeout)
            # Reject 4xx/5xx responses so an error page is never handed to
            # the PDF parser.
            response.raise_for_status()
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(response.content))
            # Guard against a falsy extract_text() result so one unreadable
            # page does not discard the whole paper.
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
        except Exception as e:
            # Deliberate best effort: report the failure and carry on with
            # the remaining papers.
            print(f"Error processing PDF for {title}: {str(e)}")
            continue
        sections.append(f"# {title}\n\n{text}\n\n---\n\n")
    return "".join(sections)
def download_all_papers():
    """Merge the cached paper lists, convert their PDFs, and write one Markdown file.

    Reads the "top", "latest" and "greatest" cache files through
    ``load_cached_data``, folds every non-empty result into a single dict
    (an entry loaded later replaces an earlier one with the same key), passes
    that dict to ``download_and_convert_pdfs`` and writes the returned text
    to ``consolidated_papers.md`` as UTF-8.

    Returns:
        A fixed status message for display in the UI.
    """
    merged = {}
    for category in ("top", "latest", "greatest"):
        cached = load_cached_data(f"{category}_papers_cache.json")
        if not cached:
            # Nothing usable in this category's cache; move on.
            continue
        merged.update(cached)

    markdown_body = download_and_convert_pdfs(merged)
    with open("consolidated_papers.md", "w", encoding="utf-8") as out_file:
        out_file.write(markdown_body)

    return "All papers have been downloaded and consolidated into 'consolidated_papers.md'"
def _build_leaderboard_tab(tab_title, category):
    """Create one leaderboard tab and wire its refresh button.

    Returns the (count textbox, HTML view, refresh button) created inside
    the tab, in that order.
    """
    with gr.Tab(tab_title):
        count_box = gr.Textbox(label="Number of Papers Fetched")
        html_view = gr.HTML()
        refresh_button = gr.Button("Refresh Leaderboard")
        # `category` is bound per call, so each button refreshes its own list.
        refresh_button.click(
            fn=lambda: update_display(category),
            inputs=None,
            outputs=[count_box, html_view],
        )
    return count_box, html_view, refresh_button


with gr.Blocks() as demo:
    gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")

    # One tab per paper category, created in display order.
    top_count, top_html, top_button = _build_leaderboard_tab("Top Trending Papers", "top")
    new_count, new_html, new_button = _build_leaderboard_tab("New Papers", "latest")
    greatest_count, greatest_html, greatest_button = _build_leaderboard_tab("Greatest Papers", "greatest")

    # Bulk-download controls.
    # NOTE(review): the first character of the button label looks like a
    # mis-decoded emoji; confirm the intended glyph.
    download_button = gr.Button("๐ Download All Papers", variant="primary")
    download_output = gr.Textbox(label="Download Status")
    download_button.click(fn=download_all_papers, inputs=None, outputs=download_output)

    # Fill every tab when the page is first opened.
    initial_outputs = [top_count, top_html, new_count, new_html, greatest_count, greatest_html]
    demo.load(fn=load_all_data, outputs=initial_outputs)

# Start the app and request a public share link.
demo.launch(share=True)