Papers Leaderboard

import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
import time
import os
import json
import PyPDF2
import io
import markdown

def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
    # ... (existing code remains the same)

def load_cached_data(cache_file):
    # ... (existing code remains the same)

def save_cached_data(data, cache_file):
    # ... (existing code remains the same)

def format_dataframe(data):
    # ... (existing code remains the same)

def load_and_cache_data(url, cache_file):
    # ... (existing code remains the same)

def update_display(category):
    # ... (existing code remains the same)

def load_all_data():
    # ... (existing code remains the same)

def download_and_convert_pdfs(data):
    consolidated_text = ""
    for title, paper_info in data.items():
        pdf_url = paper_info['pdf_link']
        if pdf_url:
            try:
                response = requests.get(pdf_url)
                pdf_file = io.BytesIO(response.content)
                pdf_reader = PyPDF2.PdfReader(pdf_file)
                text = ""
                for page in pdf_reader.pages:
                    text += page.extract_text()
                
                markdown_text = f"# {title}\n\n{text}\n\n---\n\n"
                consolidated_text += markdown_text
            except Exception as e:
                print(f"Error processing PDF for {title}: {str(e)}")
    
    return consolidated_text

def download_all_papers():
    all_data = {}
    for category in ["top", "latest", "greatest"]:
        cache_file = f"{category}_papers_cache.json"
        data = load_cached_data(cache_file)
        if data:
            all_data.update(data)
    
    consolidated_text = download_and_convert_pdfs(all_data)
    
    with open("consolidated_papers.md", "w", encoding="utf-8") as f:
        f.write(consolidated_text)
    
    return "All papers have been downloaded and consolidated into 'consolidated_papers.md'"

with gr.Blocks() as demo:
    gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")
    
    with gr.Tab("Top Trending Papers"):
        top_count = gr.Textbox(label="Number of Papers Fetched")
        top_html = gr.HTML()
        top_button = gr.Button("Refresh Leaderboard")
        top_button.click(fn=lambda: update_display("top"), inputs=None, outputs=[top_count, top_html])
    
    with gr.Tab("New Papers"):
        new_count = gr.Textbox(label="Number of Papers Fetched")
        new_html = gr.HTML()
        new_button = gr.Button("Refresh Leaderboard")
        new_button.click(fn=lambda: update_display("latest"), inputs=None, outputs=[new_count, new_html])
    
    with gr.Tab("Greatest Papers"):
        greatest_count = gr.Textbox(label="Number of Papers Fetched")
        greatest_html = gr.HTML()
        greatest_button = gr.Button("Refresh Leaderboard")
        greatest_button.click(fn=lambda: update_display("greatest"), inputs=None, outputs=[greatest_count, greatest_html])

    download_button = gr.Button("📚 Download All Papers", variant="primary")
    download_output = gr.Textbox(label="Download Status")
    download_button.click(fn=download_all_papers, inputs=None, outputs=download_output)

    # Load initial data for all tabs
    demo.load(fn=load_all_data, outputs=[top_count, top_html, new_count, new_html, greatest_count, greatest_html])

# Launch the Gradio interface with a public link
demo.launch(share=True)