PaperPulse

Sleeping

App Files Files Community

awacke1 committed on Sep 25, 2024

Commit

f2c0706

verified ·

1 Parent(s): 23e3bf5

Create app.py

Browse files

Files changed (1) hide show

app.py +97 -0

app.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+import gradio as gr
+import time
+import os
+import json
+import PyPDF2
+import io
+import markdown
+def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
+    # ... (existing code remains the same) -- body elided in this commit view; a comment-only body is not valid Python, so the real implementation must be restored before this file can run. Not called by any code visible here.
+def load_cached_data(cache_file):
+    # ... (existing code remains the same) -- body elided. NOTE(review): download_all_papers() calls this with "<category>_papers_cache.json", truth-tests the result and passes it to dict.update(), so it presumably returns a mapping (or a falsy value when nothing is cached) -- confirm.
+def save_cached_data(data, cache_file):
+    # ... (existing code remains the same) -- body elided; not called by any code visible here.
+def format_dataframe(data):
+    # ... (existing code remains the same) -- body elided; not called by any code visible here.
+def load_and_cache_data(url, cache_file):
+    # ... (existing code remains the same) -- body elided; not called by any code visible here.
+def update_display(category):
+    # ... (existing code remains the same) -- body elided. NOTE(review): the Refresh buttons call this with "top", "latest" or "greatest" and route the result to two outputs (a count Textbox and an HTML component), so it presumably returns two values in that order -- confirm.
+def load_all_data():
+    # ... (existing code remains the same) -- body elided. NOTE(review): demo.load() routes the result to six outputs (top, new, greatest: count then HTML each), so it presumably returns six values in that order -- confirm.
+def download_and_convert_pdfs(data, timeout=30):
+    """Download every linked PDF and merge the extracted text into one Markdown string.
+
+    Args:
+        data: Mapping of paper title -> info dict. Only the 'pdf_link' entry of
+            each info dict is read. None or an empty mapping yields "".
+        timeout: Seconds passed to requests.get() so that one unresponsive
+            host cannot block the whole run.
+
+    Returns:
+        str: One section per successfully processed paper, in iteration
+        order: a "# <title>" heading, the extracted text, then a "---" rule.
+        Papers without a link, or whose download/parsing fails, are skipped.
+    """
+    sections = []
+    for title, paper_info in (data or {}).items():
+        # A missing 'pdf_link' key (or a non-dict entry) means "no PDF", not a crash.
+        pdf_url = paper_info.get('pdf_link') if isinstance(paper_info, dict) else None
+        if not pdf_url:
+            continue
+        try:
+            response = requests.get(pdf_url, timeout=timeout)
+            # Fail fast on 4xx/5xx instead of feeding an error page to the PDF parser.
+            response.raise_for_status()
+            pdf_reader = PyPDF2.PdfReader(io.BytesIO(response.content))
+            # Defensive: treat a None result from extract_text() as empty text.
+            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
+        except Exception as e:
+            # Deliberately best-effort: report the failure and move on to the next paper.
+            print(f"Error processing PDF for {title}: {str(e)}")
+            continue
+        sections.append(f"# {title}\n\n{text}\n\n---\n\n")
+    return "".join(sections)
+def download_all_papers():
+    """Merge the cached paper lists, convert their PDFs and write one Markdown file.
+
+    Reads the "top", "latest" and "greatest" cache files via load_cached_data(),
+    merges whatever is found into a single dict (later categories overwrite
+    earlier ones on duplicate keys), hands the result to
+    download_and_convert_pdfs() and writes the returned text to
+    'consolidated_papers.md' as UTF-8.
+
+    Returns:
+        str: A fixed status message for display in the UI.
+    """
+    merged_papers = {}
+    for name in ("top", "latest", "greatest"):
+        cached = load_cached_data(f"{name}_papers_cache.json")
+        if not cached:
+            # Nothing cached for this category; skip it.
+            continue
+        merged_papers.update(cached)
+    markdown_blob = download_and_convert_pdfs(merged_papers)
+    with open("consolidated_papers.md", "w", encoding="utf-8") as out_file:
+        out_file.write(markdown_blob)
+    return "All papers have been downloaded and consolidated into 'consolidated_papers.md'"
+def _leaderboard_tab(tab_title, category):
+    """Create one leaderboard tab and return its (count box, HTML view, refresh button).
+
+    Must be called inside an active gr.Blocks() context. The refresh button
+    re-runs update_display(category) and writes into the tab's two outputs.
+    """
+    with gr.Tab(tab_title):
+        count_box = gr.Textbox(label="Number of Papers Fetched")
+        html_view = gr.HTML()
+        refresh = gr.Button("Refresh Leaderboard")
+        # category is a parameter of this call, so each tab's lambda keeps its own value.
+        refresh.click(fn=lambda: update_display(category), inputs=None, outputs=[count_box, html_view])
+    return count_box, html_view, refresh
+with gr.Blocks() as demo:
+    gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")
+    # Tabs are created in the same order as before: top, new, greatest.
+    top_count, top_html, top_button = _leaderboard_tab("Top Trending Papers", "top")
+    new_count, new_html, new_button = _leaderboard_tab("New Papers", "latest")
+    greatest_count, greatest_html, greatest_button = _leaderboard_tab("Greatest Papers", "greatest")
+    # Bulk download: the button sits above the status box it reports into.
+    download_button = gr.Button("📚 Download All Papers", variant="primary")
+    download_output = gr.Textbox(label="Download Status")
+    download_button.click(fn=download_all_papers, inputs=None, outputs=download_output)
+    # Populate every tab once when the page loads.
+    initial_outputs = [top_count, top_html, new_count, new_html, greatest_count, greatest_html]
+    demo.load(fn=load_all_data, outputs=initial_outputs)
+# Start the Gradio app and request a public share link.
+demo.launch(share=True)