awacke1 committed on
Commit
f2c0706
·
verified ·
1 Parent(s): 23e3bf5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import pandas as pd
4
+ import gradio as gr
5
+ import time
6
+ import os
7
+ import json
8
+ import PyPDF2
9
+ import io
10
+ import markdown
11
+
12
+ def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
13
+ # ... (existing code remains the same)
14
+
15
+ def load_cached_data(cache_file):
16
+ # ... (existing code remains the same)
17
+
18
+ def save_cached_data(data, cache_file):
19
+ # ... (existing code remains the same)
20
+
21
+ def format_dataframe(data):
22
+ # ... (existing code remains the same)
23
+
24
+ def load_and_cache_data(url, cache_file):
25
+ # ... (existing code remains the same)
26
+
27
+ def update_display(category):
28
+ # ... (existing code remains the same)
29
+
30
+ def load_all_data():
31
+ # ... (existing code remains the same)
32
+
33
def download_and_convert_pdfs(data, timeout=30):
    """Download each paper's PDF and merge the extracted text into Markdown.

    Processing is best-effort: a paper whose PDF cannot be fetched or parsed
    is reported on stdout and skipped, so one bad link never aborts the batch.

    Args:
        data: Mapping of paper title to a dict of paper details. Only the
            ``'pdf_link'`` entry is read; papers with a missing or empty
            link are skipped.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        One string containing a ``# <title>`` section per successfully
        processed paper, each terminated by a ``---`` separator. The string
        is empty when no paper could be processed.
    """
    sections = []
    for title, paper_info in data.items():
        # .get() keeps an entry without a 'pdf_link' key from raising
        # KeyError and taking the whole batch down with it.
        pdf_url = paper_info.get('pdf_link')
        if not pdf_url:
            continue

        try:
            # Without a timeout a single stalled server blocks forever.
            response = requests.get(pdf_url, timeout=timeout)
            # Fail early on 4xx/5xx instead of parsing an error page as PDF.
            response.raise_for_status()

            pdf_reader = PyPDF2.PdfReader(io.BytesIO(response.content))
            # Defensive: treat a page that yields no text as empty text.
            text = "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
        except Exception as e:
            # Deliberately broad: this is the per-paper best-effort boundary.
            print(f"Error processing PDF for {title}: {str(e)}")
            continue

        sections.append(f"# {title}\n\n{text}\n\n---\n\n")

    return "".join(sections)
52
+
53
def download_all_papers():
    """Merge every cached leaderboard, fetch the PDFs, and write one Markdown file.

    Reads the "top", "latest" and "greatest" cache files, combines whatever
    they contain into a single mapping (later categories overwrite earlier
    entries with the same key), converts the papers' PDFs to text, and saves
    the result to ``consolidated_papers.md`` in the working directory.

    Returns:
        A fixed status message for display in the UI.
    """
    merged = {}
    cache_files = (
        f"{category}_papers_cache.json"
        for category in ["top", "latest", "greatest"]
    )
    for cache_file in cache_files:
        cached = load_cached_data(cache_file)
        if not cached:
            # Nothing cached for this category; move on.
            continue
        merged.update(cached)

    markdown_doc = download_and_convert_pdfs(merged)

    with open("consolidated_papers.md", "w", encoding="utf-8") as out:
        out.write(markdown_doc)

    return "All papers have been downloaded and consolidated into 'consolidated_papers.md'"
67
+
68
def _build_leaderboard_tab(tab_title, category):
    """Create one leaderboard tab inside the currently open Blocks context.

    Returns the tab's count textbox, HTML panel and refresh button, in that
    order. Clicking the button re-runs ``update_display`` for ``category``.
    """
    with gr.Tab(tab_title):
        count_box = gr.Textbox(label="Number of Papers Fetched")
        html_panel = gr.HTML()
        refresh_button = gr.Button("Refresh Leaderboard")
        refresh_button.click(
            fn=lambda: update_display(category),
            inputs=None,
            outputs=[count_box, html_panel],
        )
    return count_box, html_panel, refresh_button


with gr.Blocks() as demo:
    gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")

    # One tab per leaderboard category; the three stanzas are identical
    # apart from the tab title and the category key.
    top_count, top_html, top_button = _build_leaderboard_tab(
        "Top Trending Papers", "top"
    )
    new_count, new_html, new_button = _build_leaderboard_tab(
        "New Papers", "latest"
    )
    greatest_count, greatest_html, greatest_button = _build_leaderboard_tab(
        "Greatest Papers", "greatest"
    )

    # Bulk download control, shown below the tabs.
    # NOTE(review): nesting reconstructed from a flattened diff - confirm
    # this sits at the Blocks level rather than inside the last tab.
    download_button = gr.Button("📚 Download All Papers", variant="primary")
    download_output = gr.Textbox(label="Download Status")
    download_button.click(
        fn=download_all_papers,
        inputs=None,
        outputs=download_output,
    )

    # Load initial data for all tabs
    demo.load(
        fn=load_all_data,
        outputs=[
            top_count,
            top_html,
            new_count,
            new_html,
            greatest_count,
            greatest_html,
        ],
    )

# Launch the Gradio interface with a public link
demo.launch(share=True)