awacke1 committed on
Commit
a384a25
·
verified ·
1 Parent(s): a2f0fdc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -60
app.py CHANGED
@@ -17,7 +17,7 @@ def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
17
  offset = 0
18
  data_list = {}
19
  break_duplicate = 10
20
-
21
  while True:
22
  response = session.get(url, headers=headers, params={'page': offset})
23
  if response.status_code != 200:
@@ -29,11 +29,11 @@ def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
29
  break
30
  for ppr in paper_info:
31
  title = ppr.find('h1').text.strip()
32
-
33
  if "paper" in ppr.find('a')['href']:
34
  link = base_url + ppr.find('a')['href']
35
  else:
36
- link = ppr.find('a')['href']
37
  Github_Star = ppr.find('span', class_='badge badge-secondary').text.strip().replace(',', '')
38
  pdf_link = ''
39
  try:
@@ -49,9 +49,9 @@ def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
49
  break_duplicate -= 1
50
  if break_duplicate == 0:
51
  return data_list
52
- offset += 1
53
  progress.update(offset)
54
- print('Data retrieval complete')
55
  return data_list
56
 
57
  def load_cached_data(cache_file):
@@ -68,15 +68,17 @@ def format_dataframe(data):
68
  df = pd.DataFrame(data).T
69
  df['title'] = df.index
70
  df = df[['title', 'Github Star', 'link', 'pdf_link']]
 
 
71
  return df
72
 
73
  def load_and_cache_data(url, cache_file):
74
  cached_data = load_cached_data(cache_file)
75
-
76
  if cached_data:
77
  print(f"Loading cached data from {cache_file}")
78
  return cached_data
79
-
80
  print(f"Fetching new data from {url}")
81
  new_data = get_rank_papers(url)
82
  save_cached_data(new_data, cache_file)
@@ -85,76 +87,41 @@ def load_and_cache_data(url, cache_file):
85
  def update_display(category):
86
  cache_file = f"{category}_papers_cache.json"
87
  url = f"https://paperswithcode.com/{category}" if category != "top" else "https://paperswithcode.com/"
88
-
89
  data = load_and_cache_data(url, cache_file)
90
  df = format_dataframe(data)
91
-
92
- return len(df), df
93
 
94
  def load_all_data():
95
- top_count, top_df = update_display("top")
96
- new_count, new_df = update_display("latest")
97
- greatest_count, greatest_df = update_display("greatest")
98
- return top_count, top_df, new_count, new_df, greatest_count, greatest_df
99
-
100
- def save_dataframe_generic(df, filename):
101
- try:
102
- df.to_csv(filename, index=False)
103
- return "Dataframe saved successfully."
104
- except Exception as e:
105
- return f"Error saving dataframe: {e}"
106
-
107
- def load_dataframe_generic(filename):
108
- try:
109
- if os.path.exists(filename):
110
- df = pd.read_csv(filename)
111
- return df, "Dataframe loaded successfully."
112
- else:
113
- return pd.DataFrame(), "Dataframe file not found."
114
- except Exception as e:
115
- return pd.DataFrame(), f"Error loading dataframe: {e}"
116
 
117
  with gr.Blocks() as demo:
118
  gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")
119
-
120
  with gr.Tab("Top Trending Papers"):
121
  top_count = gr.Textbox(label="Number of Papers Fetched")
122
- top_df = gr.DataFrame(interactive=True)
123
  top_button = gr.Button("Refresh Leaderboard")
124
- top_load_button = gr.Button("Load Dataframe")
125
- top_save_button = gr.Button("Save Dataframe")
126
- top_save_status = gr.Textbox(label="Status")
127
-
128
- top_button.click(fn=lambda: update_display("top"), inputs=None, outputs=[top_count, top_df])
129
- top_save_button.click(fn=lambda df: save_dataframe_generic(df, 'top_dataframe.csv'), inputs=top_df, outputs=top_save_status)
130
- top_load_button.click(fn=lambda: load_dataframe_generic('top_dataframe.csv'), inputs=None, outputs=[top_df, top_save_status])
131
-
132
  with gr.Tab("New Papers"):
133
  new_count = gr.Textbox(label="Number of Papers Fetched")
134
- new_df = gr.DataFrame(interactive=True)
135
  new_button = gr.Button("Refresh Leaderboard")
136
- new_load_button = gr.Button("Load Dataframe")
137
- new_save_button = gr.Button("Save Dataframe")
138
- new_save_status = gr.Textbox(label="Status")
139
-
140
- new_button.click(fn=lambda: update_display("latest"), inputs=None, outputs=[new_count, new_df])
141
- new_save_button.click(fn=lambda df: save_dataframe_generic(df, 'new_dataframe.csv'), inputs=new_df, outputs=new_save_status)
142
- new_load_button.click(fn=lambda: load_dataframe_generic('new_dataframe.csv'), inputs=None, outputs=[new_df, new_save_status])
143
-
144
  with gr.Tab("Greatest Papers"):
145
  greatest_count = gr.Textbox(label="Number of Papers Fetched")
146
- greatest_df = gr.DataFrame(interactive=True)
147
  greatest_button = gr.Button("Refresh Leaderboard")
148
- greatest_load_button = gr.Button("Load Dataframe")
149
- greatest_save_button = gr.Button("Save Dataframe")
150
- greatest_save_status = gr.Textbox(label="Status")
151
-
152
- greatest_button.click(fn=lambda: update_display("greatest"), inputs=None, outputs=[greatest_count, greatest_df])
153
- greatest_save_button.click(fn=lambda df: save_dataframe_generic(df, 'greatest_dataframe.csv'), inputs=greatest_df, outputs=greatest_save_status)
154
- greatest_load_button.click(fn=lambda: load_dataframe_generic('greatest_dataframe.csv'), inputs=None, outputs=[greatest_df, greatest_save_status])
155
 
156
  # Load initial data for all tabs
157
- demo.load(fn=load_all_data, outputs=[top_count, top_df, new_count, new_df, greatest_count, greatest_df])
158
 
159
  # Launch the Gradio interface with a public link
160
- demo.launch(share=True)
 
17
  offset = 0
18
  data_list = {}
19
  break_duplicate = 10
20
+
21
  while True:
22
  response = session.get(url, headers=headers, params={'page': offset})
23
  if response.status_code != 200:
 
29
  break
30
  for ppr in paper_info:
31
  title = ppr.find('h1').text.strip()
32
+
33
  if "paper" in ppr.find('a')['href']:
34
  link = base_url + ppr.find('a')['href']
35
  else:
36
+ link = ppr.find('a')['href']
37
  Github_Star = ppr.find('span', class_='badge badge-secondary').text.strip().replace(',', '')
38
  pdf_link = ''
39
  try:
 
49
  break_duplicate -= 1
50
  if break_duplicate == 0:
51
  return data_list
52
+ offset += 1
53
  progress.update(offset)
54
+ print('Data retrieval complete')
55
  return data_list
56
 
57
  def load_cached_data(cache_file):
 
68
  df = pd.DataFrame(data).T
69
  df['title'] = df.index
70
  df = df[['title', 'Github Star', 'link', 'pdf_link']]
71
+ df['link'] = df['link'].apply(lambda x: f'<a href="{x}" target="_blank">Link</a>')
72
+ df['pdf_link'] = df['pdf_link'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')
73
  return df
74
 
75
  def load_and_cache_data(url, cache_file):
76
  cached_data = load_cached_data(cache_file)
77
+
78
  if cached_data:
79
  print(f"Loading cached data from {cache_file}")
80
  return cached_data
81
+
82
  print(f"Fetching new data from {url}")
83
  new_data = get_rank_papers(url)
84
  save_cached_data(new_data, cache_file)
 
87
  def update_display(category):
88
  cache_file = f"{category}_papers_cache.json"
89
  url = f"https://paperswithcode.com/{category}" if category != "top" else "https://paperswithcode.com/"
90
+
91
  data = load_and_cache_data(url, cache_file)
92
  df = format_dataframe(data)
93
+
94
+ return len(df), df.to_html(escape=False, index=False)
95
 
96
  def load_all_data():
97
+ top_count, top_html = update_display("top")
98
+ new_count, new_html = update_display("latest")
99
+ greatest_count, greatest_html = update_display("greatest")
100
+ return top_count, top_html, new_count, new_html, greatest_count, greatest_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  with gr.Blocks() as demo:
103
  gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")
104
+
105
  with gr.Tab("Top Trending Papers"):
106
  top_count = gr.Textbox(label="Number of Papers Fetched")
107
+ top_html = gr.HTML()
108
  top_button = gr.Button("Refresh Leaderboard")
109
+ top_button.click(fn=lambda: update_display("top"), inputs=None, outputs=[top_count, top_html])
110
+
 
 
 
 
 
 
111
  with gr.Tab("New Papers"):
112
  new_count = gr.Textbox(label="Number of Papers Fetched")
113
+ new_html = gr.HTML()
114
  new_button = gr.Button("Refresh Leaderboard")
115
+ new_button.click(fn=lambda: update_display("latest"), inputs=None, outputs=[new_count, new_html])
116
+
 
 
 
 
 
 
117
  with gr.Tab("Greatest Papers"):
118
  greatest_count = gr.Textbox(label="Number of Papers Fetched")
119
+ greatest_html = gr.HTML()
120
  greatest_button = gr.Button("Refresh Leaderboard")
121
+ greatest_button.click(fn=lambda: update_display("greatest"), inputs=None, outputs=[greatest_count, greatest_html])
 
 
 
 
 
 
122
 
123
  # Load initial data for all tabs
124
+ demo.load(fn=load_all_data, outputs=[top_count, top_html, new_count, new_html, greatest_count, greatest_html])
125
 
126
  # Launch the Gradio interface with a public link
127
+ demo.launch(share=True)