awacke1 committed on
Commit
283e444
·
verified ·
1 Parent(s): 4b0b693

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -7
app.py CHANGED
@@ -68,6 +68,7 @@ def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
68
  soup = BeautifulSoup(response.text, 'html.parser')
69
  paper_info = soup.find_all('div', class_='row infinite-item item paper-card')
70
  if not paper_info:
 
71
  break
72
  for ppr in paper_info:
73
  title = ppr.find('h1').text.strip()
@@ -76,15 +77,20 @@ def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
76
  link = base_url + ppr.find('a')['href']
77
  else:
78
  link = ppr.find('a')['href']
79
- Github_Star = ppr.find('span', class_='badge badge-secondary').text.strip().replace(',', '')
 
 
80
  pdf_link = ''
81
  try:
82
  response_link = session.get(link, headers=headers)
83
  soup_link = BeautifulSoup(response_link.text, 'html.parser')
84
  paper_info_link = soup_link.find_all('div', class_='paper-abstract')
85
  pdf_link = paper_info_link[0].find('div', class_='col-md-12').find('a')['href']
86
- except:
87
- pass
 
 
 
88
  if title not in data_list:
89
  data_list[title] = {'link': link, 'Github Star': int(Github_Star), 'pdf_link': pdf_link.strip()}
90
  else:
@@ -125,12 +131,23 @@ def load_and_cache_data(url, cache_file):
125
 
126
  def format_dataframe(data):
127
  """Format data into a pretty DataFrame. It's like giving your data a makeover! 💅📈"""
 
 
 
 
128
  df = pd.DataFrame(data).T
129
  df['title'] = df.index
130
- df = df[['title', 'Github Star', 'link', 'pdf_link']]
131
- df = df.sort_values(by='Github Star', ascending=False)
132
- df['link'] = df['link'].apply(lambda x: f'<a href="{x}" target="_blank">Link</a>')
133
- df['pdf_link'] = df['pdf_link'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')
 
 
 
 
 
 
 
134
  return df
135
 
136
  def update_display(category):
 
68
  soup = BeautifulSoup(response.text, 'html.parser')
69
  paper_info = soup.find_all('div', class_='row infinite-item item paper-card')
70
  if not paper_info:
71
+ print("No paper information found.")
72
  break
73
  for ppr in paper_info:
74
  title = ppr.find('h1').text.strip()
 
77
  link = base_url + ppr.find('a')['href']
78
  else:
79
  link = ppr.find('a')['href']
80
+
81
+ Github_Star = ppr.find('span', class_='badge badge-secondary').text.strip().replace(',', '') if ppr.find('span', class_='badge badge-secondary') else "0"
82
+
83
  pdf_link = ''
84
  try:
85
  response_link = session.get(link, headers=headers)
86
  soup_link = BeautifulSoup(response_link.text, 'html.parser')
87
  paper_info_link = soup_link.find_all('div', class_='paper-abstract')
88
  pdf_link = paper_info_link[0].find('div', class_='col-md-12').find('a')['href']
89
+ except Exception as e:
90
+ print(f"Failed to retrieve PDF link for {title}: {e}")
91
+
92
+ print(f"Title: {title}, Link: {link}, Github Star: {Github_Star}, PDF Link: {pdf_link}")
93
+
94
  if title not in data_list:
95
  data_list[title] = {'link': link, 'Github Star': int(Github_Star), 'pdf_link': pdf_link.strip()}
96
  else:
 
131
 
132
  def format_dataframe(data):
133
  """Format data into a pretty DataFrame. It's like giving your data a makeover! 💅📈"""
134
+ if not data:
135
+ print("No data found to format.")
136
+ return pd.DataFrame()
137
+
138
  df = pd.DataFrame(data).T
139
  df['title'] = df.index
140
+
141
+ # Check if required columns are present
142
+ if 'Github Star' in df.columns and 'link' in df.columns and 'pdf_link' in df.columns:
143
+ df = df[['title', 'Github Star', 'link', 'pdf_link']]
144
+ df = df.sort_values(by='Github Star', ascending=False)
145
+ df['link'] = df['link'].apply(lambda x: f'<a href="{x}" target="_blank">Link</a>')
146
+ df['pdf_link'] = df['pdf_link'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')
147
+ else:
148
+ print("Required columns are missing in the dataframe.")
149
+ print(f"Columns available: {df.columns}")
150
+
151
  return df
152
 
153
  def update_display(category):