Update app.py
app.py CHANGED
@@ -68,6 +68,7 @@ def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
         soup = BeautifulSoup(response.text, 'html.parser')
         paper_info = soup.find_all('div', class_='row infinite-item item paper-card')
         if not paper_info:
+            print("No paper information found.")
             break
         for ppr in paper_info:
             title = ppr.find('h1').text.strip()
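For context, find_all() returns an empty list when nothing matches, so the guard added above logs once and leaves the page loop. A minimal sketch of that behaviour with made-up HTML; the surrounding pagination loop is only assumed from the existing break:

from bs4 import BeautifulSoup

# Hypothetical page with no paper cards; illustrates the falsy find_all() result.
html = "<html><body><p>no results</p></body></html>"
soup = BeautifulSoup(html, 'html.parser')
paper_info = soup.find_all('div', class_='row infinite-item item paper-card')

if not paper_info:                        # empty list -> falsy
    print("No paper information found.")  # the log line added in this hunk
    # break                               # would exit the assumed pagination loop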
@@ -76,15 +77,20 @@ def get_rank_papers(url, progress=gr.Progress(track_tqdm=True)):
                 link = base_url + ppr.find('a')['href']
             else:
                 link = ppr.find('a')['href']
-            Github_Star = ppr.find('span', class_='badge badge-secondary').text.strip().replace(',', '')
+
+            Github_Star = ppr.find('span', class_='badge badge-secondary').text.strip().replace(',', '') if ppr.find('span', class_='badge badge-secondary') else "0"
+
             pdf_link = ''
             try:
                 response_link = session.get(link, headers=headers)
                 soup_link = BeautifulSoup(response_link.text, 'html.parser')
                 paper_info_link = soup_link.find_all('div', class_='paper-abstract')
                 pdf_link = paper_info_link[0].find('div', class_='col-md-12').find('a')['href']
-            except:
-                pass
+            except Exception as e:
+                print(f"Failed to retrieve PDF link for {title}: {e}")
+
+            print(f"Title: {title}, Link: {link}, Github Star: {Github_Star}, PDF Link: {pdf_link}")
+
             if title not in data_list:
                 data_list[title] = {'link': link, 'Github Star': int(Github_Star), 'pdf_link': pdf_link.strip()}
             else:
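The conditional added around Github_Star is what keeps the later int(Github_Star) conversion from failing on cards without a star badge. A small self-contained sketch with made-up HTML (binding the badge to a local name here is equivalent to calling find() twice as the diff does):

from bs4 import BeautifulSoup

with_badge = BeautifulSoup('<div><span class="badge badge-secondary">1,234</span></div>', 'html.parser')
without_badge = BeautifulSoup('<div><h1>Some paper</h1></div>', 'html.parser')

for ppr in (with_badge, without_badge):
    badge = ppr.find('span', class_='badge badge-secondary')
    Github_Star = badge.text.strip().replace(',', '') if badge else "0"
    print(int(Github_Star))  # prints 1234, then 0 instead of raising AttributeError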
@@ -125,12 +131,23 @@ def load_and_cache_data(url, cache_file):
 
 def format_dataframe(data):
     """Format data into a pretty DataFrame. It's like giving your data a makeover!"""
+    if not data:
+        print("No data found to format.")
+        return pd.DataFrame()
+
     df = pd.DataFrame(data).T
     df['title'] = df.index
-    df = df[['title', 'Github Star', 'link', 'pdf_link']]
-    df = df.sort_values(by='Github Star', ascending=False)
-    df['link'] = df['link'].apply(lambda x: f'<a href="{x}" target="_blank">Link</a>')
-    df['pdf_link'] = df['pdf_link'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')
+
+    # Check if required columns are present
+    if 'Github Star' in df.columns and 'link' in df.columns and 'pdf_link' in df.columns:
+        df = df[['title', 'Github Star', 'link', 'pdf_link']]
+        df = df.sort_values(by='Github Star', ascending=False)
+        df['link'] = df['link'].apply(lambda x: f'<a href="{x}" target="_blank">Link</a>')
+        df['pdf_link'] = df['pdf_link'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')
+    else:
+        print("Required columns are missing in the dataframe.")
+        print(f"Columns available: {df.columns}")
+
     return df
 
 def update_display(category):
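Taken together, the new guards in format_dataframe mean empty input short-circuits and the reorder/sort only runs when the expected keys exist. A runnable sketch of that behaviour; preview is a hypothetical stand-in defined here purely for illustration, and the sample dict mimics the shape get_rank_papers stores in data_list:

import pandas as pd

def preview(data):
    # Mirrors the new guards: empty input returns an empty frame, and the
    # reorder/sort only happens when the expected columns are present.
    if not data:
        return pd.DataFrame()
    df = pd.DataFrame(data).T
    df['title'] = df.index
    if {'Github Star', 'link', 'pdf_link'}.issubset(df.columns):
        df = df[['title', 'Github Star', 'link', 'pdf_link']].sort_values(by='Github Star', ascending=False)
    return df

print(preview({}))  # empty DataFrame, no KeyError
print(preview({
    "Paper A": {"link": "https://example.com/a", "Github Star": 45, "pdf_link": "https://example.com/a.pdf"},
    "Paper B": {"link": "https://example.com/b", "Github Star": 120, "pdf_link": "https://example.com/b.pdf"},
}))  # Paper B (120 stars) sorts above Paper A (45)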