File size: 5,548 Bytes
3e26d53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d04ce0
3e26d53
4d04ce0
3e26d53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d04ce0
3e26d53
 
 
def031c
3e26d53
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
import time

def _fetch_pdf_link(session, link, headers, timeout):
    """Return the first link found in a paper page's abstract block.

    Fetches ``link`` and looks for ``div.paper-abstract`` -> ``div.col-md-12``
    -> first ``<a>``. This is a best-effort lookup: an empty string is
    returned when the request fails or the expected markup is absent.
    """
    try:
        response = session.get(link, headers=headers, timeout=timeout)
    except requests.RequestException as err:
        print(f'Could not fetch paper page {link}: {err}')
        return ''

    page = BeautifulSoup(response.text, 'html.parser')
    abstract = page.find('div', class_='paper-abstract')
    column = abstract.find('div', class_='col-md-12') if abstract is not None else None
    anchor = column.find('a') if column is not None else None
    if anchor is None:
        return ''
    return (anchor.get('href') or '').strip()


def get_rank_papers(url,progress=gr.Progress(track_tqdm=True)):
    """Scrape paper cards from a paginated listing page.

    Pages are requested with an increasing ``page`` query parameter (starting
    at 0) until a page returns a non-200 status, a request fails, a page
    contains no paper cards, or 10 already-seen titles have been encountered.

    Args:
        url: Listing URL to scrape.
        progress: Gradio progress tracker; advanced once per fetched page.

    Returns:
        A dict mapping paper title to
        ``{'link': str, 'Github Star': int, 'pdf_link': str}``. ``pdf_link``
        is an empty string when it could not be determined.
    """
    from urllib.parse import urljoin

    base_url = "https://paperswithcode.com"
    # Without a timeout a stalled server would block the UI callback forever.
    request_timeout = 30
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Cache-Control': 'no-cache'
    }
    print("Time run at : ",time.ctime())
    offset = 0
    data_list = {}
    # Number of duplicate titles tolerated before the scrape stops early.
    break_duplicate = 10

    with requests.Session() as session:
        while True:
            try:
                response = session.get(
                    url,
                    headers=headers,
                    params={'page': offset},
                    timeout=request_timeout,
                )
            except requests.RequestException as err:
                # Keep whatever was collected so far instead of crashing.
                print(f'Failed to retrieve data: {err}')
                break
            if response.status_code != 200:
                print('Failed to retrieve data')
                break

            soup = BeautifulSoup(response.text, 'html.parser')
            paper_info = soup.find_all('div', class_='row infinite-item item paper-card')
            if not paper_info:
                break

            for ppr in paper_info:
                heading = ppr.find('h1')
                anchor = ppr.find('a')
                href = anchor.get('href') if anchor is not None else None
                if heading is None or not href:
                    # Card without a title or link: nothing useful to record.
                    continue

                title = heading.text.strip()
                # Check for duplicates first so no detail page is fetched
                # for a paper that is already recorded.
                if title in data_list:
                    break_duplicate -= 1
                    if break_duplicate == 0:
                        return data_list
                    continue

                # urljoin resolves relative hrefs against the site root and
                # leaves absolute URLs untouched.
                link = urljoin(base_url, href)

                badge = ppr.find('span', class_='badge badge-secondary')
                stars_text = badge.text.strip().replace(',', '') if badge is not None else ''
                try:
                    stars = int(stars_text)
                except ValueError:
                    # Missing or non-numeric badge: count as zero stars.
                    stars = 0

                data_list[title] = {
                    'link': link,
                    'Github Star': stars,
                    'pdf_link': _fetch_pdf_link(session, link, headers, request_timeout),
                }

            offset += 1
            progress.update(offset)

    print('Data retrieval complete')
    return data_list

def _render_leaderboard(papers):
    """Render scraped papers as ``(paper_count, html_table)``.

    Args:
        papers: Mapping of title to a dict with the keys ``'link'``,
            ``'Github Star'`` and ``'pdf_link'``.

    Returns:
        A tuple of the number of papers and an HTML table string with the
        columns ``title``, ``Github Star``, ``link`` and ``pdf_link``, sorted
        by ``Github Star`` in descending order.
    """
    from html import escape

    columns = ['title', 'Github Star', 'link', 'pdf_link']
    ranked = sorted(papers.items(), key=lambda item: item[1]['Github Star'], reverse=True)

    rows = []
    for title, info in ranked:
        # Titles and URLs come from scraped pages and the table is emitted
        # with escape=False, so they must be escaped before interpolation.
        link = escape(str(info['link']), quote=True)
        pdf_link = escape(str(info['pdf_link']), quote=True)
        rows.append({
            'title': escape(str(title)),
            'Github Star': info['Github Star'],
            'link': f'<a href="{link}" target="_blank">Link</a>',
            # No anchor when the PDF link could not be found.
            'pdf_link': f'<a href="{pdf_link}" target="_blank">{pdf_link}</a>' if pdf_link else '',
        })

    # Explicit columns keep the table well-formed even when nothing was scraped.
    rank_paper_df = pd.DataFrame(rows, columns=columns)
    rank_paper = rank_paper_df.to_html(escape=False, index=False)
    return len(rank_paper_df), rank_paper


def reload_gradio_top():
    """Scrape the site's front page and return ``(paper_count, html_table)``."""
    print('Retrieving data .. top papers')
    return _render_leaderboard(get_rank_papers(url="https://paperswithcode.com/"))


def reload_gradio_new():
    """Scrape the ``/latest`` listing and return ``(paper_count, html_table)``."""
    print('Retrieving data .. new papers')
    return _render_leaderboard(get_rank_papers(url="https://paperswithcode.com/latest"))


def reload_gradio_greatest():
    """Scrape the ``/greatest`` listing and return ``(paper_count, html_table)``."""
    print('Retrieving data .. greatest papers')
    return _render_leaderboard(get_rank_papers(url="https://paperswithcode.com/greatest"))


# One entry per tab: (tab label, callback returning (paper count, HTML table)).
_LEADERBOARD_TABS = (
    ("Top Trending Papers", reload_gradio_top),
    ("New Papers", reload_gradio_new),
    ("Greatest Papers", reload_gradio_greatest),
)

with gr.Blocks() as demo:
    gr.Markdown("<h1><center>Papers Leaderboard</center></h1>")
    # Every tab has the same layout: a load button, a count box and the table.
    for tab_label, loader in _LEADERBOARD_TABS:
        with gr.Tab(tab_label):
            button = gr.Button("Load Leaderboard")
            output = [gr.Textbox(label="Number of Papers Fetch"),
                      gr.HTML()]
            button.click(fn=loader, inputs=None, outputs=output)

# Launch the Gradio interface
demo.launch()