Spaces:

librarian-bots
/

claim-papers

Sleeping

App Files Files Community

claim-papers / app.py

davanstrien HF Staff

Add check for logged in user before accessing

1f24a5f over 1 year ago

raw

history blame

5.22 kB

	import httpx
	from cytoolz import groupby
	from functools import lru_cache
	from rich import print
	from functools import partial
	import gradio as gr
	from typing import Optional


	def query_author(author_name: str):
	    """Search the Semantic Scholar author endpoint for ``author_name``.

	    The name and the field list are passed through ``params`` so that httpx
	    URL-encodes them; interpolating the raw name into the URL breaks the
	    query for names containing characters such as ``&``, ``#`` or ``+``.

	    Returns:
	        The value stored under the ``"data"`` key of the JSON response.

	    Raises:
	        httpx.HTTPStatusError: if the API answers with a 4xx/5xx status.
	    """
	    resp = httpx.get(
	        "https://api.semanticscholar.org/graph/v1/author/search",
	        params={
	            "query": author_name,
	            "fields": "name,url,externalIds,papers.externalIds,papers.title,papers.year",
	        },
	    )
	    resp.raise_for_status()
	    return resp.json()["data"]


	def get_arxiv_paper(papers):
	    """Keep only the papers whose ``externalIds`` holds a truthy ``ArXiv`` id."""
	    arxiv_papers = []
	    for candidate in papers:
	        external_ids = candidate.get("externalIds")
	        # Papers without any external ids, or without an ArXiv one, are dropped.
	        if external_ids and external_ids.get("ArXiv"):
	            arxiv_papers.append(candidate)
	    return arxiv_papers


	def check_arxiv_in_papers(arxiv_ids, papers):
	    """Tell whether any paper carries an ArXiv id that is in ``arxiv_ids``.

	    Papers with no ``externalIds`` or no ``ArXiv`` entry are ignored.
	    """
	    external_id_maps = [
	        entry["externalIds"] for entry in papers if entry.get("externalIds")
	    ]
	    found_ids = [id_map.get("ArXiv") for id_map in external_id_maps]
	    for arxiv_id in found_ids:
	        if arxiv_id and arxiv_id in arxiv_ids:
	            return True
	    return False


	def get_author_from_options(potential_authors, positive_arxiv_ids):
	    """Pick the first candidate author who wrote one of ``positive_arxiv_ids``.

	    Args:
	        potential_authors: iterable of author dicts, each optionally holding
	            a ``"papers"`` list.
	        positive_arxiv_ids: iterable of ArXiv ids known to belong to the
	            author being looked for.

	    Returns:
	        The first matching author dict, or ``None`` when nobody matches.
	    """
	    # Build the lookup set once instead of once per candidate.
	    known_ids = set(positive_arxiv_ids)
	    for author in potential_authors:
	        # A candidate without a "papers" entry simply cannot match.
	        if check_arxiv_in_papers(known_ids, author.get("papers") or []):
	            return author
	    return None


	def sort_by_date(papers):
	    """Return ``papers`` as a new list ordered from newest to oldest year.

	    A paper whose ``"year"`` is missing or ``None`` is treated as year 0 and
	    therefore sorts last; comparing ``None`` with an ``int`` would otherwise
	    raise ``TypeError``. Papers sharing a year keep their input order.
	    """
	    return sorted(papers, key=lambda paper: paper.get("year") or 0, reverse=True)


	@lru_cache()
	def lookup_hf_paper(arxiv_id):
	    """Fetch the Hugging Face papers API entry for ``arxiv_id`` as decoded JSON.

	    Results are memoised per ``arxiv_id`` by ``lru_cache``. The HTTP status is
	    not checked here; the decoded body is returned whatever the status.
	    """
	    response = httpx.get(f"https://huggingface.co/api/papers/{arxiv_id}")
	    return response.json()


	def check_if_index_hf_paper(paper):
	    """Return True when the Hugging Face lookup for the paper has no truthy ``"error"``."""
	    hf_record = lookup_hf_paper(paper["externalIds"]["ArXiv"])
	    has_error = bool(hf_record.get("error"))
	    return not has_error


	def groupby_indexed_by_hf_papers(papers):
	    """Group ``papers`` by the result of ``check_if_index_hf_paper``.

	    The returned mapping is keyed by ``True`` / ``False``; a key is only
	    present when at least one paper falls into that group.
	    """
	    by_indexed_flag = groupby(check_if_index_hf_paper, papers)
	    return by_indexed_flag


	def check_hf_user_in_authors(paper, hf_user_name):
	    """Tell whether ``hf_user_name`` is one of the paper's linked author accounts.

	    Only authors with a truthy ``"user"`` entry are considered; their
	    ``["user"]["user"]`` value is compared with ``hf_user_name``.
	    """
	    linked_accounts = [
	        entry["user"] for entry in paper["authors"] if entry.get("user")
	    ]
	    for account in linked_accounts:
	        if account["user"] == hf_user_name:
	            return True
	    return False


	def groupby_hf_user_papers(papers, hf_user_name):
	    """Group ``papers`` by whether ``hf_user_name`` appears among their authors.

	    The returned mapping is keyed by the boolean result of
	    ``check_hf_user_in_authors`` for each paper.
	    """

	    def is_claimed_by_user(paper):
	        return check_hf_user_in_authors(paper, hf_user_name=hf_user_name)

	    return groupby(is_claimed_by_user, papers)


	def get_papers(
	    author_name: str, positive_arxiv_ids: str, hf_user_name: Optional[gr.OAuthProfile]
	):
	    """Build a Markdown report of the papers the logged-in user can still claim.

	    Args:
	        author_name: name the user publishes under, used for the author search.
	        positive_arxiv_ids: comma-separated ArXiv ids of papers the user
	            authored; used to pick the right author among the search results.
	        hf_user_name: OAuth profile of the logged-in user, falsy when the
	            visitor is not logged in.

	    Returns:
	        A Markdown string listing indexed-but-unclaimed papers followed by
	        papers not yet indexed by Hugging Face.

	    Raises:
	        gr.Error: when the user is not logged in, an input is empty, the
	            author search fails, or no author matches the given ArXiv ids.
	    """
	    if not hf_user_name:
	        raise gr.Error("You must be logged in to use this Space")
	    username = hf_user_name.preferred_username
	    if not author_name or not author_name.strip():
	        raise gr.Error("Please enter the name you publish under")
	    # Accept "id1, id2" as well as "id1,id2": strip blanks and drop empty items.
	    known_ids = [
	        arxiv_id.strip()
	        for arxiv_id in (positive_arxiv_ids or "").split(",")
	        if arxiv_id.strip()
	    ]
	    if not known_ids:
	        raise gr.Error("Please enter the ArXiv ID of at least one paper you authored")
	    try:
	        potential_authors = query_author(author_name.strip())
	    except httpx.HTTPError as err:
	        # Show a readable message in the UI instead of a raw traceback.
	        raise gr.Error(
	            "Could not query the Semantic Scholar API, please try again later"
	        ) from err
	    author = get_author_from_options(potential_authors, known_ids)
	    if author is None:
	        raise gr.Error(
	            "No Semantic Scholar author with that name has a paper matching the given ArXiv ID"
	        )
	    papers = sort_by_date(get_arxiv_paper(author["papers"]))
	    papers_indexed_by_hf = groupby_indexed_by_hf_papers(papers)
	    # The True group is absent when none of the papers is indexed yet.
	    indexed_papers = [
	        lookup_hf_paper(paper["externalIds"]["ArXiv"])
	        for paper in papers_indexed_by_hf.get(True, [])
	    ]
	    already_claimed = groupby_hf_user_papers(indexed_papers, username)
	    report = []
	    unclaimed = already_claimed.get(False)
	    if unclaimed:
	        report.append(
	            "# Papers already indexed by Hugging Face which you haven't claimed\n"
	            "These papers are already indexed by Hugging Face, but you haven't claimed them yet. You can claim them by clicking on the link and then clicking on the 'Claim' button on the Hugging Face papers page.\n"
	        )
	        for paper in unclaimed:
	            url = f"https://huggingface.co/papers/{paper['id']}"
	            report.append(f"- [{paper['title']}]({url})\n")
	    else:
	        report.append("You have claimed all papers indexed by Hugging Face!\n")
	    not_indexed = papers_indexed_by_hf.get(False)
	    if not_indexed:
	        report.append("# Papers not yet indexed by Hugging Face which you can claim\n")
	        for paper in not_indexed:
	            paper_title = paper["title"]
	            arxiv_id = paper["externalIds"]["ArXiv"]
	            url = f"https://huggingface.co/papers/{arxiv_id}"
	            report.append(f"- [{paper_title}]({url})\n")
	    return "".join(report)


	# Gradio UI: intro text, login/logout buttons, the two inputs and a button
	# that renders the Markdown report returned by get_papers.
	with gr.Blocks() as demo:
	    gr.HTML(
	        "<h1 style='text-align:center;'> 📃 Hugging Face Paper Claimer 📃 </h1>"
	    )
	    gr.HTML(
	        "<div style='text-align:center;'>You can use this Space to help you find arXiv papers you can still claim.</div>"
	    )
	    gr.Markdown(
	        "NOTE This Space uses the [Semantic Scholar API](https://www.semanticscholar.org/product/api) to find papers you have authored. Occasionally this API returns false positives, i.e. papers which you did not author."
	    )
	    # NOTE(review): the original indentation was lost; this assumes only the
	    # login/logout buttons share the row. Confirm against the deployed layout.
	    with gr.Row():
	        gr.LoginButton(size="sm")
	        gr.LogoutButton(size="sm")
	    author_name = gr.Textbox(
	        placeholder="Daniel van Strien",
	        label="The name you publish under",
	        interactive=True,
	    )
	    positive_arxiv_ids = gr.Textbox(
	        placeholder="1910.01108",
	        label="ArXiv ID for a paper for which you are an author",
	        interactive=True,
	    )
	    btn = gr.Button("Get papers")
	    # The OAuth profile argument of get_papers is not listed as an input here;
	    # only the two textboxes are wired explicitly.
	    report_output = gr.Markdown()
	    btn.click(get_papers, [author_name, positive_arxiv_ids], report_output)

	demo.launch(debug=True)