flytoe committed on
Commit
70b4875
·
verified ·
1 Parent(s): 7a0e4f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -66
app.py CHANGED
@@ -2,98 +2,151 @@ import os
2
  import streamlit as st
3
  import arxiv
4
  import random
5
- import networkx as nx
6
- import matplotlib.pyplot as plt
7
  import datetime
8
- from groq import Groq
 
9
 
10
- client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- def groq_summarize(text: str) -> str:
13
- response = client.chat.completions.create(
14
- messages=[
15
- {"role": "user", "content": f"Summarize in 250 characters:\n{text}"}
16
- ],
17
- model="llama-3.3-70b-versatile",
18
- )
19
- return response.choices[0].message.content.strip()
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- def groq_generate(text: str) -> str:
22
- response = client.chat.completions.create(
23
- messages=[{"role": "user", "content": text}],
24
- model="llama-3.3-70b-versatile",
25
- )
26
- return response.choices[0].message.content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def retrieve_papers(query, max_results=5):
 
29
  search = arxiv.Search(query=query, max_results=max_results)
30
  papers = []
31
  for result in search.results():
32
- paper_id = result.entry_id.split("/")[-1]
 
 
 
 
33
  paper = {
34
  "title": result.title,
35
  "summary": result.summary,
36
  "url": result.pdf_url,
 
 
 
 
 
37
  "authors": [author.name for author in result.authors],
38
  "published": result.published,
39
- "doi": f"https://doi.org/10.48550/arXiv.{paper_id}",
40
- "bibliographic_explorer": f"https://arxiv.org/abs/{paper_id}",
41
- "litmaps": f"https://app.litmaps.com/preview/{paper_id}",
42
- "trust_score": random.randint(60, 100),
43
- "relevance_score": random.randint(50, 100)
44
  }
45
  papers.append(paper)
46
  return papers
47
 
48
- def summarize_text(text):
49
- return groq_summarize(text)
50
-
51
- def get_cached_summary(paper_id, text):
52
- if 'summaries' not in st.session_state:
53
- st.session_state.summaries = {}
54
- if paper_id not in st.session_state.summaries:
55
- st.session_state.summaries[paper_id] = summarize_text(text)
56
- return st.session_state.summaries[paper_id]
57
-
58
  def random_paper_search():
59
- topics = ["machine learning", "quantum computing", "climate change", "robotics", "health AI"]
60
- return random.choice(topics)
 
 
61
 
 
 
 
62
  st.title("πŸ“š PaperPilot – Intelligent Academic Navigator")
63
 
64
- st.sidebar.header("πŸ” Search Parameters")
65
- query = st.sidebar.text_input("Research topic or question:")
66
- if st.sidebar.button("🎲 Random Search"):
67
- query = random_paper_search()
68
- st.sidebar.text(f"Random Topic: {query}")
69
- if st.sidebar.button("πŸš€ Find Articles"):
70
- if query.strip():
 
 
 
 
71
  with st.spinner("Searching arXiv..."):
72
- papers = retrieve_papers(query, random.randint(5, 15))
73
  if papers:
74
  st.session_state.papers = papers
75
- st.success(f"Found {len(papers)} papers!")
76
- st.session_state.active_section = "articles"
77
  else:
78
  st.error("No papers found. Try different keywords.")
79
- else:
80
- st.warning("Please enter a search query")
81
-
82
- if 'active_section' not in st.session_state:
83
- st.session_state.active_section = "none"
84
 
85
- if 'papers' in st.session_state and st.session_state.papers:
86
- papers = st.session_state.papers
87
- if st.session_state.active_section == "articles":
88
- st.header("πŸ“‘ Retrieved Papers")
89
- for idx, paper in enumerate(papers, 1):
90
- with st.expander(f"{idx}. {paper['title']}"):
91
- st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
92
- pub_date = paper['published'].strftime('%Y-%m-%d') if isinstance(paper['published'], datetime.datetime) else "n.d."
93
- st.markdown(f"**Published:** {pub_date}")
94
- st.markdown(f"**[PDF Link]({paper['url']}) | [DOI]({paper['doi']}) | [Bibliographic Explorer]({paper['bibliographic_explorer']}) | [Litmaps]({paper['litmaps']})**")
95
- st.markdown(f"**Trust Score:** {paper['trust_score']} | **Relevance Score:** {paper['relevance_score']}")
96
- summary = get_cached_summary(f"paper_{idx}", paper['summary'])
97
- st.write(summary)
98
- if st.button(f"πŸ” Explain like I'm 5 (ELI5) {idx}"):
99
- st.write(groq_generate(f"Explain this in simple terms: {summary}"))
 
2
  import streamlit as st
3
  import arxiv
4
  import random
 
 
5
  import datetime
6
+ import requests
7
+ from scholarly import scholarly
8
 
9
+ # -------------------------------
10
+ # Helper Functions
11
+ # -------------------------------
def _fetch_json(url, timeout):
    """Return the JSON object served at *url*, or an empty dict on any failure.

    Network errors, non-200 responses, bodies that are not valid JSON and
    JSON payloads that are not objects all yield ``{}`` so that callers can
    treat the lookup as best-effort.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return {}
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers JSON decoding failures (e.g. an HTML error page).
        return {}
    return data if isinstance(data, dict) else {}


def get_paper_metadata(arxiv_id, timeout=10):
    """Fetch metadata like citations and connected papers for scoring.

    Both lookups are best-effort: if a service is unreachable, slow, or
    returns something unexpected, the corresponding field keeps its default
    instead of aborting the whole search.

    NOTE(review): the scite.ai and Connected Papers endpoints used here have
    not been verified against their public API documentation -- confirm that
    they exist and return the fields read below.

    Args:
        arxiv_id: arXiv identifier interpolated into both request URLs.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A dict with the keys ``"citations"`` (int), ``"institution"`` (str),
        ``"authors"`` (list) and ``"connected_papers"`` (int). Only the two
        counts are ever filled in from the network.
    """
    metadata = {
        "citations": 0,
        "institution": "Unknown",
        "authors": [],
        "connected_papers": 0,
    }

    # Fetch citation count from scite.ai
    scite_data = _fetch_json(
        f"https://api.scite.ai/v1/papers/arxiv:{arxiv_id}", timeout
    )
    # `or 0` also covers an explicit JSON null, which would break scoring.
    metadata["citations"] = scite_data.get("citation_count") or 0

    # Fetch connected paper count from Connected Papers
    graph_data = _fetch_json(
        f"https://www.connectedpapers.com/api/graph/{arxiv_id}", timeout
    )
    metadata["connected_papers"] = len(graph_data.get("nodes") or [])

    return metadata
36
 
def calculate_trust_score(metadata):
    """Compute a 50-100 trust score from citation and graph-size counts.

    Args:
        metadata: Mapping that may contain ``"citations"`` and
            ``"connected_papers"`` counts. Missing or ``None`` values are
            treated as 0 so that incomplete metadata never raises.

    Returns:
        int: Base score of 50, plus up to 30 points for citations and up to
        20 points for connected papers, capped at 100.
    """
    citations = metadata.get("citations") or 0
    connected = metadata.get("connected_papers") or 0

    trust_score = 50  # Base score

    # Citations factor (max boost 30 points)
    if citations > 100:
        trust_score += 30
    elif citations > 50:
        trust_score += 20
    elif citations > 10:
        trust_score += 10

    # Connected Papers factor (max boost 20 points)
    if connected > 20:
        trust_score += 20
    elif connected > 10:
        trust_score += 10

    return min(trust_score, 100)
56
 
def _word_set(text):
    """Return the set of lower-case alphanumeric words found in *text*.

    Every non-alphanumeric character is treated as a separator, so
    punctuation attached to a word ("computing:") does not prevent a match.
    """
    normalized = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return set(normalized.split())


def calculate_relevance_score(paper, query, now=None):
    """Compute a 50-100 relevance score from keyword overlap and recency.

    Args:
        paper: Mapping with a ``"title"`` string and, optionally, a
            ``"published"`` ``datetime.datetime``. Any other ``"published"``
            value simply earns no recency bonus.
        query: The user's search string.
        now: Reference time for the age calculation. Defaults to the current
            time; pass a fixed value for reproducible scores.

    Returns:
        int: Base score of 50, plus 5 points per distinct query word that
        also appears in the title, plus a recency bonus of 15 / 10 / 5 points
        for papers younger than 1 / 3 / 5 years, capped at 100.
    """
    relevance_score = 50  # Base score

    # Keyword match factor
    shared_words = _word_set(query) & _word_set(paper["title"])
    relevance_score += len(shared_words) * 5

    # Publication date factor
    published = paper.get("published")
    if isinstance(published, datetime.datetime):
        if now is None:
            now = datetime.datetime.now(tz=published.tzinfo)
        elif (now.tzinfo is None) != (published.tzinfo is None):
            # Naive and aware datetimes cannot be subtracted; at year
            # granularity the offset is irrelevant, so drop it on both.
            now = now.replace(tzinfo=None)
            published = published.replace(tzinfo=None)

        # Use elapsed time rather than a calendar-year difference, which
        # would call a paper from last December "one year old" in January.
        age_years = (now - published).days / 365.25
        if age_years < 1:
            relevance_score += 15
        elif age_years < 3:
            relevance_score += 10
        elif age_years < 5:
            relevance_score += 5

    return min(relevance_score, 100)
78
 
def _versionless_arxiv_id(entry_id):
    """Extract the arXiv identifier from an entry URL, without its version.

    Handles both new-style (``.../abs/2301.01234v2`` -> ``2301.01234``) and
    old-style (``.../abs/hep-th/9901001v1`` -> ``hep-th/9901001``) entries.
    """
    _, marker, tail = entry_id.partition("/abs/")
    identifier = tail if marker else entry_id.rsplit("/", 1)[-1]

    base, v_sep, version = identifier.rpartition("v")
    if v_sep and base and version.isdigit():
        return base
    return identifier


def retrieve_papers(query, max_results=5):
    """Retrieve academic papers from arXiv and attach scores and links.

    Args:
        query: Search string passed to ``arxiv.Search``.
        max_results: Maximum number of results requested from arXiv.

    Returns:
        list[dict]: One dict per result with the keys ``title``, ``summary``,
        ``url``, ``doi``, ``bib_explorer``, ``connected_papers``, ``litmaps``,
        ``scite``, ``authors``, ``published``, ``trust_score`` and
        ``relevance_score``.
    """
    search = arxiv.Search(query=query, max_results=max_results)
    papers = []
    for result in search.results():
        # arXiv DOIs identify the article, not a specific version, so the
        # "vN" suffix must be dropped before building the DOI link.
        arxiv_id = _versionless_arxiv_id(result.entry_id)
        metadata = get_paper_metadata(arxiv_id)
        trust_score = calculate_trust_score(metadata)
        relevance_score = calculate_relevance_score(
            {"title": result.title, "published": result.published}, query
        )

        paper = {
            "title": result.title,
            "summary": result.summary,
            "url": result.pdf_url,
            "doi": f"https://doi.org/10.48550/arXiv.{arxiv_id}",
            "bib_explorer": f"https://arxiv.org/abs/{arxiv_id}",
            # NOTE(review): this is the same API endpoint that is queried for
            # scoring, yet it is shown to the user as a link -- confirm it is
            # the intended human-facing URL.
            "connected_papers": f"https://www.connectedpapers.com/api/graph/{arxiv_id}",
            "litmaps": f"https://app.litmaps.com/preview/{arxiv_id}",
            "scite": f"https://scite.ai/reports/arxiv:{arxiv_id}",
            "authors": [author.name for author in result.authors],
            "published": result.published,
            "trust_score": trust_score,
            "relevance_score": relevance_score,
        }
        papers.append(paper)
    return papers
105
 
 
 
 
 
 
 
 
 
 
 
def random_paper_search():
    """Search arXiv for a randomly chosen built-in topic.

    One topic is drawn from a fixed list and between 5 and 15 results
    (inclusive, also drawn at random) are requested for it.

    Returns:
        Whatever ``retrieve_papers`` returns for the chosen topic.
    """
    topic_pool = ["artificial intelligence", "quantum computing", "neuroscience", "climate change", "robotics"]
    chosen_topic = random.choice(topic_pool)
    result_limit = random.randint(5, 15)
    return retrieve_papers(chosen_topic, max_results=result_limit)
111
 
# -------------------------------
# Streamlit UI
# -------------------------------
# Streamlit re-runs this script on every interaction; results are kept in
# st.session_state so they survive reruns triggered by other widgets.
st.title("📚 PaperPilot – Intelligent Academic Navigator")

with st.sidebar:
    st.header("🔍 Search Parameters")
    query = st.text_input("Research topic or question:")

    col1, col2 = st.columns([3, 1])
    with col1:
        search_button = st.button("🚀 Find Articles")
    with col2:
        random_button = st.button("🎲 Random Papers")

if search_button:
    if query.strip():
        with st.spinner("Searching arXiv..."):
            papers = retrieve_papers(query)
        if papers:
            st.session_state.papers = papers
        else:
            st.error("No papers found. Try different keywords.")
    else:
        # Tell the user why nothing happened instead of silently ignoring
        # the click.
        st.warning("Please enter a search query")
elif random_button:
    with st.spinner("Fetching random papers..."):
        papers = random_paper_search()
    if papers:
        st.session_state.papers = papers
    else:
        # Keep any previous results on screen rather than replacing them
        # with an empty list.
        st.error("No papers found. Try different keywords.")

if 'papers' in st.session_state:
    for idx, paper in enumerate(st.session_state.papers, 1):
        with st.expander(f"{idx}. {paper['title']}"):
            published = paper['published']
            if isinstance(published, datetime.datetime):
                published_label = published.strftime('%Y-%m-%d')
            else:
                published_label = 'Unknown'

            st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
            st.markdown(f"**Published:** {published_label}")
            st.markdown(f"**Trust Score:** {paper['trust_score']} / 100")
            st.markdown(f"**Relevance Score:** {paper['relevance_score']} / 100")
            st.markdown(f"**DOI:** [Link]({paper['doi']})")
            st.markdown(f"**Bibliographic Explorer:** [Link]({paper['bib_explorer']})")
            st.markdown(f"**Connected Papers:** [Link]({paper['connected_papers']})")
            st.markdown(f"**Litmaps:** [Link]({paper['litmaps']})")
            st.markdown(f"**Scite:** [Link]({paper['scite']})")
            st.markdown("**Abstract:**")
            st.write(paper['summary'])