PaperPilot

Running

App Files Community

flytoe committed on Mar 18

Commit

70b4875

verified ·

1 Parent(s): 7a0e4f2

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -66

app.py CHANGED Viewed

@@ -2,98 +2,151 @@ import os
 import streamlit as st
 import arxiv
 import random
-import networkx as nx
-import matplotlib.pyplot as plt
 import datetime
-from groq import Groq
-client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-def groq_summarize(text: str) -> str:
-    response = client.chat.completions.create(
-        messages=[
-            {"role": "user", "content": f"Summarize in 250 characters:\n{text}"}
-        ],
-        model="llama-3.3-70b-versatile",
-    )
-    return response.choices[0].message.content.strip()
-def groq_generate(text: str) -> str:
-    response = client.chat.completions.create(
-        messages=[{"role": "user", "content": text}],
-        model="llama-3.3-70b-versatile",
-    )
-    return response.choices[0].message.content.strip()
 def retrieve_papers(query, max_results=5):
     search = arxiv.Search(query=query, max_results=max_results)
     papers = []
     for result in search.results():
-        paper_id = result.entry_id.split("/")[-1]
         paper = {
             "title": result.title,
             "summary": result.summary,
             "url": result.pdf_url,
             "authors": [author.name for author in result.authors],
             "published": result.published,
-            "doi": f"https://doi.org/10.48550/arXiv.{paper_id}",
-            "bibliographic_explorer": f"https://arxiv.org/abs/{paper_id}",
-            "litmaps": f"https://app.litmaps.com/preview/{paper_id}",
-            "trust_score": random.randint(60, 100),
-            "relevance_score": random.randint(50, 100)
         }
         papers.append(paper)
     return papers
-def summarize_text(text):
-    return groq_summarize(text)
-def get_cached_summary(paper_id, text):
-    if 'summaries' not in st.session_state:
-        st.session_state.summaries = {}
-    if paper_id not in st.session_state.summaries:
-        st.session_state.summaries[paper_id] = summarize_text(text)
-    return st.session_state.summaries[paper_id]
 def random_paper_search():
-    topics = ["machine learning", "quantum computing", "climate change", "robotics", "health AI"]
-    return random.choice(topics)
 st.title("📚 PaperPilot – Intelligent Academic Navigator")
-st.sidebar.header("🔍 Search Parameters")
-query = st.sidebar.text_input("Research topic or question:")
-if st.sidebar.button("🎲 Random Search"):
-    query = random_paper_search()
-    st.sidebar.text(f"Random Topic: {query}")
-if st.sidebar.button("🚀 Find Articles"):
-    if query.strip():
         with st.spinner("Searching arXiv..."):
-            papers = retrieve_papers(query, random.randint(5, 15))
             if papers:
                 st.session_state.papers = papers
-                st.success(f"Found {len(papers)} papers!")
-                st.session_state.active_section = "articles"
             else:
                 st.error("No papers found. Try different keywords.")
-    else:
-        st.warning("Please enter a search query")
-if 'active_section' not in st.session_state:
-    st.session_state.active_section = "none"
-if 'papers' in st.session_state and st.session_state.papers:
-    papers = st.session_state.papers
-    if st.session_state.active_section == "articles":
-        st.header("📑 Retrieved Papers")
-        for idx, paper in enumerate(papers, 1):
-            with st.expander(f"{idx}. {paper['title']}"):
-                st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
-                pub_date = paper['published'].strftime('%Y-%m-%d') if isinstance(paper['published'], datetime.datetime) else "n.d."
-                st.markdown(f"**Published:** {pub_date}")
-                st.markdown(f"**[PDF Link]({paper['url']}) | [DOI]({paper['doi']}) | [Bibliographic Explorer]({paper['bibliographic_explorer']}) | [Litmaps]({paper['litmaps']})**")
-                st.markdown(f"**Trust Score:** {paper['trust_score']} | **Relevance Score:** {paper['relevance_score']}")
-                summary = get_cached_summary(f"paper_{idx}", paper['summary'])
-                st.write(summary)
-                if st.button(f"🔍 Explain like I'm 5 (ELI5) {idx}"):
-                    st.write(groq_generate(f"Explain this in simple terms: {summary}"))

 import streamlit as st
 import arxiv
 import random
 import datetime
+import requests
+from scholarly import scholarly
+# -------------------------------
+# Helper Functions
+# -------------------------------
+def get_paper_metadata(arxiv_id):
+    """Fetch metadata like citations and connected papers for scoring."""
+    metadata = {
+        "citations": 0,
+        "institution": "Unknown",
+        "authors": [],
+        "connected_papers": 0
+    }
+    # Fetch citation count from scite.ai
+    scite_url = f"https://api.scite.ai/v1/papers/arxiv:{arxiv_id}"
+    response = requests.get(scite_url)
+    if response.status_code == 200:
+        data = response.json()
+        metadata["citations"] = data.get("citation_count", 0)
+    # Fetch connected paper count from Connected Papers
+    connected_papers_url = f"https://www.connectedpapers.com/api/graph/{arxiv_id}"
+    response = requests.get(connected_papers_url)
+    if response.status_code == 200:
+        data = response.json()
+        metadata["connected_papers"] = len(data.get("nodes", []))
+    return metadata
+def calculate_trust_score(metadata):
+    """Compute trust score based on citations and author credibility."""
+    trust_score = 50  # Base score
+    # Citations factor (max boost 30 points)
+    if metadata["citations"] > 100:
+        trust_score += 30
+    elif metadata["citations"] > 50:
+        trust_score += 20
+    elif metadata["citations"] > 10:
+        trust_score += 10
+    # Connected Papers factor (max boost 20 points)
+    if metadata["connected_papers"] > 20:
+        trust_score += 20
+    elif metadata["connected_papers"] > 10:
+        trust_score += 10
+    return min(trust_score, 100)
+def calculate_relevance_score(paper, query):
+    """Compute relevance score based on keyword match and recency."""
+    relevance_score = 50  # Base score
+    # Keyword match factor
+    query_terms = query.lower().split()
+    title_terms = paper['title'].lower().split()
+    match_count = len(set(query_terms) & set(title_terms))
+    relevance_score += match_count * 5
+    # Publication date factor
+    if isinstance(paper['published'], datetime.datetime):
+        years_old = datetime.datetime.now().year - paper['published'].year
+        if years_old < 1:
+            relevance_score += 15
+        elif years_old < 3:
+            relevance_score += 10
+        elif years_old < 5:
+            relevance_score += 5
+    return min(relevance_score, 100)
 def retrieve_papers(query, max_results=5):
+    """Retrieve academic papers from arXiv."""
     search = arxiv.Search(query=query, max_results=max_results)
     papers = []
     for result in search.results():
+        arxiv_id = result.entry_id.split("/")[-1]
+        metadata = get_paper_metadata(arxiv_id)
+        trust_score = calculate_trust_score(metadata)
+        relevance_score = calculate_relevance_score({"title": result.title, "published": result.published}, query)
         paper = {
             "title": result.title,
             "summary": result.summary,
             "url": result.pdf_url,
+            "doi": f"https://doi.org/10.48550/arXiv.{arxiv_id}",
+            "bib_explorer": f"https://arxiv.org/abs/{arxiv_id}",
+            "connected_papers": f"https://www.connectedpapers.com/api/graph/{arxiv_id}",
+            "litmaps": f"https://app.litmaps.com/preview/{arxiv_id}",
+            "scite": f"https://scite.ai/reports/arxiv:{arxiv_id}",
             "authors": [author.name for author in result.authors],
             "published": result.published,
+            "trust_score": trust_score,
+            "relevance_score": relevance_score
         }
         papers.append(paper)
     return papers
 def random_paper_search():
+    """Retrieve random papers without user input."""
+    random_queries = ["artificial intelligence", "quantum computing", "neuroscience", "climate change", "robotics"]
+    query = random.choice(random_queries)
+    return retrieve_papers(query, max_results=random.randint(5, 15))
+# -------------------------------
+# Streamlit UI
+# -------------------------------
 st.title("📚 PaperPilot – Intelligent Academic Navigator")
+with st.sidebar:
+    st.header("🔍 Search Parameters")
+    query = st.text_input("Research topic or question:")
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        search_button = st.button("🚀 Find Articles")
+    with col2:
+        random_button = st.button("🎲 Random Papers")
+    if search_button and query.strip():
         with st.spinner("Searching arXiv..."):
+            papers = retrieve_papers(query)
             if papers:
                 st.session_state.papers = papers
             else:
                 st.error("No papers found. Try different keywords.")
+    elif random_button:
+        with st.spinner("Fetching random papers..."):
+            papers = random_paper_search()
+            st.session_state.papers = papers
+if 'papers' in st.session_state:
+    for idx, paper in enumerate(st.session_state.papers, 1):
+        with st.expander(f"{idx}. {paper['title']}"):
+            st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
+            st.markdown(f"**Published:** {paper['published'].strftime('%Y-%m-%d') if isinstance(paper['published'], datetime.datetime) else 'Unknown'}")
+            st.markdown(f"**Trust Score:** {paper['trust_score']} / 100")
+            st.markdown(f"**Relevance Score:** {paper['relevance_score']} / 100")
+            st.markdown(f"**DOI:** [Link]({paper['doi']})")
+            st.markdown(f"**Bibliographic Explorer:** [Link]({paper['bib_explorer']})")
+            st.markdown(f"**Connected Papers:** [Link]({paper['connected_papers']})")
+            st.markdown(f"**Litmaps:** [Link]({paper['litmaps']})")
+            st.markdown(f"**Scite:** [Link]({paper['scite']})")
+            st.markdown("**Abstract:**")
+            st.write(paper['summary'])