# app.py — PaperPilot: retrieve arXiv papers, summarize them, map concepts,
# format citations, and draft research-proposal suggestions (Streamlit app).
import datetime
import itertools

import arxiv
import matplotlib.pyplot as plt
import networkx as nx
import streamlit as st
from transformers import pipeline
# Initialize Hugging Face pipelines for summarization and text generation.
# st.cache_resource keeps a single copy of each model per server process, so
# the expensive model download/load does NOT rerun on every Streamlit script
# rerun (without it, each button click reloads both models from scratch).
@st.cache_resource
def load_summarizer():
    """Return a cached facebook/bart-large-cnn summarization pipeline."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


@st.cache_resource
def load_generator():
    """Return a cached GPT-2 text-generation pipeline."""
    return pipeline("text-generation", model="gpt2")


summarizer = load_summarizer()
generator = load_generator()
# -------------------------------
# Helper Functions
# -------------------------------
def retrieve_papers(query, max_results=5):
    """
    Query arXiv and return up to *max_results* matching papers.

    Each result is a dict with keys: ``title``, ``summary`` (abstract),
    ``url`` (PDF link), ``authors`` (list of names), and ``published``
    (a datetime, as supplied by the arxiv client).
    """
    search = arxiv.Search(query=query, max_results=max_results)
    return [
        {
            "title": result.title,
            "summary": result.summary,
            "url": result.pdf_url,
            "authors": [author.name for author in result.authors],
            "published": result.published,
        }
        for result in search.results()
    ]
def summarize_text(text):
    """
    Produce a concise abstractive summary of *text*.

    Returns the summary string produced by the BART summarization pipeline.
    ``truncation=True`` clips inputs longer than the model's maximum input
    length (1024 tokens for BART) instead of raising an error — the original
    comment noted this limit but never guarded against it.
    """
    summarized = summarizer(
        text, max_length=130, min_length=30, do_sample=False, truncation=True
    )
    return summarized[0]['summary_text']
def generate_concept_map(papers):
    """
    Build an undirected graph connecting papers that share an author.

    Nodes are paper titles; an edge joins two papers whose author lists
    intersect. Returns a ``networkx.Graph`` (it may have no edges).
    """
    G = nx.Graph()
    G.add_nodes_from(paper['title'] for paper in papers)
    # Precompute one author set per paper; the original rebuilt both sets
    # inside the pair loop, doing O(n^2) redundant set constructions.
    author_sets = [set(paper['authors']) for paper in papers]
    for i, j in itertools.combinations(range(len(papers)), 2):
        if author_sets[i] & author_sets[j]:
            G.add_edge(papers[i]['title'], papers[j]['title'])
    return G
def generate_citation(paper):
    """
    Format a single paper record as an APA-style citation string.

    Expects keys ``authors`` (list of names), ``published`` (datetime, or
    anything else — rendered as "n.d."), ``title``, and ``url``.
    """
    published = paper['published']
    if isinstance(published, datetime.datetime):
        year = published.year
    else:
        # "no date" — the record's publication timestamp is missing/unusable.
        year = "n.d."
    author_list = ", ".join(paper['authors'])
    return f"{author_list} ({year}). {paper['title']}. Retrieved from {paper['url']}"
def generate_proposal_suggestions(text):
    """
    Generate research-proposal ideas grounded in the synthesized review *text*.

    Returns the GPT-2 completion as produced by the text-generation pipeline
    (note: the pipeline's output includes the prompt itself).
    """
    instruction = (
        "Based on the following literature review, propose a novel research proposal "
        "including potential research questions and an outline for experimental design."
    )
    prompt = f"{instruction}\n\n{text}\n\nProposal:"
    completions = generator(prompt, max_new_tokens=50, num_return_sequences=1)
    return completions[0]['generated_text']
# -------------------------------
# Streamlit User Interface
# -------------------------------
# NOTE: the pasted source lost all indentation; the nesting below is
# reconstructed from the control-flow keywords and the step comments.
st.title("📚PaperPilot – The Intelligent Academic Navigator")
st.markdown("Welcome to **PaperPilot**! Enter a research topic or question below to retrieve academic papers, generate summaries, visualize concept maps, format citations, and get research proposal suggestions.")

# Input section
query = st.text_input("Research Topic or Question:")

if st.button("Search"):
    if query.strip() == "":
        st.warning("Please enter a research topic or question.")
    else:
        # --- Step 1: Retrieve Papers ---
        with st.spinner("Retrieving relevant academic papers..."):
            papers = retrieve_papers(query, max_results=5)
        if not papers:
            st.error("No papers found. Please try a different query.")
        else:
            st.success(f"Found {len(papers)} papers.")

            # --- Step 2: Display Retrieved Papers ---
            st.header("Retrieved Papers")
            for idx, paper in enumerate(papers, start=1):
                with st.expander(f"{idx}. {paper['title']}"):
                    st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
                    st.markdown(f"**Published:** {paper['published'].strftime('%Y-%m-%d') if isinstance(paper['published'], datetime.datetime) else 'n.d.'}")
                    st.markdown(f"**Link:** [PDF Link]({paper['url']})")
                    st.markdown("**Abstract:**")
                    st.write(paper['summary'])

            # --- Step 3: Generate Summaries & Literature Review ---
            st.header("Automated Summaries & Literature Review")
            combined_summary = ""
            for paper in papers:
                st.subheader(f"Summary for: {paper['title']}")
                # Re-summarize the arXiv abstract into a shorter digest.
                summary_text = summarize_text(paper['summary'])
                st.write(summary_text)
                combined_summary += summary_text + " "

            # --- Step 4: Create Visual Concept Map & Gap Analysis ---
            st.header("Visual Concept Map & Gap Analysis")
            G = generate_concept_map(papers)
            if len(G.nodes) > 0:
                fig, ax = plt.subplots(figsize=(8, 6))
                # Fixed seed keeps the layout stable across reruns.
                pos = nx.spring_layout(G, seed=42)
                nx.draw_networkx(G, pos, with_labels=True, node_color='skyblue', edge_color='gray', node_size=1500, font_size=8, ax=ax)
                st.pyplot(fig)
            else:
                st.info("Not enough data to generate a concept map.")

            # --- Step 5: Citation & Reference Management ---
            st.header("Formatted Citations (APA Style)")
            for paper in papers:
                citation = generate_citation(paper)
                st.markdown(f"- {citation}")

            # --- Step 6: Research Proposal Assistance ---
            st.header("Research Proposal Suggestions")
            proposal = generate_proposal_suggestions(combined_summary)
            st.write(proposal)

st.caption("Built with ❤️")