sunbal7 committed
Commit ff54315 · verified · 1 Parent(s): 449bb7f

Update app.py

Files changed (1)
  1. app.py +151 -169
app.py CHANGED
@@ -1,175 +1,157 @@
  # app.py
  import streamlit as st
  import arxiv
- import requests
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
- from keybert import KeyBERT
- from pyvis.network import Network
- from pybtex.database import parse_string
- import numpy as np
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.decomposition import LatentDirichletAllocation
- import time
- import json
-
- # Initialize models
- @st.cache_resource
- def load_models():
-     # Summarization model
-     tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
-     summarizer = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
-
-     # Keyword model
-     kw_model = KeyBERT()
-
-     # Research suggestion model
-     suggestion_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
-     suggestion_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
-
-     return tokenizer, summarizer, kw_model, suggestion_tokenizer, suggestion_model
-
- def fetch_arxiv_papers(query, max_results=10):
-     client = arxiv.Client()
-     search = arxiv.Search(
-         query=query,
-         max_results=max_results,
-         sort_by=arxiv.SortCriterion.Relevance
-     )
-     results = []
-     for result in client.results(search):
-         results.append({
              "title": result.title,
-             "abstract": result.summary,
-             "authors": [a.name for a in result.authors],
-             "published": result.published.strftime("%Y-%m-%d"),
-             "pdf_url": result.pdf_url,
-             "doi": result.doi
-         })
-     return results
-
- def fetch_semantic_scholar(query, max_results=5):
-     url = "https://api.semanticscholar.org/graph/v1/paper/search"
-     params = {
-         "query": query,
-         "limit": max_results,
-         "fields": "title,abstract,authors,year,references,url"
-     }
-     headers = {"x-api-key": "YOUR_API_KEY"}
-     response = requests.get(url, params=params, headers=headers)
-     if response.status_code == 200:
-         return response.json().get("data", [])
-     return []
-
- def generate_summary(text, tokenizer, model, max_length=300):
-     inputs = tokenizer([text], max_length=1024, return_tensors="pt", truncation=True)
-     summary_ids = model.generate(
-         inputs.input_ids,
-         max_length=max_length,
-         min_length=50,
-         length_penalty=2.0,
-         num_beams=4,
-         early_stopping=True
-     )
-     return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-
- def generate_concept_map(texts, model):
-     keywords = []
-     for text in texts:
-         kws = model.extract_keywords(text, keyphrase_ngram_range=(1,2))
-         keywords.extend([kw[0] for kw in kws])
-
-     vectorizer = TfidfVectorizer()
-     X = vectorizer.fit_transform(keywords)
-
-     net = Network(height="400px", width="100%")
-     unique_kws = list(set(keywords))
-
-     for kw in unique_kws:
-         net.add_node(kw, label=kw)
-
-     similarities = (X * X.T).A
-     np.fill_diagonal(similarities, 0)
-
-     for i in range(len(unique_kws)):
-         for j in range(i+1, len(unique_kws)):
-             if similarities[i,j] > 0.2:
-                 net.add_edge(unique_kws[i], unique_kws[j], value=similarities[i,j])
-
-     return net
-
- def generate_citations(papers):
-     citations = []
-     for paper in papers:
-         entry = {
-             "title": paper.get("title", ""),
-             "authors": paper.get("authors", []),
-             "year": paper.get("year", ""),
-             "url": paper.get("pdf_url") or paper.get("url", "")
          }
-         citations.append(entry)
-     return citations
-
- def generate_research_suggestions(context, tokenizer, model):
-     input_text = f"Based on this research context: {context}\nGenerate three research questions:"
-     inputs = tokenizer(input_text, return_tensors="pt")
-     outputs = model.generate(**inputs, max_length=200)
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- def main():
-     st.title("PaperPilot - Intelligent Academic Navigator")
-
-     # Load models
-     tokenizer, summarizer, kw_model, suggestion_tokenizer, suggestion_model = load_models()
-
-     # User input
-     query = st.text_input("Enter your research topic or question:")
-
-     if query:
-         with st.spinner("Searching academic databases..."):
-             arxiv_results = fetch_arxiv_papers(query)
-             ss_results = fetch_semantic_scholar(query)
-             all_papers = arxiv_results + ss_results
-
-         if not all_papers:
-             st.warning("No papers found. Try a different query.")
-             return
-
-         # Display papers
-         st.subheader("Relevant Papers")
-         for idx, paper in enumerate(all_papers[:5]):
-             with st.expander(f"{paper['title']}"):
-                 st.write(f"**Abstract:** {paper['abstract']}")
-
-                 # Generate summary
-                 summary = generate_summary(paper['abstract'], tokenizer, summarizer)
-                 st.write(f"**Summary:** {summary}")
-
-                 # Display metadata
-                 st.write(f"**Authors:** {', '.join(paper.get('authors', []))}")
-                 st.write(f"**Published:** {paper.get('published') or paper.get('year'))}")
-                 st.write(f"**URL:** {paper.get('pdf_url') or paper.get('url'))}")
-
-         # Concept Map
-         st.subheader("Research Concept Map")
-         texts = [p['abstract'] for p in all_papers]
-         net = generate_concept_map(texts, kw_model)
-         net.save_graph("concept_map.html")
-         HtmlFile = open("concept_map.html", 'r', encoding='utf-8')
-         components.html(HtmlFile.read(), height=500)
-
-         # Citations
-         st.subheader("Citation Management")
-         citations = generate_citations(all_papers)
-         citation_format = st.selectbox("Select citation style:", ["APA", "MLA", "Chicago"])
-
-         for cite in citations:
-             st.code(f"{cite['authors'][0]} et al. ({cite['year']}). {cite['title']}. URL: {cite['url']}")
-
-         # Research Suggestions
-         st.subheader("Research Proposal Suggestions")
-         context = " ".join([p['abstract'] for p in all_papers[:3]])
-         suggestions = generate_research_suggestions(context, suggestion_tokenizer, suggestion_model)
-         st.write(suggestions)
-
- if __name__ == "__main__":
-     main()
  # app.py
+
  import streamlit as st
  import arxiv
+ import networkx as nx
+ import matplotlib.pyplot as plt
+ import datetime
+
+ from transformers import pipeline
+
+ # Initialize Hugging Face pipelines for summarization and text generation
+ @st.cache_resource(show_spinner=False)
+ def load_summarizer():
+     return pipeline("summarization", model="facebook/bart-large-cnn")
+
+ @st.cache_resource(show_spinner=False)
+ def load_generator():
+     return pipeline("text-generation", model="gpt2")
+
+ summarizer = load_summarizer()
+ generator = load_generator()
+
+ # -------------------------------
+ # Helper Functions
+ # -------------------------------
+
+ def retrieve_papers(query, max_results=5):
+     """
+     Retrieve academic papers from arXiv based on the query.
+     """
+     search = arxiv.Search(query=query, max_results=max_results)
+     papers = []
+     for result in search.results():
+         paper = {
              "title": result.title,
+             "summary": result.summary,
+             "url": result.pdf_url,
+             "authors": [author.name for author in result.authors],
+             "published": result.published
          }
+         papers.append(paper)
+     return papers
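One caveat on `retrieve_papers`: `Search.results()` still works but is deprecated in `arxiv` 2.x in favor of iterating through a `Client`, the pattern the removed code above already used. A minimal sketch of the same loop under that assumption:

    import arxiv

    def retrieve_papers_via_client(query, max_results=5):
        # Hypothetical variant: Client.results() replaces the deprecated
        # Search.results() iterator in arxiv 2.x.
        client = arxiv.Client()
        search = arxiv.Search(query=query, max_results=max_results)
        return [
            {
                "title": r.title,
                "summary": r.summary,
                "url": r.pdf_url,
                "authors": [a.name for a in r.authors],
                "published": r.published,
            }
            for r in client.results(search)
        ]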
+
+ def summarize_text(text):
+     """
+     Use a generative model to create a concise summary of the input text.
+     """
+     # The summarizer may need the text to be below a certain token length.
+     # If necessary, you could chunk the text.
+     summarized = summarizer(text, max_length=130, min_length=30, do_sample=False)
+     return summarized[0]['summary_text']
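The chunking that the comment in `summarize_text` alludes to could look like the sketch below; the 1,000-character window and the helper name are assumptions for illustration, not something the app defines:

    def summarize_long_text(text, chunk_chars=1000):
        # Hypothetical helper: split the input into fixed-size character
        # chunks, summarize each chunk, and join the partial summaries.
        chunks = [text[i:i + chunk_chars] for i in range(0, len(text), chunk_chars)]
        partials = [
            summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
            for chunk in chunks
        ]
        return " ".join(partials)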
+
+ def generate_concept_map(papers):
+     """
+     Generate a visual concept map by connecting papers with shared authors.
+     """
+     G = nx.Graph()
+     # Add nodes for each paper title
+     for paper in papers:
+         G.add_node(paper['title'])
+     # Create edges between papers that share at least one common author
+     for i in range(len(papers)):
+         for j in range(i + 1, len(papers)):
+             common_authors = set(papers[i]['authors']).intersection(set(papers[j]['authors']))
+             if common_authors:
+                 G.add_edge(papers[i]['title'], papers[j]['title'])
+     return G
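The shared-author rule is easy to sanity-check with toy data (all titles and authors below are made up):

    toy_papers = [
        {"title": "Paper A", "authors": ["Alice", "Bob"]},
        {"title": "Paper B", "authors": ["Bob", "Carol"]},
        {"title": "Paper C", "authors": ["Dave"]},
    ]
    G = generate_concept_map(toy_papers)
    print(sorted(G.edges()))  # [('Paper A', 'Paper B')]; only A and B share an author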
+
+ def generate_citation(paper):
+     """
+     Format citation information in APA style.
+     """
+     authors = ", ".join(paper['authors'])
+     year = paper['published'].year if isinstance(paper['published'], datetime.datetime) else "n.d."
+     title = paper['title']
+     url = paper['url']
+     citation = f"{authors} ({year}). {title}. Retrieved from {url}"
+     return citation
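For a hypothetical paper dict, the formatter produces output like the final comment below. Strict APA would invert names to "Last, F. M.", which this simple join does not attempt:

    sample = {
        "title": "A Hypothetical Study",
        "authors": ["Jane Doe", "John Smith"],
        "published": datetime.datetime(2021, 6, 1),
        "url": "https://arxiv.org/pdf/0000.00000",
    }
    print(generate_citation(sample))
    # Jane Doe, John Smith (2021). A Hypothetical Study. Retrieved from https://arxiv.org/pdf/0000.00000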
+
+ def generate_proposal_suggestions(text):
+     """
+     Generate research proposal suggestions based on the synthesized literature review.
+     """
+     prompt = (
+         "Based on the following literature review, propose a novel research proposal "
+         "including potential research questions and an outline for experimental design.\n\n"
+         f"{text}\n\nProposal:"
+     )
+     generated = generator(prompt, max_new_tokens=50, num_return_sequences=1)
+     return generated[0]['generated_text']
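gpt2 decodes greedily through this pipeline by default, which tends to produce repetitive continuations on long prompts. If the proposals come out looping, a sampled variant could be tried; the function name and parameter values below are illustrative, not part of the app:

    def generate_proposal_suggestions_sampled(text):
        # Hypothetical variant with sampling enabled and a larger token budget.
        prompt = (
            "Based on the following literature review, propose a novel research proposal "
            "including potential research questions and an outline for experimental design.\n\n"
            f"{text}\n\nProposal:"
        )
        generated = generator(prompt, max_new_tokens=150, do_sample=True,
                              temperature=0.7, top_p=0.9, num_return_sequences=1)
        return generated[0]['generated_text']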
+
+ # -------------------------------
+ # Streamlit User Interface
+ # -------------------------------
+
+ st.title("📚 PaperPilot – The Intelligent Academic Navigator")
+ st.markdown("Welcome to **PaperPilot**! Enter a research topic or question below to retrieve academic papers, generate summaries, visualize concept maps, format citations, and get research proposal suggestions.")
+
+ # Input section
+ query = st.text_input("Research Topic or Question:")
+
+ if st.button("Search"):
+     if query.strip() == "":
+         st.warning("Please enter a research topic or question.")
+     else:
+         # --- Step 1: Retrieve Papers ---
+         with st.spinner("Retrieving relevant academic papers..."):
+             papers = retrieve_papers(query, max_results=5)

+         if not papers:
+             st.error("No papers found. Please try a different query.")
+         else:
+             st.success(f"Found {len(papers)} papers.")
+
+             # --- Step 2: Display Retrieved Papers ---
+             st.header("Retrieved Papers")
+             for idx, paper in enumerate(papers, start=1):
+                 with st.expander(f"{idx}. {paper['title']}"):
+                     st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
+                     st.markdown(f"**Published:** {paper['published'].strftime('%Y-%m-%d') if isinstance(paper['published'], datetime.datetime) else 'n.d.'}")
+                     st.markdown(f"**Link:** [PDF Link]({paper['url']})")
+                     st.markdown("**Abstract:**")
+                     st.write(paper['summary'])
+
+             # --- Step 3: Generate Summaries & Literature Review ---
+             st.header("Automated Summaries & Literature Review")
+             combined_summary = ""
+             for paper in papers:
+                 st.subheader(f"Summary for: {paper['title']}")
+                 # Use the paper summary as input for further summarization
+                 summary_text = summarize_text(paper['summary'])
+                 st.write(summary_text)
+                 combined_summary += summary_text + " "
+
+             # --- Step 4: Create Visual Concept Map & Gap Analysis ---
+             st.header("Visual Concept Map & Gap Analysis")
+             G = generate_concept_map(papers)
+             if len(G.nodes) > 0:
+                 fig, ax = plt.subplots(figsize=(8, 6))
+                 pos = nx.spring_layout(G, seed=42)
+                 nx.draw_networkx(G, pos, with_labels=True, node_color='skyblue', edge_color='gray', node_size=1500, font_size=8, ax=ax)
+                 st.pyplot(fig)
+             else:
+                 st.info("Not enough data to generate a concept map.")
+
+             # --- Step 5: Citation & Reference Management ---
+             st.header("Formatted Citations (APA Style)")
+             for paper in papers:
+                 citation = generate_citation(paper)
+                 st.markdown(f"- {citation}")
+
+             # --- Step 6: Research Proposal Assistance ---
+             st.header("Research Proposal Suggestions")
+             proposal = generate_proposal_suggestions(combined_summary)
+             st.write(proposal)
+
+ st.caption("Built with ❤️")
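Since the helpers are plain functions, the pipeline can be smoke-tested without the browser. Appending a guard like this to app.py and running `python app.py` exercises retrieval, citation, and summarization (Streamlit emits "missing ScriptRunContext" warnings outside `streamlit run`; the query string is arbitrary):

    if __name__ == "__main__":
        # Hypothetical smoke test of the helper functions.
        papers = retrieve_papers("graph neural networks", max_results=2)
        for p in papers:
            print(generate_citation(p))
        if papers:
            print(summarize_text(papers[0]["summary"]))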