Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,19 +1,17 @@
|
|
1 |
import os
|
2 |
import logging
|
3 |
import gradio as gr
|
|
|
4 |
from dotenv import load_dotenv
|
5 |
-
from langchain.document_loaders import ArxivLoader
|
6 |
from langchain.text_splitter import TokenTextSplitter
|
7 |
from langchain.vectorstores import Chroma
|
8 |
from langchain_community.embeddings import HuggingFaceHubEmbeddings
|
9 |
-
from langchain.chains import RetrievalQA
|
10 |
-
from langchain.chains.summarize import load_summarize_chain
|
11 |
from langchain_groq import ChatGroq
|
12 |
-
from transformers import pipeline
|
13 |
from PyPDF2 import PdfReader
|
14 |
from huggingface_hub import login
|
15 |
from groq import AsyncGroq, Groq
|
16 |
-
import
|
17 |
|
18 |
# Load environment variables
|
19 |
load_dotenv()
|
@@ -34,92 +32,17 @@ login(HUGGING_API_KEY)
|
|
34 |
# Load models and embeddings
|
35 |
embedding_model = HuggingFaceHubEmbeddings(huggingfacehub_api_token=HUGGING_API_KEY)
|
36 |
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
|
37 |
-
|
38 |
-
def display_results(result):
|
39 |
-
"""Format and display results properly."""
|
40 |
-
return "\n".join(result)
|
41 |
-
|
42 |
-
def summarize_text(text):
|
43 |
-
"""Summarize text using the Groq API."""
|
44 |
-
try:
|
45 |
-
sum_client = Groq(api_key=GROQ_API_KEY)
|
46 |
-
messages = [
|
47 |
-
{"role": "system", "content": "You are an excellent analyst who excels in summarization task. If I give you the whole text, you should summarize it."},
|
48 |
-
{"role": "user", "content": f"Summarize the paper: {text}"}
|
49 |
-
]
|
50 |
-
|
51 |
-
response = sum_client.chat.completions.create(
|
52 |
-
messages=messages,
|
53 |
-
model="llama3-70b-8192",
|
54 |
-
temperature=0,
|
55 |
-
max_tokens=8192,
|
56 |
-
top_p=1,
|
57 |
-
)
|
58 |
-
return response.choices[0].message.content
|
59 |
-
|
60 |
-
except Exception as e:
|
61 |
-
logger.error(f"Error summarizing text: {e}")
|
62 |
-
return "Error in summarization."
|
63 |
-
|
64 |
-
def summarize_pdf(pdf_file_path, max_length):
|
65 |
-
"""Extract text from a PDF and summarize it."""
|
66 |
-
try:
|
67 |
-
reader = PdfReader(pdf_file_path)
|
68 |
-
text = "\n".join(page.extract_text() or "" for page in reader.pages)
|
69 |
-
|
70 |
-
text_splitter = TokenTextSplitter(chunk_size=8192, chunk_overlap=1000)
|
71 |
-
chunks = text_splitter.split_text(text)
|
72 |
-
|
73 |
-
summary = ""
|
74 |
-
for chunk in chunks:
|
75 |
-
summary += summarize_text(chunk)
|
76 |
-
|
77 |
-
return summary
|
78 |
-
|
79 |
-
except Exception as e:
|
80 |
-
logger.error(f"Error summarizing PDF: {e}")
|
81 |
-
return "Failed to process the PDF."
|
82 |
-
|
83 |
-
def summarize_arxiv_pdf(query):
|
84 |
-
"""Summarize an arXiv paper given a query."""
|
85 |
-
try:
|
86 |
-
loader = ArxivLoader(query=query, load_max_docs=10)
|
87 |
-
documents = loader.load()
|
88 |
-
text_splitter = TokenTextSplitter(chunk_size=5700, chunk_overlap=100)
|
89 |
-
chunks = text_splitter.split_documents(documents)
|
90 |
-
|
91 |
-
ref_summary = ""
|
92 |
-
for chunk in chunks:
|
93 |
-
ref_summary += summarize_text(chunk.page_content)
|
94 |
-
|
95 |
-
arxiv_summary = loader.get_summaries_as_docs()
|
96 |
-
|
97 |
-
summaries = []
|
98 |
-
for doc in arxiv_summary:
|
99 |
-
title = doc.metadata.get("Title", "Unknown Title")
|
100 |
-
authors = doc.metadata.get("Authors", "Unknown Authors")
|
101 |
-
url = doc.metadata.get("Entry ID", "No URL")
|
102 |
-
|
103 |
-
summaries.append(f"**{title}**\n")
|
104 |
-
summaries.append(f"**Authors:** {authors}\n")
|
105 |
-
summaries.append(f"**View full paper:** [Link to paper]({url})\n")
|
106 |
-
summaries.append(f"**Summary:** {doc.page_content}\n")
|
107 |
-
summaries.append(f"**Enhanced Summary:**\n {ref_summary}")
|
108 |
-
|
109 |
-
return display_results(summaries)
|
110 |
-
|
111 |
-
except Exception as e:
|
112 |
-
logger.error(f"Error summarizing arXiv paper: {e}")
|
113 |
-
return "Failed to process arXiv paper."
|
114 |
-
|
115 |
client = AsyncGroq(api_key=GROQ_API_KEY)
|
116 |
|
|
|
|
|
|
|
117 |
async def chat_with_replit(message, history):
|
118 |
-
"""
|
119 |
try:
|
120 |
messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
|
121 |
|
122 |
-
for chat in history:
|
123 |
user_msg, assistant_msg = chat
|
124 |
messages.append({"role": "user", "content": user_msg})
|
125 |
messages.append({"role": "assistant", "content": assistant_msg})
|
@@ -132,7 +55,7 @@ async def chat_with_replit(message, history):
|
|
132 |
temperature=0,
|
133 |
max_tokens=1024,
|
134 |
top_p=1,
|
135 |
-
stream=False, #
|
136 |
)
|
137 |
return response.choices[0].message.content
|
138 |
|
@@ -140,13 +63,24 @@ async def chat_with_replit(message, history):
|
|
140 |
logger.error(f"Chat error: {e}")
|
141 |
return "Error in chat response."
|
142 |
|
143 |
-
|
144 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
try:
|
|
|
146 |
loader = ArxivLoader(query=str(doi_num), load_max_docs=10)
|
147 |
documents = loader.load_and_split()
|
|
|
|
|
148 |
metadata = documents[0].metadata
|
149 |
|
|
|
150 |
vector_store = Chroma.from_documents(documents, embedding_model)
|
151 |
|
152 |
def retrieve_relevant_content(user_query):
|
@@ -173,63 +107,145 @@ async def chat_with_replit_pdf(message, history, doi_num):
|
|
173 |
return response.choices[0].message.content
|
174 |
|
175 |
except Exception as e:
|
176 |
-
logger.error(f"Error in chat with PDF: {e}")
|
177 |
-
return "Error processing chat with
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
|
179 |
-
#
|
180 |
-
|
181 |
-
|
|
|
|
|
|
|
182 |
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
with gr.Blocks() as app:
|
185 |
-
# Tab
|
186 |
-
with gr.Tab(label="
|
187 |
-
with gr.Row():
|
188 |
-
input_pdf = gr.File(label="Upload PDF file")
|
189 |
-
max_length_slider = gr.Slider(512, 4096, value=2048, step=512, label="Max Length")
|
190 |
-
summarize_pdf_btn = gr.Button(value="Summarize PDF")
|
191 |
-
with gr.Row():
|
192 |
-
output_pdf_summary = gr.Markdown(label="Summary", height=1000)
|
193 |
-
summarize_pdf_btn.click(summarize_pdf, inputs=[input_pdf, max_length_slider], outputs=output_pdf_summary)
|
194 |
-
|
195 |
-
# Tab for Arxiv Summarization
|
196 |
-
with gr.Tab(label="Arxiv Summarization"):
|
197 |
-
with gr.Column():
|
198 |
-
arxiv_number = gr.Textbox(label="Enter arXiv number, i.e 2502.02523")
|
199 |
-
summarize_btn = gr.Button(value="Summarize arXiv Paper")
|
200 |
-
with gr.Column():
|
201 |
-
output_summary = gr.Markdown(label="Summary", height=1000)
|
202 |
-
summarize_btn.click(summarize_arxiv_pdf, inputs=arxiv_number, outputs=output_summary)
|
203 |
-
|
204 |
-
# New Tab for Chat functionality
|
205 |
-
with gr.Tab(label="Chat with Assistant"):
|
206 |
gr.Markdown("### Chat with the Assistant")
|
207 |
with gr.Row():
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
# When the send button is clicked, update the chat history and get a response.
|
216 |
-
def update_chat(user_message, history):
|
217 |
-
# Append the new user message to history with an empty assistant response for now.
|
218 |
history = history or []
|
219 |
history.append([user_message, ""])
|
220 |
return history, history
|
221 |
|
222 |
-
def
|
223 |
-
# Get the last user message and call the chat function
|
224 |
user_message = history[-1][0]
|
225 |
response = chat_with_replit_sync(user_message, history[:-1])
|
226 |
-
# Update the last entry with the assistant's response
|
227 |
history[-1][1] = response
|
228 |
-
# Format the conversation for display
|
229 |
formatted = "\n\n".join([f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history])
|
230 |
return history, formatted
|
231 |
|
232 |
-
|
233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
|
235 |
app.launch()
|
|
|
|
1 |
import os
|
2 |
import logging
|
3 |
import gradio as gr
|
4 |
+
import asyncio
|
5 |
from dotenv import load_dotenv
|
6 |
+
from langchain.document_loaders import ArxivLoader
|
7 |
from langchain.text_splitter import TokenTextSplitter
|
8 |
from langchain.vectorstores import Chroma
|
9 |
from langchain_community.embeddings import HuggingFaceHubEmbeddings
|
|
|
|
|
10 |
from langchain_groq import ChatGroq
|
|
|
11 |
from PyPDF2 import PdfReader
|
12 |
from huggingface_hub import login
|
13 |
from groq import AsyncGroq, Groq
|
14 |
+
from langchain.docstore.document import Document # For creating a document from PDF text
|
15 |
|
16 |
# Load environment variables
|
17 |
load_dotenv()
|
|
|
32 |
# Load models and embeddings
|
33 |
embedding_model = HuggingFaceHubEmbeddings(huggingfacehub_api_token=HUGGING_API_KEY)
|
34 |
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
client = AsyncGroq(api_key=GROQ_API_KEY)
|
36 |
|
37 |
+
# -----------------------------
|
38 |
+
# Chat Functionality (General)
|
39 |
+
# -----------------------------
|
40 |
async def chat_with_replit(message, history):
|
41 |
+
"""General chat functionality using the Groq API."""
|
42 |
try:
|
43 |
messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
|
44 |
|
45 |
+
for chat in history or []:
|
46 |
user_msg, assistant_msg = chat
|
47 |
messages.append({"role": "user", "content": user_msg})
|
48 |
messages.append({"role": "assistant", "content": assistant_msg})
|
|
|
55 |
temperature=0,
|
56 |
max_tokens=1024,
|
57 |
top_p=1,
|
58 |
+
stream=False, # For simplicity we are not streaming
|
59 |
)
|
60 |
return response.choices[0].message.content
|
61 |
|
|
|
63 |
logger.error(f"Chat error: {e}")
|
64 |
return "Error in chat response."
|
65 |
|
66 |
+
def chat_with_replit_sync(message, history):
    """Blocking entry point for the general chat.

    Builds the ``chat_with_replit`` coroutine for ``message`` and ``history``
    and drives it to completion with ``asyncio.run``, returning its result.
    """
    pending_reply = chat_with_replit(message, history)
    return asyncio.run(pending_reply)
|
69 |
+
|
70 |
+
# -------------------------------------------------
|
71 |
+
# Chat Functionality for ArXiv Paper (Document Chat)
|
72 |
+
# -------------------------------------------------
|
73 |
+
async def chat_with_replit_arxiv(message, history, doi_num):
|
74 |
+
"""Chat answering questions using an ArXiv paper as context."""
|
75 |
try:
|
76 |
+
# Load the ArXiv document and split it into chunks
|
77 |
loader = ArxivLoader(query=str(doi_num), load_max_docs=10)
|
78 |
documents = loader.load_and_split()
|
79 |
+
if not documents:
|
80 |
+
return "No documents found for the provided arXiv number."
|
81 |
metadata = documents[0].metadata
|
82 |
|
83 |
+
# Create vector store for the loaded documents
|
84 |
vector_store = Chroma.from_documents(documents, embedding_model)
|
85 |
|
86 |
def retrieve_relevant_content(user_query):
|
|
|
107 |
return response.choices[0].message.content
|
108 |
|
109 |
except Exception as e:
|
110 |
+
logger.error(f"Error in chat with arXiv PDF: {e}")
|
111 |
+
return "Error processing chat with arXiv paper."
|
112 |
+
|
113 |
+
def chat_with_replit_arxiv_sync(message, history, doi_num):
    """Blocking entry point for the arXiv document chat.

    Builds the ``chat_with_replit_arxiv`` coroutine for the given question,
    prior turns and arXiv identifier, then runs it to completion with
    ``asyncio.run`` and returns its result.
    """
    pending_reply = chat_with_replit_arxiv(message, history, doi_num)
    return asyncio.run(pending_reply)
|
116 |
+
|
117 |
+
# -------------------------------------------------
|
118 |
+
# Chat Functionality for Local PDF (Document Chat)
|
119 |
+
# -------------------------------------------------
|
120 |
+
async def chat_with_replit_local_pdf(message, history, pdf_file_path):
    """Answer ``message`` using an uploaded PDF as retrieval context.

    The PDF text is extracted, split into token-bounded chunks, embedded into
    a throw-away Chroma collection, and the chunks most similar to ``message``
    are passed to the Groq chat model together with the earlier turns.

    Args:
        message: The user's current question.
        history: Earlier ``[user, assistant]`` pairs, or ``None``.
        pdf_file_path: Path of the uploaded PDF, an object exposing the path
            as ``.name``, or ``None`` when nothing has been uploaded.

    Returns:
        The model's answer, or a short human-readable error string. This
        function never raises; failures are logged and reported as text.
    """
    import uuid  # local import: only used for the per-request collection name

    if pdf_file_path is None:
        return "Please upload a PDF file first."

    # NOTE(review): some Gradio versions appear to pass a temp-file wrapper
    # instead of a plain path; fall back to its ``.name`` in that case.
    if isinstance(pdf_file_path, (str, os.PathLike)):
        pdf_path = pdf_file_path
    else:
        pdf_path = getattr(pdf_file_path, "name", pdf_file_path)

    vector_store = None
    try:
        # Extract text from the uploaded PDF file.
        reader = PdfReader(pdf_path)
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        if not text.strip():
            return "Could not extract text from PDF."

        # Split into chunks so similarity search can actually select the
        # relevant passages; a single whole-PDF document would always be
        # returned in full and could overflow the model's context window.
        splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=100)
        documents = splitter.split_documents(
            [Document(page_content=text, metadata={"source": str(pdf_path)})]
        )

        # NOTE(review): without an explicit name Chroma falls back to a fixed
        # default collection, so successive calls in one process may end up
        # sharing documents - confirm against the installed versions. A unique
        # name keeps each request isolated either way.
        vector_store = Chroma.from_documents(
            documents,
            embedding_model,
            collection_name=f"pdf-{uuid.uuid4().hex}",
        )
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)

        # System instructions go first, then prior turns, then the question.
        messages = [
            {
                "role": "system",
                "content": f"Answer based on this PDF document: {pdf_path}.\n"
                           f"Relevant Content: {relevant_content}",
            }
        ]
        for user_msg, assistant_msg in history or []:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})

        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return response.choices[0].message.content

    except Exception as e:
        logger.error(f"Error in chat with local PDF: {e}")
        return "Error processing chat with local PDF."

    finally:
        # Drop the per-request collection so embeddings do not accumulate.
        if vector_store is not None:
            try:
                vector_store.delete_collection()
            except Exception as cleanup_error:
                logger.error(f"Error cleaning up PDF vector store: {cleanup_error}")
|
160 |
+
|
161 |
+
def chat_with_replit_local_pdf_sync(message, history, pdf_file):
    """Blocking entry point for the local-PDF document chat.

    Builds the ``chat_with_replit_local_pdf`` coroutine for the given
    question, prior turns and uploaded file, then runs it to completion with
    ``asyncio.run`` and returns its result.
    """
    pending_reply = chat_with_replit_local_pdf(message, history, pdf_file)
    return asyncio.run(pending_reply)
|
164 |
+
|
165 |
+
# ------------------------------------
|
166 |
+
# Gradio UI Integration
|
167 |
+
# ------------------------------------
|
168 |
def _format_chat_history(history):
    """Render ``[user, assistant]`` pairs as a Markdown transcript."""
    return "\n\n".join(
        f"**User:** {user}\n\n**Assistant:** {assistant}"
        for user, assistant in history or []
    )


def _queue_user_message(user_message, history):
    """Append ``user_message`` as a pending turn and return (state, markdown).

    Blank messages are ignored so no empty prompt is sent to the model.
    """
    history = list(history or [])
    if user_message and user_message.strip():
        history.append([user_message, ""])
    return history, _format_chat_history(history)


def _answer_pending_turn(history, responder):
    """Fill in the assistant reply for the last turn, if it is still pending.

    ``responder`` is called as ``responder(user_message, earlier_turns)`` and
    must return the reply text. Returns (state, markdown).
    """
    history = list(history or [])
    # Nothing queued, or the last turn already has an answer: just re-render.
    if history and not history[-1][1]:
        history[-1][1] = responder(history[-1][0], history[:-1])
    return history, _format_chat_history(history)


# NOTE(review): the Row/Tab nesting below is reconstructed from statement
# order only - confirm the intended layout.
with gr.Blocks() as app:
    # --- Tab: General Chat ---
    with gr.Tab(label="General Chat"):
        gr.Markdown("### Chat with the Assistant")
        with gr.Row():
            general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
            general_send_button = gr.Button("Send")
        general_chat_output = gr.Markdown(label="Chat Output", height=300)
        general_chat_history = gr.State([])

        def update_general_chat(user_message, history):
            """Queue the user's message and show it immediately."""
            return _queue_user_message(user_message, history)

        def update_general_response(history):
            """Answer the queued message with the general chat model."""
            return _answer_pending_turn(history, chat_with_replit_sync)

        # Chain with .then() so the response step only starts after the
        # message has been written to the history state; two independent
        # .click listeners would race on that state.
        general_send_button.click(
            update_general_chat,
            inputs=[general_chat_input, general_chat_history],
            outputs=[general_chat_history, general_chat_output],
        ).then(
            update_general_response,
            inputs=general_chat_history,
            outputs=[general_chat_history, general_chat_output],
        )

    # --- Tab: Chat with ArXiv Paper ---
    with gr.Tab(label="Chat with ArXiv Paper"):
        gr.Markdown("### Ask Questions About an ArXiv Paper")
        with gr.Row():
            arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
            arxiv_send_button = gr.Button("Send")
        arxiv_chat_output = gr.Markdown(label="Chat Output", height=300)
        arxiv_chat_history = gr.State([])

        def update_arxiv_chat(user_message, history):
            """Queue the user's question and show it immediately."""
            return _queue_user_message(user_message, history)

        def update_arxiv_response(history, doi_num):
            """Answer the queued question using the given arXiv paper."""
            return _answer_pending_turn(
                history,
                lambda question, earlier: chat_with_replit_arxiv_sync(question, earlier, doi_num),
            )

        arxiv_send_button.click(
            update_arxiv_chat,
            inputs=[arxiv_input, arxiv_chat_history],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        ).then(
            update_arxiv_response,
            inputs=[arxiv_chat_history, arxiv_doi],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        )

    # --- Tab: Chat with Local PDF ---
    with gr.Tab(label="Chat with Local PDF"):
        gr.Markdown("### Ask Questions About an Uploaded PDF")
        with gr.Row():
            pdf_file_input = gr.File(label="Upload PDF file")
            pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            pdf_send_button = gr.Button("Send")
        pdf_chat_output = gr.Markdown(label="Chat Output", height=300)
        pdf_chat_history = gr.State([])

        def update_pdf_chat(user_message, history):
            """Queue the user's question and show it immediately."""
            return _queue_user_message(user_message, history)

        def update_pdf_response(history, pdf_file):
            """Answer the queued question using the uploaded PDF."""
            return _answer_pending_turn(
                history,
                lambda question, earlier: chat_with_replit_local_pdf_sync(question, earlier, pdf_file),
            )

        pdf_send_button.click(
            update_pdf_chat,
            inputs=[pdf_chat_input, pdf_chat_history],
            outputs=[pdf_chat_history, pdf_chat_output],
        ).then(
            update_pdf_response,
            inputs=[pdf_chat_history, pdf_file_input],
            outputs=[pdf_chat_history, pdf_chat_output],
        )
|
249 |
|
250 |
app.launch()
|
251 |
+
|