Update app.py
app.py CHANGED
@@ -4,23 +4,21 @@ import gradio as gr
 import asyncio
 from dotenv import load_dotenv
 from langchain.document_loaders import ArxivLoader
-from langchain.text_splitter import TokenTextSplitter
 from langchain.vectorstores import Chroma
 from langchain_community.embeddings import HuggingFaceHubEmbeddings
 from langchain_groq import ChatGroq
 from PyPDF2 import PdfReader
 from huggingface_hub import login
-from groq import AsyncGroq
-from langchain.docstore.document import Document
+from groq import AsyncGroq
+from langchain.docstore.document import Document
 
 # Load environment variables
 load_dotenv()
 HUGGING_API_KEY = os.getenv("HUGGING_API_KEY")
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
-# Ensure API keys are set
 if not HUGGING_API_KEY or not GROQ_API_KEY:
-    raise ValueError("API keys for HuggingFace or Groq are missing.
+    raise ValueError("API keys for HuggingFace or Groq are missing.")
 
 # Configure Logging
 logging.basicConfig(level=logging.INFO)
@@ -34,149 +32,106 @@ embedding_model = HuggingFaceHubEmbeddings(huggingfacehub_api_token=HUGGING_API_
 llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
 client = AsyncGroq(api_key=GROQ_API_KEY)
 
-#
-
-
+# Global state for PDF vector store
+pdf_vector_store = None
+current_pdf_path = None
+
+# General Chat (unchanged)
 async def chat_with_replit(message, history):
-    """General chat functionality using the Groq API."""
     try:
         messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
-
         for chat in history or []:
             user_msg, assistant_msg = chat
             messages.append({"role": "user", "content": user_msg})
             messages.append({"role": "assistant", "content": assistant_msg})
-
         messages.append({"role": "user", "content": message})
-
         response = await client.chat.completions.create(
-            messages=messages,
-            model="llama3-70b-8192",
-            temperature=0,
-            max_tokens=1024,
-            top_p=1,
-            stream=False,  # For simplicity we are not streaming
+            messages=messages, model="llama3-70b-8192", temperature=0, max_tokens=1024, top_p=1, stream=False
         )
         return response.choices[0].message.content
-
     except Exception as e:
         logger.error(f"Chat error: {e}")
         return "Error in chat response."
 
 def chat_with_replit_sync(message, history):
-    """Synchronous wrapper for general chat."""
     return asyncio.run(chat_with_replit(message, history))
 
-#
-# Chat Functionality for ArXiv Paper (Document Chat)
-# -------------------------------------------------
+# ArXiv Chat (unchanged)
 async def chat_with_replit_arxiv(message, history, doi_num):
-    """Chat answering questions using an ArXiv paper as context."""
     try:
-        # Load the ArXiv document and split it into chunks
         loader = ArxivLoader(query=str(doi_num), load_max_docs=10)
         documents = loader.load_and_split()
         if not documents:
             return "No documents found for the provided arXiv number."
         metadata = documents[0].metadata
-
-        # Create vector store for the loaded documents
         vector_store = Chroma.from_documents(documents, embedding_model)
-
-
-        results = vector_store.similarity_search(user_query, k=3)
-        return "\n\n".join(doc.page_content for doc in results)
-
-        relevant_content = retrieve_relevant_content(message)
-
+        results = vector_store.similarity_search(message, k=3)
+        relevant_content = "\n\n".join(doc.page_content for doc in results)
         messages = [
             {"role": "user", "content": message},
-            {"role": "system", "content": f"Answer based on this arXiv paper {doi_num}.\
-                                          f"Metadata: {metadata}.\n"
-                                          f"Relevant Content: {relevant_content}"}
+            {"role": "system", "content": f"Answer based on this arXiv paper {doi_num}.\nMetadata: {metadata}.\nRelevant Content: {relevant_content}"}
         ]
-
         response = await client.chat.completions.create(
-            messages=messages,
-            model="llama3-70b-8192",
-            temperature=0,
-            max_tokens=1024,
-            top_p=1,
-            stream=False,
+            messages=messages, model="llama3-70b-8192", temperature=0, max_tokens=1024, top_p=1, stream=False
        )
         return response.choices[0].message.content
-
     except Exception as e:
-        logger.error(f"Error in chat with
+        logger.error(f"Error in chat with ArXiv PDF: {e}")
         return "Error processing chat with arXiv paper."
 
 def chat_with_replit_arxiv_sync(message, history, doi_num):
-    """Synchronous wrapper for arXiv chat."""
     return asyncio.run(chat_with_replit_arxiv(message, history, doi_num))
 
-#
-
-# -------------------------------------------------
-async def chat_with_replit_local_pdf(message, history, pdf_file_path):
-    """Chat answering questions using a local PDF as context."""
+# Local PDF Chat
+async def chat_with_replit_local_pdf(message, vector_store):
     try:
-
-
-
-
-            return "Could not extract text from PDF."
-
-        # Create a document from the PDF text
-        documents = [Document(page_content=text, metadata={"source": pdf_file_path})]
-
-        # Create a vector store using the document
-        vector_store = Chroma.from_documents(documents, embedding_model)
-
-        def retrieve_relevant_content(user_query):
-            results = vector_store.similarity_search(user_query, k=3)
-            return "\n\n".join(doc.page_content for doc in results)
-
-        relevant_content = retrieve_relevant_content(message)
-
+        if not vector_store:
+            return "Please upload a PDF first and wait for processing to complete."
+        results = vector_store.similarity_search(message, k=3)
+        relevant_content = "\n\n".join(doc.page_content for doc in results)
         messages = [
             {"role": "user", "content": message},
-            {"role": "system", "content": f"Answer based on
-                                          f"Relevant Content: {relevant_content}"}
+            {"role": "system", "content": f"Answer based on the uploaded PDF.\nRelevant Content: {relevant_content}"}
         ]
-
         response = await client.chat.completions.create(
-            messages=messages,
-            model="llama3-70b-8192",
-            temperature=0,
-            max_tokens=1024,
-            top_p=1,
-            stream=False,
+            messages=messages, model="llama3-70b-8192", temperature=0, max_tokens=1024, top_p=1, stream=False
         )
         return response.choices[0].message.content
-
     except Exception as e:
         logger.error(f"Error in chat with local PDF: {e}")
         return "Error processing chat with local PDF."
 
-def
-
-
+def process_pdf(pdf_file):
+    global pdf_vector_store, current_pdf_path
+    try:
+        if pdf_file != current_pdf_path:
+            logger.info("Extracting text from PDF...")
+            reader = PdfReader(pdf_file)
+            text = "\n".join(page.extract_text() or "" for page in reader.pages)
+            if not text.strip():
+                return "Could not extract text from PDF."
+            documents = [Document(page_content=text, metadata={"source": pdf_file})]
+            logger.info("Creating vector store...")
+            pdf_vector_store = Chroma.from_documents(documents, embedding_model)
+            current_pdf_path = pdf_file
+            return "PDF processed successfully. You can now ask questions."
+        return "PDF already processed. Ask away!"
+    except Exception as e:
+        logger.error(f"Error processing PDF: {e}")
+        return f"Error processing PDF: {str(e)}"
 
-#
-# Gradio UI Integration
-# ------------------------------------
+# Gradio UI
 with gr.Blocks() as app:
-    #
+    # General Chat (unchanged)
     with gr.Tab(label="General Chat"):
         gr.Markdown("### Chat with the Assistant")
         with gr.Row():
             general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
             general_send_button = gr.Button("Send")
-        general_chat_output = gr.Markdown(label="Chat Output"
+        general_chat_output = gr.Markdown(label="Chat Output")
        general_chat_history = gr.State([])
 
         def update_general_chat(user_message, history):
-            # Append the new message with an empty assistant reply for now.
             history = history or []
             history.append([user_message, ""])
             return history, history
@@ -189,18 +144,18 @@ with gr.Blocks() as app:
             return history, formatted
 
         general_send_button.click(update_general_chat, inputs=[general_chat_input, general_chat_history],
-
+                                  outputs=[general_chat_history, general_chat_output])
         general_send_button.click(update_general_response, inputs=general_chat_history,
-
+                                  outputs=[general_chat_history, general_chat_output])
 
-    #
+    # ArXiv Chat (unchanged)
     with gr.Tab(label="Chat with ArXiv Paper"):
         gr.Markdown("### Ask Questions About an ArXiv Paper")
         with gr.Row():
             arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
             arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
             arxiv_send_button = gr.Button("Send")
-        arxiv_chat_output = gr.Markdown(label="Chat Output"
+        arxiv_chat_output = gr.Markdown(label="Chat Output")
         arxiv_chat_history = gr.State([])
 
         def update_arxiv_chat(user_message, history):
@@ -216,18 +171,19 @@ with gr.Blocks() as app:
             return history, formatted
 
         arxiv_send_button.click(update_arxiv_chat, inputs=[arxiv_input, arxiv_chat_history],
-
+                                outputs=[arxiv_chat_history, arxiv_chat_output])
         arxiv_send_button.click(update_arxiv_response, inputs=[arxiv_chat_history, arxiv_doi],
-
+                                outputs=[arxiv_chat_history, arxiv_chat_output])
 
-    #
+    # Local PDF Chat
     with gr.Tab(label="Chat with Local PDF"):
         gr.Markdown("### Ask Questions About an Uploaded PDF")
+        pdf_file_input = gr.File(label="Upload PDF file", file_types=[".pdf"])
+        pdf_status = gr.Textbox(label="PDF Processing Status", interactive=False)
         with gr.Row():
-            pdf_file_input = gr.File(label="Upload PDF file")
             pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
             pdf_send_button = gr.Button("Send")
-        pdf_chat_output = gr.Markdown(label="Chat Output"
+        pdf_chat_output = gr.Markdown(label="Chat Output")
         pdf_chat_history = gr.State([])
 
         def update_pdf_chat(user_message, history):
@@ -235,17 +191,18 @@ with gr.Blocks() as app:
             history.append([user_message, ""])
             return history, history
 
-        def update_pdf_response(history
+        def update_pdf_response(history):
             user_message = history[-1][0]
-            response =
+            response = asyncio.run(chat_with_replit_local_pdf(user_message, pdf_vector_store))
             history[-1][1] = response
             formatted = "\n\n".join([f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history])
             return history, formatted
 
+        pdf_file_input.change(process_pdf, inputs=pdf_file_input, outputs=pdf_status)
         pdf_send_button.click(update_pdf_chat, inputs=[pdf_chat_input, pdf_chat_history],
-
-        pdf_send_button.click(update_pdf_response, inputs=
-
+                              outputs=[pdf_chat_history, pdf_chat_output])
+        pdf_send_button.click(update_pdf_response, inputs=pdf_chat_history,
+                              outputs=[pdf_chat_history, pdf_chat_output])
 
 app.launch()
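
After this commit, the arXiv and local-PDF paths share one retrieval pattern: build a Chroma store over the documents, take the top-k chunks most similar to the question, and inline them into the system prompt. A minimal standalone sketch of that pattern, assuming embedding_model is constructed as in app.py; the helper name retrieve_top_k and the "demo" source tag are hypothetical:

from langchain.docstore.document import Document
from langchain.vectorstores import Chroma

def retrieve_top_k(text, query, embedding_model, k=3):
    # Wrap the raw text in a single Document, as process_pdf() does.
    # The "demo" source tag is a placeholder, not from the commit.
    docs = [Document(page_content=text, metadata={"source": "demo"})]
    # Build an in-memory vector store and fetch the k most similar chunks.
    store = Chroma.from_documents(docs, embedding_model)
    results = store.similarity_search(query, k=k)
    # Join the chunk texts with blank lines, as the new code does before prompting.
    return "\n\n".join(doc.page_content for doc in results)

One consequence worth noting: process_pdf() stores the whole PDF as a single Document, and the commit also drops the TokenTextSplitter import, so similarity_search over the PDF store only ever has one chunk to rank; splitting the text into chunks before Chroma.from_documents would make the k=3 retrieval meaningful.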