import os
import logging
import asyncio

import gradio as gr
from dotenv import load_dotenv
from langchain_community.document_loaders import ArxivLoader  # Updated import
from langchain_community.vectorstores import Chroma  # Updated import
from langchain_huggingface import HuggingFaceEmbeddings  # Updated import
from langchain_groq import ChatGroq
from PyPDF2 import PdfReader
from huggingface_hub import login
from groq import AsyncGroq
from langchain.docstore.document import Document

# Load environment variables
load_dotenv()
HUGGING_API_KEY = os.getenv("HUGGING_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not HUGGING_API_KEY or not GROQ_API_KEY:
    raise ValueError("API keys for HuggingFace or Groq are missing.")

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Authenticate with Hugging Face (for model downloads)
login(HUGGING_API_KEY)

# Load models and embeddings with a local embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)  # unused below; the handlers call AsyncGroq directly
client = AsyncGroq(api_key=GROQ_API_KEY)

# Global state for the PDF vector store
pdf_vector_store = None
current_pdf_path = None


# General chat
async def chat_with_replit(message, history):
    try:
        messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
        # Replay prior turns so the model sees the full conversation.
        for user_msg, assistant_msg in history or []:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})
        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"Chat error: {e}")
        return "Error in chat response."


def chat_with_replit_sync(message, history):
    return asyncio.run(chat_with_replit(message, history))


# ArXiv chat
async def chat_with_replit_arxiv(message, history, doi_num):
    try:
        loader = ArxivLoader(query=str(doi_num), load_max_docs=10)
        documents = loader.load_and_split()
        if not documents:
            return "No documents found for the provided arXiv number."
        metadata = documents[0].metadata
        # Build an in-memory vector store over the paper and retrieve the
        # three chunks most similar to the question.
        vector_store = Chroma.from_documents(documents, embedding_model)
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)
        # System context first so the model sees the grounding before the question.
        messages = [
            {
                "role": "system",
                "content": (
                    f"Answer based on this arXiv paper {doi_num}.\n"
                    f"Metadata: {metadata}.\n"
                    f"Relevant Content: {relevant_content}"
                ),
            },
            {"role": "user", "content": message},
        ]
        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"Error in chat with ArXiv PDF: {e}")
        return "Error processing chat with arXiv paper."


def chat_with_replit_arxiv_sync(message, history, doi_num):
    return asyncio.run(chat_with_replit_arxiv(message, history, doi_num))
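
# Every arXiv question above rebuilds the Chroma store from scratch, which
# re-downloads and re-embeds the paper each time. A minimal sketch of caching
# one store per arXiv ID (not wired into the UI; the cache and helper names
# are illustrative assumptions, not part of the original app):
_arxiv_store_cache = {}


def get_arxiv_vector_store(doi_num):
    """Return a cached vector store for the paper, building it on first use."""
    key = str(doi_num)
    if key not in _arxiv_store_cache:
        documents = ArxivLoader(query=key, load_max_docs=10).load_and_split()
        if not documents:
            return None
        _arxiv_store_cache[key] = Chroma.from_documents(documents, embedding_model)
    return _arxiv_store_cache[key]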

# Local PDF chat
async def chat_with_replit_local_pdf(message, vector_store):
    try:
        if not vector_store:
            return "Please upload a PDF first and wait for processing to complete."
        results = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(doc.page_content for doc in results)
        # System context first so the model sees the grounding before the question.
        messages = [
            {"role": "system", "content": f"Answer based on the uploaded PDF.\nRelevant Content: {relevant_content}"},
            {"role": "user", "content": message},
        ]
        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"Error in chat with local PDF: {e}")
        return "Error processing chat with local PDF."


def process_pdf(pdf_file):
    global pdf_vector_store, current_pdf_path
    try:
        # Skip re-processing if the same file was already indexed.
        if pdf_file != current_pdf_path:
            logger.info("Extracting text from PDF...")
            reader = PdfReader(pdf_file)
            text = "\n".join(page.extract_text() or "" for page in reader.pages)
            if not text.strip():
                return "Could not extract text from PDF."
            documents = [Document(page_content=text, metadata={"source": pdf_file})]
            logger.info("Creating vector store...")
            pdf_vector_store = Chroma.from_documents(documents, embedding_model)
            current_pdf_path = pdf_file
            return "PDF processed successfully. You can now ask questions."
        return "PDF already processed. Ask away!"
    except Exception as e:
        logger.error(f"Error processing PDF: {e}")
        return f"Error processing PDF: {str(e)}"
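
# process_pdf() indexes the whole PDF as one Document, so similarity_search
# can only ever return that single oversized chunk. A sketch of splitting the
# text before indexing (uses LangChain's RecursiveCharacterTextSplitter; the
# chunk sizes are illustrative, and this helper is not wired into process_pdf):
from langchain_text_splitters import RecursiveCharacterTextSplitter


def build_chunked_pdf_store(text, source):
    """Split extracted PDF text into overlapping chunks and index them."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    documents = [
        Document(page_content=chunk, metadata={"source": source})
        for chunk in splitter.split_text(text)
    ]
    return Chroma.from_documents(documents, embedding_model)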

# Gradio UI
with gr.Blocks() as app:
    with gr.Tab(label="General Chat"):
        gr.Markdown("### Chat with the Assistant")
        with gr.Row():
            general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
            general_send_button = gr.Button("Send")
        general_chat_output = gr.Markdown(label="Chat Output")
        general_chat_history = gr.State([])

        def update_general_chat(user_message, history):
            history = history or []
            history.append([user_message, ""])
            # Render the pending turn immediately; gr.Markdown expects a string, not a list.
            formatted = "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)
            return history, formatted

        def update_general_response(history):
            user_message = history[-1][0]
            response = chat_with_replit_sync(user_message, history[:-1])
            history[-1][1] = response
            formatted = "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)
            return history, formatted

        # Chain the handlers with .then() so the response is computed only after
        # the history update, instead of registering two independent click
        # events that may fire concurrently.
        general_send_button.click(
            update_general_chat,
            inputs=[general_chat_input, general_chat_history],
            outputs=[general_chat_history, general_chat_output],
        ).then(
            update_general_response,
            inputs=general_chat_history,
            outputs=[general_chat_history, general_chat_output],
        )

    with gr.Tab(label="Chat with ArXiv Paper"):
        gr.Markdown("### Ask Questions About an ArXiv Paper")
        with gr.Row():
            arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
            arxiv_send_button = gr.Button("Send")
        arxiv_chat_output = gr.Markdown(label="Chat Output")
        arxiv_chat_history = gr.State([])

        def update_arxiv_chat(user_message, history):
            history = history or []
            history.append([user_message, ""])
            formatted = "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)
            return history, formatted

        def update_arxiv_response(history, doi_num):
            user_message = history[-1][0]
            response = chat_with_replit_arxiv_sync(user_message, history[:-1], doi_num)
            history[-1][1] = response
            formatted = "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)
            return history, formatted

        arxiv_send_button.click(
            update_arxiv_chat,
            inputs=[arxiv_input, arxiv_chat_history],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        ).then(
            update_arxiv_response,
            inputs=[arxiv_chat_history, arxiv_doi],
            outputs=[arxiv_chat_history, arxiv_chat_output],
        )

    with gr.Tab(label="Chat with Local PDF"):
        gr.Markdown("### Ask Questions About an Uploaded PDF")
        pdf_file_input = gr.File(label="Upload PDF file", file_types=[".pdf"])
        pdf_status = gr.Textbox(label="PDF Processing Status", interactive=False)
        with gr.Row():
            pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
            pdf_send_button = gr.Button("Send")
        pdf_chat_output = gr.Markdown(label="Chat Output")
        pdf_chat_history = gr.State([])

        def update_pdf_chat(user_message, history):
            history = history or []
            history.append([user_message, ""])
            formatted = "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)
            return history, formatted

        def update_pdf_response(history):
            user_message = history[-1][0]
            response = asyncio.run(chat_with_replit_local_pdf(user_message, pdf_vector_store))
            history[-1][1] = response
            formatted = "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)
            return history, formatted

        pdf_file_input.change(process_pdf, inputs=pdf_file_input, outputs=pdf_status)
        pdf_send_button.click(
            update_pdf_chat,
            inputs=[pdf_chat_input, pdf_chat_history],
            outputs=[pdf_chat_history, pdf_chat_output],
        ).then(
            update_pdf_response,
            inputs=pdf_chat_history,
            outputs=[pdf_chat_history, pdf_chat_output],
        )

if __name__ == "__main__":
    app.launch()
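
# Expected .env file (key names taken from the os.getenv calls above):
#   HUGGING_API_KEY=...
#   GROQ_API_KEY=...
#
# Rough install line covering the imports used here (package names are a
# best-effort assumption; pin versions as needed):
#   pip install gradio python-dotenv langchain langchain-community \
#       langchain-huggingface langchain-groq groq PyPDF2 chromadb \
#       sentence-transformers huggingface_hub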