Update app.py
app.py
CHANGED
@@ -24,20 +24,11 @@ logger = logging.getLogger(__name__)
 # Environment setup for Hugging Face token
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN", "default-token")
 if os.environ["HUGGINGFACEHUB_API_TOKEN"] == "default-token":
-    logger.warning("HUGGINGFACEHUB_API_TOKEN not set.
+    logger.warning("HUGGINGFACEHUB_API_TOKEN not set. Model may not work.")

-# Model and embedding
-LLM_MODELS = {
-    "High Accuracy (Mixtral-8x7B)": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "Balanced (Gemma-2-2B)": "google/gemma-2-2b-it",
-    "Lightweight (Mistral-7B)": "mistralai/Mistral-7B-Instruct-v0.2"
-}
-
-EMBEDDING_MODELS = {
-    "Lightweight (MiniLM-L6)": "sentence-transformers/all-MiniLM-L6-v2",
-    "Balanced (MPNet-Base)": "sentence-transformers/all-mpnet-base-v2",
-    "High Accuracy (BGE-Large)": "BAAI/bge-large-en-v1.5"
-}
+# Model and embedding configuration
+LLM_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"

 # Global state
 vector_store = None
@@ -93,7 +84,7 @@ def load_documents(files):
     return documents

 # Function to process documents and create vector store
-def process_documents(files, chunk_size, chunk_overlap, embedding_model):
+def process_documents(files, chunk_size, chunk_overlap):
     global vector_store
     if not files:
         return "Please upload at least one document.", None
@@ -128,14 +119,13 @@ def process_documents(files, chunk_size, chunk_overlap, embedding_model):

     # Create embeddings
     try:
-        embeddings = HuggingFaceEmbeddings(model_name=
+        embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
     except Exception as e:
-        logger.error(f"Error initializing embeddings
+        logger.error(f"Error initializing embeddings: {str(e)}")
         return f"Error initializing embeddings: {str(e)}", None

     # Create vector store
     try:
-        # Use in-memory Chroma client to avoid filesystem issues
         collection_name = f"doctalk_collection_{int(time.time())}"
         client = chromadb.Client()
         vector_store = Chroma.from_documents(
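For reference, the pattern this hunk settles on (a pinned embedding model plus a purely in-memory Chroma client) can be exercised on its own. A minimal sketch follows, assuming current langchain-community / langchain-huggingface / langchain-text-splitters import paths; the text splitter and sample document are illustrative assumptions, not code taken from app.py.

# Sketch only: in-memory Chroma store with a pinned embedding model.
# Import paths are assumptions; adjust to whatever app.py actually imports.
import time

import chromadb
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"

docs = [Document(page_content="DocTalk answers questions about uploaded documents.")]
chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
client = chromadb.Client()  # in-memory client, nothing persisted to disk
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    client=client,
    collection_name=f"doctalk_collection_{int(time.time())}",
)
print(vector_store.similarity_search("What does DocTalk do?", k=1)[0].page_content)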
@@ -154,21 +144,20 @@ def process_documents(files, chunk_size, chunk_overlap, embedding_model):
     wait=wait_exponential(multiplier=1, min=4, max=10),
     retry=retry_if_exception_type((requests.exceptions.HTTPError, requests.exceptions.ConnectionError))
 )
-def initialize_qa_chain(llm_model, temperature):
+def initialize_qa_chain(temperature):
     global qa_chain
     if not vector_store:
         return "Please process documents first.", None

     try:
         llm = HuggingFaceEndpoint(
-            repo_id=
+            repo_id=LLM_MODEL,
             task="text-generation",
             temperature=float(temperature),
             max_new_tokens=512,
             huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
             timeout=30
         )
-        # Dynamically set k based on vector store size
         collection = vector_store._collection
         doc_count = collection.count()
         k = min(3, doc_count) if doc_count > 0 else 1
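For reference, a minimal sketch of the endpoint-plus-retry pattern this hunk pins down: a fixed Mixtral repo id and a tenacity decorator that retries transient HTTP failures. The import path, the stop_after_attempt(3) limit, and the generate() wrapper are assumptions for illustration; only the endpoint parameters and the wait/retry arguments mirror the diff.

# Sketch only: pinned Inference API endpoint wrapped in tenacity retries.
import os

import requests
from langchain_huggingface import HuggingFaceEndpoint
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

LLM_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"

@retry(
    stop=stop_after_attempt(3),  # assumed limit; the stop= line is not shown in the diff
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type((requests.exceptions.HTTPError, requests.exceptions.ConnectionError)),
)
def generate(prompt: str, temperature: float = 0.7) -> str:
    llm = HuggingFaceEndpoint(
        repo_id=LLM_MODEL,
        task="text-generation",
        temperature=float(temperature),
        max_new_tokens=512,
        huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
        timeout=30,
    )
    return llm.invoke(prompt)

if __name__ == "__main__":
    print(generate("Summarize retrieval-augmented generation in one sentence."))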
@@ -177,17 +166,17 @@ def initialize_qa_chain(llm_model, temperature):
             retriever=vector_store.as_retriever(search_kwargs={"k": k}),
             memory=memory
         )
-        logger.info(f"Initialized QA chain with {
+        logger.info(f"Initialized QA chain with {LLM_MODEL} and k={k}.")
         return "QA Doctor: QA chain initialized successfully.", None
     except requests.exceptions.HTTPError as e:
-        logger.error(f"HTTP error initializing QA chain
+        logger.error(f"HTTP error initializing QA chain: {str(e)}")
         if "503" in str(e):
-            return
+            return "Error: Hugging Face API temporarily unavailable. Please wait and retry.", None
         elif "403" in str(e):
-            return
+            return "Error: Access denied. Check your HF token or upgrade to Pro at https://huggingface.co/settings/billing.", None
         return f"Error initializing QA chain: {str(e)}.", None
     except Exception as e:
-        logger.error(f"Error initializing QA chain
+        logger.error(f"Error initializing QA chain: {str(e)}")
         return f"Error initializing QA chain: {str(e)}. Ensure your HF token is valid.", None

 # Function to handle user query with retry logic
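The except blocks above turn raw HTTP failures into user-facing strings by looking for the status code in the exception text. A small stand-alone sketch of that mapping; the helper name is hypothetical and the messages paraphrase (and shorten) the ones in the diff.

# Sketch only: map Hugging Face Inference API failures to user-facing strings.
import requests

def friendly_hf_error(exc: Exception) -> str:
    text = str(exc)
    if isinstance(exc, requests.exceptions.HTTPError):
        if "503" in text:
            return "Error: Hugging Face API temporarily unavailable. Please wait and retry."
        if "403" in text:
            return "Error: Access denied. Check your HF token or upgrade to Pro."
    return f"Error initializing QA chain: {text}."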
@@ -196,7 +185,7 @@ def initialize_qa_chain(llm_model, temperature):
     wait=wait_exponential(multiplier=1, min=4, max=10),
     retry=retry_if_exception_type((requests.exceptions.HTTPError, requests.exceptions.ConnectionError))
 )
-def answer_question(question, llm_model, embedding_model, temperature, chunk_size, chunk_overlap):
+def answer_question(question, temperature, chunk_size, chunk_overlap):
     global chat_history
     if not vector_store:
         return "Please process documents first.", chat_history
@@ -214,9 +203,9 @@ def answer_question(question, llm_model, embedding_model, temperature, chunk_siz
     except requests.exceptions.HTTPError as e:
         logger.error(f"HTTP error answering question: {str(e)}")
         if "503" in str(e):
-            return
+            return "Error: Hugging Face API temporarily unavailable. Please wait and retry.", chat_history
         elif "403" in str(e):
-            return
+            return "Error: Access denied. Check your HF token or upgrade to Pro at https://huggingface.co/settings/billing.", chat_history
         return f"Error answering question: {str(e)}", chat_history
     except Exception as e:
         logger.error(f"Error answering question: {str(e)}")
@@ -261,7 +250,7 @@ def reset_app():
 # Gradio interface
 with gr.Blocks(theme=gr.themes.Soft(), title="DocTalk: Document Q&A Chatbot") as demo:
     gr.Markdown("# DocTalk: Document Q&A Chatbot")
-    gr.Markdown("Upload documents (PDF, TXT, DOCX, PPTX),
+    gr.Markdown("Upload documents (PDF, TXT, DOCX, PPTX), tune parameters, and ask questions! Uses Mixtral-8x7B and BGE-Large for high accuracy.")

     with gr.Row():
         with gr.Column(scale=2):
@@ -272,8 +261,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DocTalk: Document Q&A Chatbot") as
             status = gr.Textbox(label="Status", interactive=False)

         with gr.Column(scale=1):
-            llm_model = gr.Dropdown(choices=list(LLM_MODELS.keys()), label="Select LLM Model", value="High Accuracy (Mixtral-8x7B)")
-            embedding_model = gr.Dropdown(choices=list(EMBEDDING_MODELS.keys()), label="Select Embedding Model", value="Lightweight (MiniLM-L6)")
             temperature = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature")
             chunk_size = gr.Slider(minimum=500, maximum=2000, step=100, value=1000, label="Chunk Size")
             chunk_overlap = gr.Slider(minimum=0, maximum=500, step=50, value=100, label="Chunk Overlap")
@@ -289,17 +276,17 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DocTalk: Document Q&A Chatbot") as
     # Event handlers
     process_button.click(
         fn=process_documents,
-        inputs=[file_upload, chunk_size, chunk_overlap
+        inputs=[file_upload, chunk_size, chunk_overlap],
         outputs=[status, chat_display]
     )
     init_button.click(
         fn=initialize_qa_chain,
-        inputs=[
+        inputs=[temperature],
         outputs=[status, chat_display]
     )
     question.submit(
         fn=answer_question,
-        inputs=[question,
+        inputs=[question, temperature, chunk_size, chunk_overlap],
         outputs=[answer, chat_display]
     )
     export_button.click(
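The event-handler hunk simply drops the model dropdowns from each inputs= list so that the argument order still matches the slimmed-down handler signatures. A stripped-down sketch of that wiring is below; the component choices and handler bodies are placeholders for illustration, not the real app.py logic.

# Sketch only: Gradio inputs must line up positionally with the handler parameters.
import gradio as gr

def process_documents(files, chunk_size, chunk_overlap):
    return f"Processed {len(files or [])} file(s) (chunk_size={chunk_size}, overlap={chunk_overlap}).", ""

def initialize_qa_chain(temperature):
    return f"QA chain ready (temperature={temperature}).", ""

def answer_question(question, temperature, chunk_size, chunk_overlap):
    return f"Echo: {question}", ""

with gr.Blocks() as demo:
    file_upload = gr.File(file_count="multiple", label="Upload Documents")
    temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
    chunk_size = gr.Slider(500, 2000, value=1000, step=100, label="Chunk Size")
    chunk_overlap = gr.Slider(0, 500, value=100, step=50, label="Chunk Overlap")
    question = gr.Textbox(label="Ask a Question")
    status = gr.Textbox(label="Status", interactive=False)
    answer = gr.Textbox(label="Answer", interactive=False)
    chat_display = gr.Textbox(label="Chat History", interactive=False)
    process_button = gr.Button("Process Documents")
    init_button = gr.Button("Initialize QA Chain")

    process_button.click(fn=process_documents, inputs=[file_upload, chunk_size, chunk_overlap], outputs=[status, chat_display])
    init_button.click(fn=initialize_qa_chain, inputs=[temperature], outputs=[status, chat_display])
    question.submit(fn=answer_question, inputs=[question, temperature, chunk_size, chunk_overlap], outputs=[answer, chat_display])

if __name__ == "__main__":
    demo.launch()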