Spaces:
Sleeping
Sleeping
abubasith86
committed on
Commit
·
a9c7e25
1
Parent(s):
5426913
Revised changes
Browse files
app.py
CHANGED
@@ -1,123 +1,166 @@
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
3 |
-
import
|
4 |
from duckduckgo_search import DDGS
|
5 |
from serpapi import GoogleSearch
|
6 |
-
import tempfile
|
7 |
-
import os
|
8 |
|
|
|
|
|
|
|
9 |
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
SYSTEM_PROMPT = """
|
12 |
-
You are an intelligent and friendly AI assistant.
|
13 |
|
14 |
Your goals:
|
15 |
-
-
|
16 |
-
-
|
17 |
-
-
|
18 |
-
-
|
|
|
|
|
19 |
"""
|
20 |
|
21 |
-
RECENT_KEYWORDS = {
|
22 |
-
"latest", "today", "current", "now", "recent", "news", "update", "price",
|
23 |
-
"who won", "what happened", "trending", "breaking", "just in", "live",
|
24 |
-
"score", "results", "weather", "forecast", "report", "market", "stocks",
|
25 |
-
"crypto", "rate", "exchange", "gold price", "happening", "event", "updates",
|
26 |
-
"hot", "viral", "announcement", "today's", "this week", "schedule", "calendar",
|
27 |
-
"launch", "drop", "release date", "opening", "closing", "deadline",
|
28 |
-
}
|
29 |
-
|
30 |
-
|
31 |
-
def extract_text_from_pdf(pdf_file) -> str:
|
32 |
-
try:
|
33 |
-
with fitz.open(pdf_file.name) as doc:
|
34 |
-
return " ".join(page.get_text() for page in doc)
|
35 |
-
except Exception as e:
|
36 |
-
return f"Failed to read PDF: {e}"
|
37 |
-
|
38 |
-
|
39 |
-
def search_web(query: str) -> str:
|
40 |
-
try:
|
41 |
-
params = {
|
42 |
-
"q": query,
|
43 |
-
"api_key": os.getenv("SERPAPI_KEY", ""), # Keep it optional and env-based
|
44 |
-
"engine": "google",
|
45 |
-
}
|
46 |
-
if params["api_key"]:
|
47 |
-
results = GoogleSearch(params).get_dict()
|
48 |
-
if "organic_results" in results:
|
49 |
-
return "\n".join(
|
50 |
-
f"{r.get('title', '')}: {r.get('snippet', '')}"
|
51 |
-
for r in results["organic_results"][:3]
|
52 |
-
)
|
53 |
-
except Exception:
|
54 |
-
pass
|
55 |
-
|
56 |
-
try:
|
57 |
-
with DDGS() as ddgs:
|
58 |
-
results = ddgs.text(query)
|
59 |
-
if results:
|
60 |
-
return results[0]["body"]
|
61 |
-
except Exception:
|
62 |
-
pass
|
63 |
-
|
64 |
-
return "No relevant web results found."
|
65 |
-
|
66 |
|
67 |
def respond(
|
68 |
-
message
|
69 |
history: list[tuple[str, str]],
|
70 |
-
|
71 |
-
temperature
|
72 |
-
top_p
|
73 |
-
max_tokens: int = 2048,
|
74 |
):
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
message_lower = message.lower()
|
77 |
|
78 |
-
|
79 |
-
if pdf is not None:
|
80 |
-
context = extract_text_from_pdf(pdf)
|
81 |
-
message += f"\n\n[Document context provided below for reference:]\n{context}"
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
86 |
if web_context:
|
87 |
-
|
|
|
88 |
|
89 |
-
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
|
90 |
-
for user, assistant in history:
|
91 |
-
messages.append({"role": "user", "content": user})
|
92 |
-
messages.append({"role": "assistant", "content": assistant})
|
93 |
messages.append({"role": "user", "content": message})
|
94 |
|
95 |
-
|
96 |
-
|
97 |
-
for
|
98 |
messages,
|
|
|
99 |
stream=True,
|
100 |
temperature=temperature,
|
101 |
top_p=top_p,
|
102 |
-
max_tokens=max_tokens,
|
103 |
):
|
104 |
-
token =
|
105 |
-
full_response += token
|
106 |
-
yield full_response
|
107 |
-
|
108 |
|
109 |
-
|
110 |
-
|
111 |
|
112 |
-
with gr.Row():
|
113 |
-
pdf_input = gr.File(label="📄 Upload PDF (optional)", file_types=[".pdf"])
|
114 |
-
temperature = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Temperature")
|
115 |
-
top_p = gr.Slider(0.1, 1.0, value=0.1, step=0.05, label="Top-p")
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
if __name__ == "__main__":
|
123 |
demo.launch()
|
|
|
1 |
import os

import gradio as gr
import pymupdf
from duckduckgo_search import DDGS
from huggingface_hub import InferenceClient
from serpapi import GoogleSearch
|
|
|
|
|
6 |
|
7 |
+
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Serverless Inference API client for the Mixtral instruct model; used by
# `respond` below for streaming chat completions.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
11 |
|
12 |
+
|
13 |
+
# PDF Parsing
|
14 |
+
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: Path or file-like object accepted by ``pymupdf.open``.

    Returns:
        str: Text of all pages joined with single spaces.
    """
    # Context manager guarantees the document handle is closed even if
    # extraction raises part-way through (the original never closed it).
    with pymupdf.open(pdf_file) as doc:
        # page.get_text() is the documented high-level call; it defaults
        # to plain-text extraction, same output as TextPage.extractTEXT().
        return " ".join(page.get_text() for page in doc)
|
18 |
+
|
19 |
+
|
20 |
+
# Web search fallback
|
21 |
+
def search_web(query):
    """Fallback web search via DuckDuckGo.

    Args:
        query: Free-text search query.

    Returns:
        str: Body text of the first hit, or a fixed message when nothing
        was found or the search failed.
    """
    try:
        with DDGS() as ddgs:
            # Recent duckduckgo_search versions return a generator;
            # materialize it before indexing.
            results = list(ddgs.text(query))
        if results:
            return results[0]["body"]
    except Exception:
        # Best-effort helper: a search outage must not crash the chat.
        pass
    return "No relevant results found on the web."
|
27 |
+
|
28 |
+
|
29 |
+
def google_search(query):
    """Search Google through SerpAPI and summarize the top results.

    Args:
        query: Free-text search query.

    Returns:
        str | None: Up to three ``"title: snippet"`` lines joined by
        newlines, or ``None`` when no API key is configured, the request
        fails, or no organic results come back.
    """
    # SECURITY: read the key from the environment — never commit a real
    # API key to source (the previous revision leaked one in plain text).
    api_key = os.getenv("SERPAPI_KEY", "")
    if not api_key:
        return None
    try:
        results = GoogleSearch(
            {"q": query, "api_key": api_key, "engine": "google"}
        ).get_dict()
    except Exception:
        # Network/quota errors degrade to "no web context" rather than
        # crashing the caller.
        return None
    if "organic_results" in results:
        # Combine the top 3 results into one short context string.
        return "\n".join(
            f"{res.get('title', '')}: {res.get('snippet', '')}"
            for res in results["organic_results"][:3]
        )
    return None
|
46 |
+
|
47 |
+
|
48 |
SYSTEM_PROMPT = """
|
49 |
+
You are an intelligent and friendly AI assistant.
|
50 |
|
51 |
Your goals:
|
52 |
+
- Answer user questions clearly and concisely.
|
53 |
+
- If a PDF document is provided, use its content to give informed answers.
|
54 |
+
- For questions about recent or live topics (e.g., news, prices, events), you may perform a web search and summarize the result.
|
55 |
+
- If no document or web context is available, still try to help using general knowledge.
|
56 |
+
- Be honest if you don’t know something.
|
57 |
+
- Always be polite, helpful, and respectful.
|
58 |
"""
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens=2048,
    temperature=0.4,
    top_p=0.1,
):
    """Stream a chat completion, optionally augmented with web search.

    Args:
        message: Latest user message.
        history: Prior ``(user, assistant)`` turn pairs.
        max_tokens: Generation cap passed to the model.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        str: The cumulative assistant response after each streamed chunk.
    """
    # Trigger words suggesting the question concerns live/recent data and
    # would benefit from a web search. A frozenset signals the collection
    # is constant and gives O(1) membership checks.
    recent_keywords = frozenset({
        "latest", "today", "current", "now", "recent", "news", "update",
        "price", "who won", "what happened", "trending", "breaking",
        "just in", "new release", "live", "score", "results", "weather",
        "forecast", "report", "market", "stocks", "crypto", "rate",
        "exchange", "gold price", "happening", "event", "updates", "hot",
        "viral", "announcement", "today's", "this week", "schedule",
        "calendar", "launch", "drop", "release date", "opening",
        "closing", "deadline",
    })

    message_lower = message.lower()

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    # NOTE: multi-word keywords ("who won", "gold price") rely on substring
    # matching against the lowercased message, not word tokenization.
    if any(kw in message_lower for kw in recent_keywords):
        web_context = google_search(message)
        if web_context:
            # Inject web context as part of the user's query.
            message = f"{message}\n\n[Relevant web search results to help you answer]:\n{web_context}"

    messages.append({"role": "user", "content": message})

    response = ""
    # Use a distinct loop variable: the original reused `message`, shadowing
    # the user-message parameter with stream chunks.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Streamed deltas can carry ``content=None`` (e.g., the final
        # chunk); skip those instead of raising TypeError on `str += None`.
        if token:
            response += token
        yield response
|
145 |
|
|
|
|
|
|
|
|
|
146 |
|
147 |
+
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI: gradio calls `respond(message, history, *additional_inputs)`,
# so the three sliders below map positionally onto max_tokens,
# temperature and top_p.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
|
164 |
|
165 |
# Launch the Gradio app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()
|