Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,64 +1,221 @@
[Removed: the Space's original 64-line template app.py. Only fragments of the deleted lines are recoverable from this view:]
-from …
-""…
-client = …
-def …
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        yield response
+import os
+from dotenv import load_dotenv
+from groq import Groq
+import chromadb
+from chromadb.config import Settings
+import torch
+from sentence_transformers import CrossEncoder
 import gradio as gr
+from datetime import datetime  # Import datetime to get current time

+# Load environment variables and initialize clients
+load_dotenv()
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+client = Groq(api_key=GROQ_API_KEY)
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

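The client setup above assumes a GROQ_API_KEY is available: locally it is read from a .env file via python-dotenv, while on a hosted Space it would normally come from a repository secret instead. A minimal fail-fast check, as a sketch (not part of this commit):

import os
from dotenv import load_dotenv

load_dotenv()  # merges key=value pairs from ./.env into os.environ
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError("GROQ_API_KEY is not set; add it to .env or the Space secrets")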
+def initialize_system():
+    chroma_client = chromadb.PersistentClient(
+        path="./chroma_db",
+        settings=Settings(anonymized_telemetry=False, allow_reset=True, is_persistent=True)
+    )
+
+    embedding_function = chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction(
+        model_name="sentence-transformers/all-mpnet-base-v2",
+        device=DEVICE
+    )
+
+    reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2', device=DEVICE)
+    collection = chroma_client.get_collection(name="website_content", embedding_function=embedding_function)
+    return chroma_client, collection, reranker

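Note that get_collection() assumes a "website_content" collection already exists under ./chroma_db and raises if it does not; the indexing step is not part of this commit. A sketch of how such a collection might be populated (the sample ID, document, and URL are assumptions):

import chromadb
from chromadb.config import Settings
from chromadb.utils import embedding_functions

chroma_client = chromadb.PersistentClient(path="./chroma_db", settings=Settings(anonymized_telemetry=False))
# The indexer must use the same embedding model as initialize_system() above.
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
collection = chroma_client.get_or_create_collection(name="website_content", embedding_function=embedding_function)
collection.add(
    ids=["example-page-chunk-0"],
    documents=["The British Antarctic Survey operates research stations in Antarctica."],
    metadatas=[{"url": "https://www.bas.ac.uk/example"}],  # get_context() reads metadata['url']
)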
+def get_context(message):
+    results = collection.query(
+        query_texts=[message],
+        n_results=500,
+        include=["metadatas", "documents", "distances"]
+    )
+
+    print(f"\n=== Search Results ===")
+    print(f"Initial ChromaDB results found: {len(results['documents'][0])}")
+
+    # Rerank all results
+    rerank_pairs = [(message, doc) for doc in results['documents'][0]]
+    rerank_scores = reranker.predict(rerank_pairs)
+
+    # Create list of results with scores
+    all_results = []
+    url_chunks = {}  # Group chunks by URL
+
+    # Group chunks by URL and store their scores
+    for score, doc, metadata in zip(rerank_scores, results['documents'][0], results['metadatas'][0]):
+        url = metadata['url']
+        if url not in url_chunks:
+            url_chunks[url] = []
+        url_chunks[url].append({'text': doc, 'metadata': metadata, 'score': score})
+
+    # For each URL, select the best chunks while maintaining diversity
+    for url, chunks in url_chunks.items():
+        # Sort chunks for this URL by score
+        chunks.sort(key=lambda x: x['score'], reverse=True)
+
+        # Take up to 5 chunks per URL, but only if their scores are good
+        selected_chunks = []
+        for chunk in chunks[:5]:  # 5 chunks per URL
+            # Only include if score is decent
+            if chunk['score'] > -10:  # Increased threshold to ensure higher relevance
+                selected_chunks.append(chunk)
+
+        # Add selected chunks to final results
+        all_results.extend(selected_chunks)
+
+    # Sort all results by score for final ranking
+    all_results.sort(key=lambda x: x['score'], reverse=True)
+
+    # Take only top 20 results maximum
+    all_results = all_results[:20]
+
+    print(f"\nFinal results after reranking and filtering: {len(all_results)}")
+    if all_results:
+        print("\nTop Similarity Scores and URLs:")
+        for i, result in enumerate(all_results[:20], 1):  # Show only top 20 in logs
+            print(f"{i}. Score: {result['score']:.4f} - URL: {result['metadata']['url']}")
+    print("=" * 50)
+
+    # Build context from filtered results
+    context = "\nRelevant Information:\n"
+    total_chars = 0
+    max_chars = 30000  # To ensure we don't exceed token limits
+
+    for result in all_results:
+        chunk_text = f"\nSource: {result['metadata']['url']}\n{result['text']}\n"
+        if total_chars + len(chunk_text) > max_chars:
+            break
+        context += chunk_text
+        total_chars += len(chunk_text)
+
+    print(f"\nFinal context length: {total_chars} characters")
+    return context

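get_context() is a retrieve-then-rerank pipeline: a wide vector search (up to 500 candidates), cross-encoder rescoring, a cap of 5 chunks per URL and 20 chunks overall, then a 30,000-character budget. A usage sketch, assuming it runs in this module's own namespace so the module-level collection and reranker globals resolve:

chroma_client, collection, reranker = initialize_system()
context = get_context("Where are the BAS research stations?")  # hypothetical query
print(context[:500])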
+def chat_response(message, history):
+    """Chat response function for Gradio interface"""
+    try:
+        # Get context
+        context = get_context(message)
+
+        # Get current time and date
+        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        # Build messages list
+        messages = [{
+            "role": "system",
+            "content": f"""You are an AI assistant for the British Antarctic Survey (BAS). Your responses should be based ONLY on the context provided below.
+
+IMPORTANT INSTRUCTIONS:
+1. ALWAYS thoroughly check the provided context before saying you don't have information
+2. If you find ANY relevant information in the context, use it - even if it's not complete
+3. If you find time-sensitive information in the context, share it - it's current as of when the context was retrieved
+4. When citing sources, put them on a new line after the relevant information like this:
+   Here is some information about BAS.
+   Source: https://www.bas.ac.uk/example
+
+5. Do not say things like:
+   - "I don't have access to real-time information"
+   - "I cannot browse the internet"
+   Instead, share what IS in the context, and only say "I don't have enough information" if you truly find nothing relevant to the user's question.
+
+6. Keep responses:
+   - With emojis where appropriate
+   - Without duplicate source citations
+   - Based strictly on the context below
+
+Current Time: {current_time}
+
+Context: {context}"""
+        }]
+
+        print("\n\n==========START Contents of the message being sent to the LLM==========\n")
+        print(messages)
+        print("\n\n==========END Contents of the message being sent to the LLM==========\n")
+
+        # Add history and current message
+        if history:
+            for h in history:
+                messages.append({"role": "user", "content": str(h[0])})
+                if h[1]:  # If there's a response
+                    messages.append({"role": "assistant", "content": str(h[1])})
+
+        messages.append({"role": "user", "content": str(message)})
+
+        # Get response
+        response = ""
+        completion = client.chat.completions.create(
+            model="llama-3.3-70b-versatile",
+            messages=messages,
+            temperature=0.7,
+            max_tokens=2000,
+            top_p=0.95,
+            stream=True
+        )
+
+        print("\n=== LLM Response Start ===")
+        for chunk in completion:
+            if chunk.choices[0].delta.content:
+                response += chunk.choices[0].delta.content
+                print(chunk.choices[0].delta.content, end='', flush=True)
+                yield response
+        print("\n=== LLM Response End ===\n")
+
+    except Exception as e:
+        error_msg = f"An error occurred: {str(e)}"
+        print(f"\nERROR: {error_msg}")
+        yield error_msg

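chat_response() is a generator whose every yield is the full response accumulated so far, which is the shape gr.Chatbot expects for streaming. A hypothetical smoke test outside Gradio:

final = ""
for partial in chat_response("What does BAS do?", []):  # hypothetical question, empty history
    final = partial  # each yield replaces the previous partial; it does not append
print(final)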
 if __name__ == "__main__":
+    try:
+        print("\n=== Starting Application ===")
+
+        # Initialise system
+        print("Initialising ChromaDB...")
+        chroma_client, collection, reranker = initialize_system()
+        print(f"Found {collection.count()} documents in collection")
+
+        print("\nCreating Gradio interface...")
+
+        # Create a simple Gradio interface
+        demo = gr.Blocks()
+
+        with demo:
+            gr.Markdown("# Website Chat Assistant")
+            gr.Markdown("Ask questions about the website.")
+
+            chatbot = gr.Chatbot(height=600)
+            msg = gr.Textbox(placeholder="Ask a question...", label="Your question")
+            clear = gr.Button("Clear")
+
+            def user(user_message, history):
+                return "", history + [[user_message, None]]
+
+            def bot(history):
+                if history and history[-1][1] is None:
+                    for response in chat_response(history[-1][0], history[:-1]):
+                        history[-1][1] = response
+                        yield history
+
+            msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                bot, chatbot, chatbot
+            )
+
+            clear.click(lambda: None, None, chatbot, queue=False)
+
+        # Launch with minimal configuration
+        demo.queue()
+        demo.launch(
+            server_name="127.0.0.1",
+            server_port=7860,
+            share=False
+        )
+
+    except Exception as e:
+        print(f"\nERROR: {str(e)}")
+        raise
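For a local run, the imports above imply roughly the following setup; exact package versions, and the pre-built ./chroma_db index, are outside this commit:

# pip install python-dotenv groq chromadb torch sentence-transformers gradio
# python app.py   # Gradio UI at http://127.0.0.1:7860, per the launch() arguments above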