looker01202 committed
Commit 5573ab1 · Parent: 0c79881

Gemini changes added 1

Files changed (1): app.py (+101, -48)

app.py CHANGED
@@ -23,14 +23,20 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 def load_model():
     print(f"🔍 Loading model: {primary_checkpoint}")
     try:
+        # Use optimized loading settings suitable for Granite
+        load_kwargs = {
+            "use_fast": True,
+            "torch_dtype": torch.float16,
+            "low_cpu_mem_usage": True
+        } if primary_checkpoint.startswith("ibm-granite") else {}
+
         tokenizer = AutoTokenizer.from_pretrained(
             primary_checkpoint,
-            use_fast=True
+            **{k: v for k, v in load_kwargs.items() if k == 'use_fast'} # Only pass use_fast to tokenizer
         )
         model = AutoModelForCausalLM.from_pretrained(
             primary_checkpoint,
-            torch_dtype=torch.float16,
-            low_cpu_mem_usage=True
+            **{k: v for k, v in load_kwargs.items() if k != 'use_fast'} # Pass other kwargs to model
         ).to(device)
         print(f"✅ Loaded primary {primary_checkpoint}")
         return tokenizer, model, primary_checkpoint
@@ -53,9 +59,15 @@ print(tokenizer.chat_template)
 def load_hotel_docs(hotel_id):
     path = os.path.join("knowledge", f"{hotel_id}.txt")
     if not os.path.exists(path):
+        print(f"⚠️ Knowledge file not found: {path}")
+        return []
+    try:
+        with open(path, encoding="utf-8") as f:
+            content = f.read().strip()
+        return [(hotel_id, content)]
+    except Exception as e:
+        print(f"❌ Error reading knowledge file {path}: {e}")
         return []
-    content = open(path, encoding="utf-8").read().strip()
-    return [(hotel_id, content)]
 
 # Chat function
 def chat(message, history, hotel_id):
@@ -69,7 +81,7 @@ def chat(message, history, hotel_id):
 
     # Yield user message immediately
     ui_history = [{"role": r, "content": c} for r, c in history_tuples]
-    yield ui_history, ""
+    yield ui_history, "" # Update chat, clear textbox
 
     # Local Qwen flow
     if not is_space:
@@ -81,105 +93,146 @@ def chat(message, history, hotel_id):
             add_generation_prompt=True
         )
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-
+
         with torch.no_grad():
             outputs = model.generate(inputs, max_new_tokens=1024, do_sample=True)
-
+
         decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
+        print("--- Qwen Raw Output ---")
         print(decoded)
-
-        # Extract assistant response
-        response = decoded.split("<|im_start|>assistant")[-1]
-        response = response.split("<|im_end|>")[0].strip()
+        print("-----------------------")
+
+        # Extract assistant response for Qwen
+        try:
+            response = decoded.split("<|im_start|>assistant")[-1]
+            response = response.split("<|im_end|>")[0].strip()
+            if not response: # Handle potential empty split
+                response = "Sorry, I encountered an issue generating a response."
+        except IndexError:
+            print("❌ Error splitting Qwen response.")
+            response = "Sorry, I couldn't parse the model's response."
+
+    # IBM Granite RAG flow (Space environment)
     else:
-        # IBM Granite RAG flow
-        system_prompt = (
-            "Knowledge Cutoff Date: April 2024. Today's Date: April 12, 2025. "
-            "You are Alexander, the front desk assistant at Family Village Inn in Cyprus. "
-            "You only know what's in the provided documents. "
-            "Greet guests politely, but only chit-chat when it helps answer hotel questions. "
-            "Answer using only facts from the documents; if unavailable, say you cannot answer."
+        # --- Start: Dynamic System Prompt Loading ---
+        default_system_prompt = (
+            "You are a helpful hotel assistant. Use only the provided documents to answer questions about the hotel. "
+            "Greet guests politely. If the information needed to answer the question is not available in the documents, "
+            "inform the user that the question cannot be answered based on the available data."
         )
-        messages = [{"role": "system", "content": system_prompt}]
-        for doc_id, doc_content in load_hotel_docs(hotel_id):
+        system_prompt_filename = f"{hotel_id}-system.txt"
+        system_prompt_path = os.path.join("knowledge", system_prompt_filename)
+        system_prompt_content = default_system_prompt # Start with default
+
+        if os.path.exists(system_prompt_path):
+            try:
+                with open(system_prompt_path, "r", encoding="utf-8") as f:
+                    loaded_prompt = f.read().strip()
+                if loaded_prompt: # Use file content only if it's not empty
+                    system_prompt_content = loaded_prompt
+                    print(f"✅ Loaded system prompt from: {system_prompt_path}")
+                else:
+                    print(f"⚠️ System prompt file '{system_prompt_path}' is empty. Using default.")
+            except Exception as e:
+                print(f"❌ Error reading system prompt file '{system_prompt_path}': {e}. Using default.")
+        else:
+            print(f"⚠️ System prompt file not found: '{system_prompt_path}'. Using default.")
+        # --- End: Dynamic System Prompt Loading ---
+
+        messages = [{"role": "system", "content": system_prompt_content}]
+
+        # Load and add hotel document(s)
+        hotel_docs = load_hotel_docs(hotel_id)
+        if not hotel_docs:
+            # If no knowledge doc found, inform user and stop
+            ui_history.append({"role": "assistant", "content": f"Sorry, I don't have specific information loaded for the hotel '{hotel_id}'."})
+            yield ui_history, "" # Update chat, keep textbox cleared
+            return # Exit the function early
+
+        for doc_id, doc_content in hotel_docs:
             messages.append({"role": "document", "content": doc_content, "document_id": doc_id})
+
         # Include full history including the new user message
         for role, content in history_tuples:
             messages.append({"role": role, "content": content})
 
-        # Apply the template to the chat dictionary to create a templated string which can be tokenized
+        # Apply the template
         input_text = tokenizer.apply_chat_template(
             messages,
             tokenize=False,
             add_generation_prompt=True
         )
-
-        # Print the templated string
-        print("printing templated chat\n")
+
+        print("--- Granite Templated Input ---")
         print(input_text)
+        print("-----------------------------")
 
-        # Turn into tensors
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
 
         with torch.no_grad():
-            outputs = model.generate(inputs, max_new_tokens=1024, do_sample=True)
-
+            # Using do_sample=False for more deterministic RAG based on context
+            outputs = model.generate(inputs, max_new_tokens=1024, do_sample=False)
+
         decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
 
-        # Print the templated string
-        print("printing reply from model\n")
+        print("--- Granite Raw Output ---")
         print(decoded)
+        print("--------------------------")
 
-        response = decoded.split("<|start_of_role|>assistant<|end_of_role|>")[-1]
-        response = response.split("<|end_of_text|>")[0].strip()
+        # Extract assistant response for Granite
+        try:
+            response = decoded.split("<|start_of_role|>assistant<|end_of_role|>")[-1]
+            response = response.split("<|end_of_text|>")[0].strip()
+            if not response: # Handle potential empty split
+                response = "Sorry, I encountered an issue generating a response."
+        except IndexError:
+            print("❌ Error splitting Granite response.")
+            response = "Sorry, I couldn't parse the model's response."
+
+    # Add the final assistant reply to the UI history
+    ui_history.append({"role": "assistant", "content": response})
 
-    # add the assistant reply to the running transcript
-    ui_history.append({"role": "assistant", "content": response})
-
     # Final yield with assistant reply
-    yield ui_history, ""
+    yield ui_history, "" # Update chat, keep textbox cleared
 
 # Available hotels
 hotel_ids = ["cyprus-guesthouse-family", "coastal-villa-family", "village-inn-family"]
 
-# Gradio UI
 # Gradio UI
 with gr.Blocks() as demo:
-    # ⬇️ NEW panel wrapper
     with gr.Column(variant="panel"):
-
         gr.Markdown("### 🏨 Multi‑Hotel Chatbot Demo")
         gr.Markdown(f"**Running:** {model_name}")
 
         hotel_selector = gr.Dropdown(
             hotel_ids,
             label="Hotel",
-            value=hotel_ids[0]
+            value=hotel_ids[0] # Default selection
         )
 
-        # Chat window in its own row so it stretches
         with gr.Row():
-            chatbot = gr.Chatbot(type="messages")
+            # Use type="messages" for the dictionary format expected by the chat function
+            chatbot = gr.Chatbot(type="messages", label="Chat History")
 
         msg = gr.Textbox(
             show_label=False,
             placeholder="Ask about the hotel..."
         )
 
-        # Clear‑history button
-        gr.Button("Clear").click(lambda: ([], ""), None, [chatbot, msg])
+        # Clear button needs to reset chatbot to None or empty list, and clear textbox
+        clear_btn = gr.Button("Clear")
+        clear_btn.click(lambda: (None, ""), None, [chatbot, msg]) # Reset chatbot history to None
 
-        # Wire the textbox to the chat function
+        # Wire the textbox submission
         msg.submit(
             fn=chat,
             inputs=[msg, chatbot, hotel_selector],
-            outputs=[chatbot, msg]
+            outputs=[chatbot, msg] # chatbot updates, msg clears
         )
 
-    # Anything outside the column shows below the panel
     gr.Markdown("⚠️ Pause the Space when done to avoid charges.")
 
-# Enable streaming queue for generator-based chat
+# Enable streaming queue
 demo.queue(default_concurrency_limit=2, max_size=32)
 
 if __name__ == "__main__":
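The revised load_model() builds a single load_kwargs dict, gated on an ibm-granite checkpoint, and filters it per call so that use_fast only reaches the tokenizer. A roughly equivalent but more explicit sketch (the helper name load_model_explicit is hypothetical; primary_checkpoint and device are the globals app.py already defines):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def load_model_explicit(primary_checkpoint: str, device: str):
    # The tokenizer only understands use_fast; dtype/memory options belong to the model.
    tokenizer = AutoTokenizer.from_pretrained(primary_checkpoint, use_fast=True)

    model_kwargs = (
        {"torch_dtype": torch.float16, "low_cpu_mem_usage": True}
        if primary_checkpoint.startswith("ibm-granite")
        else {}
    )
    model = AutoModelForCausalLM.from_pretrained(primary_checkpoint, **model_kwargs).to(device)
    return tokenizer, model, primary_checkpoint

Keeping the two keyword-argument sets apart avoids the dict-comprehension filtering and makes it obvious which option goes to which from_pretrained call.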
 
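In the Granite branch the prompt is assembled as a message list: the (now file-driven) system prompt first, one "document" turn per knowledge snippet, then the running user/assistant history, after which the tokenizer's chat template renders everything into one prompt string. A condensed sketch of that assembly (build_granite_messages is a hypothetical helper; tokenizer, load_hotel_docs, system_prompt_content, hotel_id, and history_tuples are the names used in app.py):

def build_granite_messages(system_prompt, hotel_docs, history_tuples):
    # System prompt, then one "document" entry per knowledge snippet,
    # then the chat history (whose last item is the new user message).
    messages = [{"role": "system", "content": system_prompt}]
    for doc_id, doc_content in hotel_docs:
        messages.append({"role": "document", "content": doc_content, "document_id": doc_id})
    for role, content in history_tuples:
        messages.append({"role": role, "content": content})
    return messages

messages = build_granite_messages(system_prompt_content, load_hotel_docs(hotel_id), history_tuples)
input_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return the templated string, not token IDs
    add_generation_prompt=True,  # append the assistant header so the model starts replying
)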
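Both branches recover the reply by splitting on model-specific markers (Qwen's <|im_start|>assistant ... <|im_end|> and Granite's <|start_of_role|>assistant<|end_of_role|> ... <|end_of_text|>). Since str.split always returns at least one element and therefore never raises IndexError, the empty-string check is what actually catches a failed parse; a small sketch that makes the failure case explicit instead (extract_reply is hypothetical, not part of the commit):

def extract_reply(decoded: str, start_marker: str, end_marker: str,
                  fallback: str = "Sorry, I encountered an issue generating a response.") -> str:
    # If the assistant marker is missing, split() returns the whole string;
    # treat that as a parse failure rather than echoing the prompt back.
    if start_marker not in decoded:
        return fallback
    reply = decoded.split(start_marker)[-1].split(end_marker)[0].strip()
    return reply or fallback

# Qwen-style markers
response = extract_reply(decoded, "<|im_start|>assistant", "<|im_end|>")
# Granite-style markers
response = extract_reply(decoded, "<|start_of_role|>assistant<|end_of_role|>", "<|end_of_text|>")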
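The UI side depends on chat() being a generator: it yields (ui_history, "") once to echo the user turn and again with the assistant reply, and the configured queue streams those updates to the browser. A stripped-down sketch of the same wiring, assuming Gradio 4.x (where gr.Chatbot(type="messages") and the default_concurrency_limit queue argument used in the commit are available) and the chat function and hotel_ids list from app.py:

import gradio as gr

with gr.Blocks() as demo:
    with gr.Column(variant="panel"):
        hotel_selector = gr.Dropdown(hotel_ids, label="Hotel", value=hotel_ids[0])
        chatbot = gr.Chatbot(type="messages", label="Chat History")  # expects role/content dicts
        msg = gr.Textbox(show_label=False, placeholder="Ask about the hotel...")

        # Each value yielded by the generator updates the chatbot and clears the textbox.
        msg.submit(fn=chat, inputs=[msg, chatbot, hotel_selector], outputs=[chatbot, msg])

        # Returning (None, "") resets the chat history and clears the textbox.
        gr.Button("Clear").click(lambda: (None, ""), None, [chatbot, msg])

demo.queue(default_concurrency_limit=2, max_size=32)  # queue settings used by the Space

if __name__ == "__main__":
    demo.launch()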