looker01202 committed
Commit 7226a27 · 1 Parent(s): 45c767b

setup local venv to use gguf

Files changed (3)
  1. .gitignore +33 -2
  2. app.py +245 -287
  3. requirements.txt +2 -0
.gitignore CHANGED
@@ -1,3 +1,34 @@
- myvenv/
- .cache/
+ # .gitignore
+
+ # Python Virtual Environment
+ venv/
+ .venv/
+
+ # Python cache files
  __pycache__/
+ *.pyc
+ *.pyo
+ *.pyd
+
+ # Local Models (assuming you download GGUF here)
+ models/
+ *.gguf
+
+ # IDE / Editor specific files
+ .vscode/
+ .idea/
+ *.sublime-project
+ *.sublime-workspace
+
+ # OS generated files
+ .DS_Store
+ Thumbs.db
+
+ # Secrets / Environment variables (if you use a .env file later)
+ .env
+
+ # Gradio cache/temp files (optional, but can be useful)
+ gradio_cached_examples/
+ gradio_cached_examples_log.csv
+
+ .cache
app.py CHANGED
@@ -1,214 +1,241 @@
  import os
  import re
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer

  # Detect Space environment by SPACE_ID env var
  env = os.environ
  is_space = env.get("SPACE_ID") is not None
- print("RUNNING IN SPACE?", is_space)

- # Model selection
- if is_space:
-     primary_checkpoint = "ibm-granite/granite-3.3-2b-instruct"
-     fallback_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
- else:
-     primary_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
-     fallback_checkpoint = None
-
- # Device setup
  device = "cuda" if torch.cuda.is_available() else "cpu"

- # Load model with fallback
  def load_model():
-     print(f"🔍 Loading model: {primary_checkpoint}")
-     try:
-         # Use optimized loading settings suitable for Granite
-         load_kwargs = {
-             "use_fast": True,
-             "torch_dtype": torch.float16,
-             "low_cpu_mem_usage": True
-         } if primary_checkpoint.startswith("ibm-granite") else {}
-
-         tokenizer = AutoTokenizer.from_pretrained(
-             primary_checkpoint,
-             **{k: v for k, v in load_kwargs.items() if k == 'use_fast'} # Only pass use_fast to tokenizer
-         )
-         model = AutoModelForCausalLM.from_pretrained(
-             primary_checkpoint,
-             **{k: v for k, v in load_kwargs.items() if k != 'use_fast'} # Pass other kwargs to model
-         ).to(device)
-         print(f"✅ Loaded primary {primary_checkpoint}")
-         return tokenizer, model, primary_checkpoint
-     except Exception as e:
-         print(f"❌ Primary load failed: {e}")
-         if fallback_checkpoint:
-             print(f"🔁 Falling back to {fallback_checkpoint}")
-             tokenizer = AutoTokenizer.from_pretrained(fallback_checkpoint)
-             model = AutoModelForCausalLM.from_pretrained(fallback_checkpoint).to(device)
-             print(f"✅ Loaded fallback {fallback_checkpoint}")
-             return tokenizer, model, fallback_checkpoint
-         raise
-
- tokenizer, model, model_name = load_model()
-
-
- # --- Start: Apply CORRECTED Chat Template from File (if Granite) ---
- if "granite" in model_name.lower():
-     template_filename = "granite3.3_2b_chat_template.jinja" # Use the new filename
-     applied_template = False
-     try:
-         # Assuming the template file is in the same directory as app.py (project root)
-         print(f"Attempting to load corrected chat template from: {template_filename}")
-         with open(template_filename, "r", encoding="utf-8") as f:
-             custom_chat_template_content = f.read()
-
-         # Assign the loaded template content
-         tokenizer.chat_template = custom_chat_template_content
-         applied_template = True
-         print(f"✅ Loaded and applied corrected chat template from: {template_filename}")
-
-     except FileNotFoundError:
-         print(f"⚠️ WARNING: Corrected template file '{template_filename}' not found.")
-     except Exception as e:
-         print(f"❌ ERROR reading corrected template file '{template_filename}': {e}")
-
-     # Fallback / Verification print
-     if not applied_template:
-         print("Falling back to tokenizer's default built-in template (which might be incorrect).")
-     print("--- Final Chat Template Being Used ---")
-     if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template:
-         print(tokenizer.chat_template) # Print the template actually being used
-     else:
-         print("Tokenizer does not have a chat_template attribute or it is empty.")
-     print("------------------------------------")

- else:
-     print("Model is not Granite, using default chat template.")
- # --- End: Apply CORRECTED Chat Template from File ---


- # --- Start: Print Loaded Chat Template ---
- print("--- Tokenizer's Loaded Chat Template ---")
- # Check if the attribute exists before printing
- if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template:
-     print(tokenizer.chat_template)
- else:
-     print("Tokenizer does not have a chat_template attribute or it is empty.")
- print("------------------------------------")
- # --- End: Print Loaded Chat Template ---

- # Load hotel docs
  def load_hotel_docs(hotel_id):
-     path = os.path.join("knowledge", f"{hotel_id}.txt")
      if not os.path.exists(path):
          print(f"⚠️ Knowledge file not found: {path}")
          return []
      try:
          with open(path, encoding="utf-8") as f:
              content = f.read().strip()
          return [(hotel_id, content)]
      except Exception as e:
          print(f"❌ Error reading knowledge file {path}: {e}")
          return []

- # Chat function
  def chat(message, history, hotel_id):
      # Convert incoming UI history (list of dicts) to tuple list
-     if history is None:
-         history_tuples = []
-     else:
-         history_tuples = [(m['role'], m['content']) for m in history]
-     # Append the new user turn
      history_tuples.append(("user", message))

      # Yield user message immediately
      ui_history = [{"role": r, "content": c} for r, c in history_tuples]
      yield ui_history, "" # Update chat, clear textbox

-     # Local Qwen flow
-     if not is_space:
-         # Build messages including the new user turn
-         msgs = [{"role": role, "content": content} for role, content in history_tuples]
-         input_text = tokenizer.apply_chat_template(
-             msgs,
-             tokenize=False,
-             add_generation_prompt=True
-         )
-         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-
-         with torch.no_grad():
-             outputs = model.generate(inputs, max_new_tokens=1024, do_sample=True)
-
-         decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
-         print("--- Qwen Raw Output ---")
-         print(decoded)
-         print("-----------------------")
-
-         # Extract assistant response for Qwen
-         try:
-             response = decoded.split("<|im_start|>assistant")[-1]
-             response = response.split("<|im_end|>")[0].strip()
-             if not response: # Handle potential empty split
-                 response = "Sorry, I encountered an issue generating a response."
-         except IndexError:
-             print("❌ Error splitting Qwen response.")
-             response = "Sorry, I couldn't parse the model's response."
-
-     # IBM Granite RAG flow (Space environment)
-     else:
-         # --- Start: Dynamic System Prompt Loading ---
-         default_system_prompt = (
-             "You are a helpful hotel assistant. Use only the provided documents to answer questions about the hotel. "
-             "Greet guests politely. If the information needed to answer the question is not available in the documents, "
-             "inform the user that the question cannot be answered based on the available data."
-         )
          system_prompt_filename = f"{hotel_id}-system.txt"
          system_prompt_path = os.path.join("knowledge", system_prompt_filename)
-         system_prompt_content = default_system_prompt # Start with default
-
          if os.path.exists(system_prompt_path):
              try:
-                 with open(system_prompt_path, "r", encoding="utf-8") as f:
-                     loaded_prompt = f.read().strip()
-                 if loaded_prompt: # Use file content only if it's not empty
-                     system_prompt_content = loaded_prompt
-                     print(f" Loaded system prompt from: {system_prompt_path}")
-                 else:
-                     print(f"⚠️ System prompt file '{system_prompt_path}' is empty. Using default.")
-             except Exception as e:
-                 print(f"❌ Error reading system prompt file '{system_prompt_path}': {e}. Using default.")
-         else:
-             print(f"⚠️ System prompt file not found: '{system_prompt_path}'. Using default.")
-         # --- End: Dynamic System Prompt Loading ---

          messages = [{"role": "system", "content": system_prompt_content}]

-         # Load and add hotel document(s)
          hotel_docs = load_hotel_docs(hotel_id)
          if not hotel_docs:
              # If no knowledge doc found, inform user and stop
              ui_history.append({"role": "assistant", "content": f"Sorry, I don't have specific information loaded for the hotel '{hotel_id}'."})
-             yield ui_history, "" # Update chat, keep textbox cleared
-             return # Exit the function early

-         for hotel_doc_id, doc_content in hotel_docs: # Assuming hotel_docs might contain multiple docs later
              messages.append({
                  "role": "document",
-                 "text": doc_content,
-                 "doc_id": hotel_doc_id # <<< CHANGED KEY HERE
              })

-
-         # Include full history including the new user message
          for role, content in history_tuples:
              messages.append({"role": role, "content": content})

-         # Set meta data (annotations) which influences the bot behaviour in the controls json
          controls = {"length":"short","originality": "abstractive"}

-         # Apply the template
          input_text = tokenizer.apply_chat_template(
              messages,
              tokenize=False,
@@ -216,178 +243,109 @@ def chat(message, history, hotel_id):
              controls=controls
          )

-         print("--- Granite Templated Input ---")
          print(input_text)
-         print("-----------------------------")
-
-         # --- Tokenize AND get input length/attention mask ---
-         inputs = tokenizer(input_text, return_tensors="pt").to(device) # Use tokenizer()
-         input_length = inputs.input_ids.shape[1] # Define input_length using input_ids
-         print(f"DEBUG: Input token length = {input_length}") # Keep this debug print
-
-         # --- Generate using input_ids and attention_mask ---
-         with torch.no_grad():
-             outputs = model.generate(
-                 inputs.input_ids, # Pass input_ids explicitly
-                 attention_mask=inputs.attention_mask, # Pass attention_mask
-                 max_new_tokens=1024,
-                 do_sample=False
-             )

-         # --- Raw output shape printing (keep) ---
-         print("--- Granite Raw Output Tokens (Shape) ---")
-         print(outputs.shape)
-         print("-----------------------------------------")

-         # --- Start: NEW Decoding Strategy (like IBM example) ---
-         try:
-             # Get only the newly generated token IDs
              new_token_ids = outputs[0][input_length:]
-             print(f"DEBUG: Number of new tokens generated = {len(new_token_ids)}") # Debug print
-
-             # Decode only the new tokens, skipping special tokens like <|end_of_text|>
              response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
-             print(f"DEBUG: Decoded response (skip_special_tokens=True) = {repr(response)}") # Debug print
-
-             if not response:
-                 response = "Sorry, I encountered an issue generating a response (empty)."
-
-         except Exception as e:
-             print(f"❌ Unexpected Error during NEW decoding: {e}")
-             response = "Sorry, an unexpected error occurred during decoding."
-         # --- End: NEW Decoding Strategy ---
-
-         # --- ADD THIS DEBUG LINE (if not already present) ---
-         print(f"DEBUG: Final response variable before UI append = {repr(response)}")
-         # --- END ADD THIS DEBUG LINE ---
-
-
-
-     # Add the final assistant reply to the UI history
-     ui_history.append({"role": "assistant", "content": response})
-
-     # Final yield with assistant reply
-     yield ui_history, "" # Update chat, keep textbox cleared
-
-
- # --- Start: Dynamic Hotel ID Detection ---
- knowledge_dir = "knowledge"
- available_hotels = []
-
- # --- Add Debugging ---
- print(f"DEBUG: Current Working Directory: {os.getcwd()}")
- print(f"DEBUG: Checking for knowledge directory at relative path: '{knowledge_dir}'")
- knowledge_dir_abs = os.path.abspath(knowledge_dir)
- print(f"DEBUG: Absolute path for knowledge directory: '{knowledge_dir_abs}'")
- # --- End Debugging ---

- # Check if the knowledge directory exists and is a directory
- if os.path.isdir(knowledge_dir):
-     # --- Add Debugging ---
-     try:
-         print(f"DEBUG: Listing contents of '{knowledge_dir_abs}':")
-         dir_contents = os.listdir(knowledge_dir)
-         print(f"DEBUG: Found files/dirs: {dir_contents}")
-     except Exception as e:
-         print(f"DEBUG: Error listing directory '{knowledge_dir_abs}': {e}")
-     # --- End Debugging ---
-
-     potential_ids = set()
-     # First pass: collect all potential base names from .txt files
-     for filename in os.listdir(knowledge_dir): # Assuming listdir succeeded if we got here
-         if filename.endswith(".txt") and not filename.startswith('.'): # Ignore hidden files
-             if filename.endswith("-system.txt"):
-                 # Extract base name from system prompt file
-                 base_name = filename[:-len("-system.txt")]
-             else:
-                 # Extract base name from main knowledge file
-                 base_name = filename[:-len(".txt")]
-
-             if base_name: # Ensure we got a non-empty base name
-                 potential_ids.add(base_name)
-
-     # Second pass: check if both files exist for each potential ID
-     # Sort the potential IDs for consistent dropdown order
-     print(f"DEBUG: Potential hotel IDs found: {sorted(list(potential_ids))}") # Debug potential IDs
-     for hotel_id in sorted(list(potential_ids)):
-         main_file = os.path.join(knowledge_dir, f"{hotel_id}.txt")
-         system_file = os.path.join(knowledge_dir, f"{hotel_id}-system.txt")
-
-         # --- Add Debugging ---
-         main_file_abs = os.path.abspath(main_file)
-         system_file_abs = os.path.abspath(system_file)
-         print(f"DEBUG: Checking pair for ID '{hotel_id}':")
-         print(f"DEBUG: Main file: '{main_file_abs}' -> Exists? {os.path.exists(main_file)}")
-         print(f"DEBUG: System file: '{system_file_abs}' -> Exists? {os.path.exists(system_file)}")
-         # --- End Debugging ---
-
-         # Check if BOTH the main knowledge file AND the system prompt file exist
-         if os.path.exists(main_file) and os.path.exists(system_file):
-             available_hotels.append(hotel_id)
-             print(f"✅ Found valid hotel pair: {hotel_id}")
          else:
-             # Optional: Print a warning if one file exists but not the other
-             if os.path.exists(main_file) or os.path.exists(system_file):
-                 print(f"⚠️ Skipping '{hotel_id}': Missing either '{hotel_id}.txt' or '{hotel_id}-system.txt'")
-             # --- Add Debugging ---
-             # Add an else here to catch cases where NEITHER file exists for a potential ID
-             elif not os.path.exists(main_file) and not os.path.exists(system_file):
-                 print(f"DEBUG: Neither file found for potential ID '{hotel_id}' at checked paths.")
-             # --- End Debugging ---

- else:
-     print(f"❌ Error: Knowledge directory '{knowledge_dir}' (abs path: '{knowledge_dir_abs}') not found or is not a directory.")


- # Handle case where no valid hotels were found
- if not available_hotels:
-     print("🚨 CRITICAL: No valid hotels found in the knowledge directory. The dropdown will be empty.")
-     # You might want to add a placeholder or handle this error more gracefully
-     # For now, the dropdown will just be empty or disabled.
- # --- End: Dynamic Hotel ID Detection ---

- # --- The above dynamic detection replaces the old hardcoded list ---
- # hotel_ids = ["cyprus-guesthouse-family", "coastal-villa-family", "village-inn-family"] # Remove or comment out this line


- # Gradio UI
- # Gradio UI
  with gr.Blocks() as demo:
      with gr.Column(variant="panel"):
          gr.Markdown("### 🏨 Multi‑Hotel Chatbot Demo")
-         gr.Markdown(f"**Running:** {model_name}")

          hotel_selector = gr.Dropdown(
-             choices=available_hotels, # Use the dynamically generated list
              label="Hotel",
-             value=available_hotels[0] if available_hotels else None, # Set default to first found hotel, or None if empty
-             interactive=bool(available_hotels) # Disable dropdown if no hotels are found
          )

          with gr.Row():
-             # Use type="messages" for the dictionary format expected by the chat function
-             chatbot = gr.Chatbot(type="messages", label="Chat History")

          msg = gr.Textbox(
              show_label=False,
              placeholder="Ask about the hotel..."
          )

-         # Clear button needs to reset chatbot to None or empty list, and clear textbox
          clear_btn = gr.Button("Clear")
-         clear_btn.click(lambda: (None, ""), None, [chatbot, msg]) # Reset chatbot history to None

-         # Wire the textbox submission
          msg.submit(
              fn=chat,
              inputs=[msg, chatbot, hotel_selector],
-             outputs=[chatbot, msg] # chatbot updates, msg clears
          )

-         gr.Markdown("⚠️ Pause the Space when done to avoid charges.")

  # Enable streaming queue
  demo.queue(default_concurrency_limit=2, max_size=32)

  if __name__ == "__main__":
      demo.launch()

  import os
  import re
+ import json # For debug printing
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
+ try:
+     # Try importing ctransformers for GGUF support
+     from ctransformers import AutoModelForCausalLM as AutoModelForCausalLM_GGUF
+     CTRANSFORMERS_AVAILABLE = True
+ except ImportError:
+     print("⚠️ WARNING: ctransformers library not found. Local GGUF execution will not be available.")
+     print(" To enable local GGUF, run: pip install ctransformers>=0.2.27")
+     AutoModelForCausalLM_GGUF = None # Define as None if import fails
+     CTRANSFORMERS_AVAILABLE = False
+
+ # --- Configuration for Local GGUF ---
+ # Set this environment variable or replace the default path
+ # Download granite-3.3-2b-instruct-Q2_K.gguf (or other) from Hugging Face
+ DEFAULT_GGUF_PATH = "./models/granite-3.3-2b-instruct-Q2_K.gguf" # Example path
+ GGUF_MODEL_PATH = os.environ.get("GGUF_MODEL_PATH", DEFAULT_GGUF_PATH)
+ CORRECTED_TEMPLATE_FILENAME = "corrected_granite_template.jinja" # Name of your corrected template file
+ # --- End Configuration ---

  # Detect Space environment by SPACE_ID env var
  env = os.environ
  is_space = env.get("SPACE_ID") is not None
+ print(f"RUNNING IN SPACE? {is_space}")

+ # Device setup (primarily for HF model)
  device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Using device: {device}")

+ # Load model function (handles HF Space vs Local GGUF)
  def load_model():
+     primary_checkpoint = "ibm-granite/granite-3.3-2b-instruct"
+     model_name_display = primary_checkpoint # Use this for UI display always

+     # --- Function to load and apply template ---
+     def apply_template_from_file(tokenizer, template_filename):
+         applied_template = False
+         try:
+             print(f"Attempting to load corrected chat template from: {template_filename}")
+             # Ensure the template file path is relative to the script location
+             script_dir = os.path.dirname(os.path.abspath(__file__))
+             template_path = os.path.join(script_dir, template_filename)
+
+             if not os.path.exists(template_path):
+                 print(f"⚠️ WARNING: Corrected template file not found at: {template_path}")
+                 return False # Indicate failure
+
+             with open(template_path, "r", encoding="utf-8") as f:
+                 custom_chat_template_content = f.read()
+             tokenizer.chat_template = custom_chat_template_content
+             applied_template = True
+             print(f"✅ Loaded and applied corrected chat template from: {template_filename}")
+         except FileNotFoundError:
+             # This case is handled by the os.path.exists check above
+             pass
+         except Exception as e:
+             print(f"❌ ERROR reading corrected template file '{template_filename}': {e}")
+
+         # Fallback / Verification print
+         if not applied_template:
+             print("Falling back to tokenizer's default built-in template.")
+         print("--- Final Chat Template Being Used ---")
+         print(tokenizer.chat_template if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template else "No template found or template empty.")
+         print("------------------------------------")
+         return applied_template
+     # --- End function ---
+
+     if is_space:
+         print(f"🚀 Running in Space. Loading HF model: {primary_checkpoint}")
+         try:
+             # Load HF Tokenizer
+             tokenizer = AutoTokenizer.from_pretrained(primary_checkpoint, use_fast=True)
+             # Load HF Model
+             model = AutoModelForCausalLM.from_pretrained(
+                 primary_checkpoint,
+                 torch_dtype=torch.float16,
+                 low_cpu_mem_usage=True,
+                 device_map="auto" # Use device_map for HF model
+             )
+             print(f"✅ Loaded HF {primary_checkpoint}")
+             apply_template_from_file(tokenizer, CORRECTED_TEMPLATE_FILENAME)
+             return tokenizer, model, model_name_display

+         except Exception as e:
+             print(f"❌ HF Primary load failed: {e}")
+             raise RuntimeError(f"Failed to load primary HF model {primary_checkpoint} in Space.") from e

+     else: # Running Locally - Load GGUF
+         print(f"💻 Running Locally. Attempting GGUF setup.")
+         if not CTRANSFORMERS_AVAILABLE:
+             raise RuntimeError("ctransformers library is required for local GGUF execution but not installed.")

+         print(f" GGUF model path: {GGUF_MODEL_PATH}")
+         print(f" Using HF tokenizer for template: {primary_checkpoint}")
+         try:
+             # Load HF Tokenizer (needed for apply_chat_template)
+             tokenizer = AutoTokenizer.from_pretrained(primary_checkpoint, use_fast=True)
+             print("✅ Loaded HF Tokenizer for template application.")
+             apply_template_from_file(tokenizer, CORRECTED_TEMPLATE_FILENAME)
+
+             # Check if GGUF file exists before attempting to load
+             if not os.path.exists(GGUF_MODEL_PATH):
+                 raise FileNotFoundError(f"GGUF model file not found at specified path: {GGUF_MODEL_PATH}. Please download the model or set the GGUF_MODEL_PATH environment variable.")
+
+             # Load GGUF Model using ctransformers
+             model = AutoModelForCausalLM_GGUF.from_pretrained(
+                 GGUF_MODEL_PATH,
+                 model_type="llama", # Adjust if needed based on model card
+                 context_length=4096, # Can be adjusted
+                 gpu_layers=0 # CPU-only inference
+             )
+             print(f"✅ Loaded GGUF model {GGUF_MODEL_PATH}")
+             # Display GGUF path in UI when running locally
+             model_name_display = f"GGUF: {os.path.basename(GGUF_MODEL_PATH)}"
+             return tokenizer, model, model_name_display
+
+         except Exception as e:
+             print(f"❌ Local GGUF load failed: {e}")
+             raise RuntimeError(f"Failed to load local GGUF model or its tokenizer.") from e
+
+ # --- Call load_model ---
+ try:
+     tokenizer, model, model_name = load_model()
+ except Exception as load_err:
+     print(f"🚨 CRITICAL ERROR DURING MODEL LOADING: {load_err}")
+     # Optionally, exit or provide a dummy model/tokenizer for Gradio UI to load without crashing
+     # For now, we'll let it potentially crash Gradio if loading fails.
+     raise
+
+ # --- Load hotel docs function ---
  def load_hotel_docs(hotel_id):
+     knowledge_dir = "knowledge"
+     path = os.path.join(knowledge_dir, f"{hotel_id}.txt")
      if not os.path.exists(path):
          print(f"⚠️ Knowledge file not found: {path}")
          return []
      try:
          with open(path, encoding="utf-8") as f:
              content = f.read().strip()
+         # Return as list of tuples: [(doc_id, content)]
+         # Using hotel_id as doc_id here
          return [(hotel_id, content)]
      except Exception as e:
          print(f"❌ Error reading knowledge file {path}: {e}")
          return []

+ # --- Dynamic Hotel ID Detection ---
+ knowledge_dir = "knowledge"
+ available_hotels = []
+ print("\n🔍 Scanning for available hotels...")
+ if os.path.isdir(knowledge_dir):
+     potential_ids = set()
+     for filename in os.listdir(knowledge_dir):
+         if filename.endswith(".txt") and not filename.startswith('.'):
+             if filename.endswith("-system.txt"):
+                 base_name = filename[:-len("-system.txt")]
+             else:
+                 base_name = filename[:-len(".txt")]
+             if base_name: potential_ids.add(base_name)
+
+     for hotel_id in sorted(list(potential_ids)):
+         main_file = os.path.join(knowledge_dir, f"{hotel_id}.txt")
+         system_file = os.path.join(knowledge_dir, f"{hotel_id}-system.txt")
+         if os.path.exists(main_file) and os.path.exists(system_file):
+             available_hotels.append(hotel_id)
+             print(f" ✅ Found valid hotel pair: {hotel_id}")
+         elif os.path.exists(main_file) or os.path.exists(system_file):
+             print(f" ⚠️ Skipping '{hotel_id}': Missing either '{hotel_id}.txt' or '{hotel_id}-system.txt'")
+ else:
+     print(f"❌ Error: Knowledge directory '{knowledge_dir}' not found or is not a directory.")
+ if not available_hotels:
+     print("🚨 CRITICAL: No valid hotels found. Dropdown will be empty/disabled.")
+ print("Hotel scan complete.\n")
+ # --- End Dynamic Hotel ID Detection ---
+
+
+ # --- Chat function ---
  def chat(message, history, hotel_id):
      # Convert incoming UI history (list of dicts) to tuple list
+     if history is None: history = [] # Ensure history is a list
+     history_tuples = [(m['role'], m['content']) for m in history if isinstance(m, dict) and 'role' in m and 'content' in m]
      history_tuples.append(("user", message))

      # Yield user message immediately
      ui_history = [{"role": r, "content": c} for r, c in history_tuples]
      yield ui_history, "" # Update chat, clear textbox

+     # --- Prompt Preparation (Common for both HF/GGUF) ---
+     input_text = "" # Initialize to avoid potential UnboundLocalError
+     try:
+         # --- Load System Prompt ---
+         default_system_prompt = "You are a helpful hotel assistant..." # Define your default
          system_prompt_filename = f"{hotel_id}-system.txt"
          system_prompt_path = os.path.join("knowledge", system_prompt_filename)
+         system_prompt_content = default_system_prompt
          if os.path.exists(system_prompt_path):
              try:
+                 with open(system_prompt_path, "r", encoding="utf-8") as f: loaded_prompt = f.read().strip()
+                 if loaded_prompt: system_prompt_content = loaded_prompt
+                 else: print(f"⚠️ System prompt file '{system_prompt_path}' is empty. Using default.")
+             except Exception as e: print(f"❌ Error reading system prompt file '{system_prompt_path}': {e}. Using default.")
+         else: print(f"⚠️ System prompt file not found: '{system_prompt_path}'. Using default.")
+         # --- End Load System Prompt ---

          messages = [{"role": "system", "content": system_prompt_content}]

+         # --- Load and add hotel document(s) ---
          hotel_docs = load_hotel_docs(hotel_id)
          if not hotel_docs:
              # If no knowledge doc found, inform user and stop
              ui_history.append({"role": "assistant", "content": f"Sorry, I don't have specific information loaded for the hotel '{hotel_id}'."})
+             yield ui_history, ""
+             return

+         for hotel_doc_id, doc_content in hotel_docs:
              messages.append({
                  "role": "document",
+                 "text": doc_content, # Use 'text' key
+                 "doc_id": hotel_doc_id # Use 'doc_id' key
              })
+         # --- End Load Documents ---

+         # --- Include chat history ---
          for role, content in history_tuples:
+             # Exclude the last user message as it's implicitly handled by template
+             if role == "user" and content == message and history_tuples.index((role, content)) == len(history_tuples) - 1:
+                 continue # Skip adding the very last user message again if template adds it
              messages.append({"role": role, "content": content})
+         # --- End Include History ---

+         # --- Set controls ---
          controls = {"length":"short","originality": "abstractive"}

+         # --- Apply the template ---
          input_text = tokenizer.apply_chat_template(
              messages,
              tokenize=False,
              controls=controls
          )

+         print("--- Templated Input ---")
          print(input_text)
+         print("-----------------------")

+     except Exception as e:
+         print(f" Error during prompt preparation: {e}")
+         ui_history.append({"role": "assistant", "content": "Sorry, an error occurred while preparing the prompt."})
+         yield ui_history, ""
+         return

+     # --- Generation Logic: Space (HF) vs Local (GGUF) ---
+     response = "Sorry, an error occurred during generation." # Default error response
+     try:
+         if is_space:
+             # --- HF Model Generation (Space) ---
+             print("🚀 Generating response using HF model...")
+             inputs = tokenizer(input_text, return_tensors="pt").to(device)
+             input_length = inputs.input_ids.shape[1]
+             print(f"DEBUG: Input token length = {input_length}")
+
+             with torch.no_grad():
+                 outputs = model.generate(
+                     inputs.input_ids,
+                     attention_mask=inputs.attention_mask,
+                     max_new_tokens=1024,
+                     do_sample=False,
+                     eos_token_id=tokenizer.eos_token_id # Explicitly use EOS token ID
+                 )
+             print(f"DEBUG: Output tokens shape = {outputs.shape}")
+
+             # Decode using the IBM example strategy
              new_token_ids = outputs[0][input_length:]
+             print(f"DEBUG: Number of new tokens generated = {len(new_token_ids)}")
              response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
+             print(f"DEBUG: Decoded response (skip_special_tokens=True) = {repr(response)}")
+             print("✅ HF Generation complete.")

          else:
+             # --- GGUF Model Generation (Local) ---
+             print("💻 Generating response using GGUF model...")
+             response = model(
+                 input_text,
+                 max_new_tokens=1024,
+                 stop=["<|end_of_text|>"], # Stop sequence for GGUF
+                 temperature=0.3 # Example temperature
+             )
+             response = response.strip()
+             print("✅ GGUF Generation complete.")

+         # Handle empty response after generation
+         if not response:
+             response = "Sorry, I encountered an issue generating a response (empty)."

+     except Exception as e:
+         print(f"❌ Error during model generation or decoding: {e}")
+         # Keep the default error response defined above

+     # --- Final Response Handling ---
+     print(f"DEBUG: Final response variable before UI append = {repr(response)}")

+     # Add the final assistant reply to the UI history
+     ui_history.append({"role": "assistant", "content": response})

+     # Final yield with assistant reply
+     yield ui_history, "" # Update chat, keep textbox cleared

+ # --- Gradio UI ---
  with gr.Blocks() as demo:
      with gr.Column(variant="panel"):
          gr.Markdown("### 🏨 Multi‑Hotel Chatbot Demo")
+         gr.Markdown(f"**Running:** {model_name}") # Displays HF name or GGUF path

          hotel_selector = gr.Dropdown(
+             choices=available_hotels,
              label="Hotel",
+             value=available_hotels[0] if available_hotels else None,
+             interactive=bool(available_hotels)
          )

          with gr.Row():
+             chatbot = gr.Chatbot(type="messages", label="Chat History", height=500)

          msg = gr.Textbox(
              show_label=False,
              placeholder="Ask about the hotel..."
          )

          clear_btn = gr.Button("Clear")
+         clear_btn.click(lambda: (None, ""), None, [chatbot, msg])

          msg.submit(
              fn=chat,
              inputs=[msg, chatbot, hotel_selector],
+             outputs=[chatbot, msg]
          )

+         if is_space:
+             gr.Markdown("⚠️ Pause the Space when done to avoid charges.")

  # Enable streaming queue
  demo.queue(default_concurrency_limit=2, max_size=32)

  if __name__ == "__main__":
+     print("Launching Gradio Interface...")
      demo.launch()
+ print("Gradio Interface closed.")
requirements.txt CHANGED
@@ -1,9 +1,11 @@
 
+ accelerate
  aiofiles==24.1.0
  annotated-types==0.7.0
  anyio==4.9.0
  certifi==2025.1.31
  charset-normalizer==3.4.1
  click==8.1.8
+ ctransformers>=0.2.27
  exceptiongroup==1.2.2
  fastapi==0.115.12
  ffmpy==0.5.0
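
Side note (not part of the commit): a quick environment-check sketch for the local venv, assuming Python 3.8+ so importlib.metadata is available; it only confirms that the two packages added above resolve inside the environment.

import importlib.metadata as md

for pkg in ("accelerate", "ctransformers"):  # the two lines added to requirements.txt
    try:
        print(f"{pkg} {md.version(pkg)} is installed")
    except md.PackageNotFoundError:
        print(f"{pkg} is missing; run: pip install -r requirements.txt")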