looker01202 committed
Commit f0fe889 · 1 Parent(s): 86aae0c

setup local venv to use gguf5

Files changed (1): app.py (+23 -1)
app.py CHANGED

@@ -102,10 +102,12 @@ def load_model():
     # Load GGUF Model using ctransformers, downloading from Hub
     # ctransformers will download the specified model_file from the repo_id
     # if it's not already cached locally.
+
     model = AutoModelForCausalLM_GGUF.from_pretrained(
         GGUF_REPO_ID,              # Pass the Repository ID
         model_file=GGUF_FILENAME,  # Specify the exact file to load/download
-        gpu_layers=0               # CPU-only inference
+        model_type="gpt_neox",     # or "llama"
+        #gpu_layers=0              # CPU-only inference
     )
     print(f"✅ Loaded GGUF model {GGUF_FILENAME} from {GGUF_REPO_ID}")
     # Display GGUF info in UI when running locally
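For context, here is a minimal standalone sketch of the loading pattern this hunk changes, assuming AutoModelForCausalLM_GGUF is ctransformers' AutoModelForCausalLM imported under an alias; the repo and file names below are placeholders, not the app's real configuration:

    # Sketch only: load a GGUF model from the Hub with ctransformers.
    from ctransformers import AutoModelForCausalLM as AutoModelForCausalLM_GGUF

    GGUF_REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"  # placeholder repo
    GGUF_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"   # placeholder file

    model = AutoModelForCausalLM_GGUF.from_pretrained(
        GGUF_REPO_ID,              # Hub repo; the file is cached after first download
        model_file=GGUF_FILENAME,  # exact .gguf file within the repo
        model_type="llama",        # must match the architecture ("gpt_neox", "llama", ...)
        gpu_layers=0,              # 0 = CPU-only; increase to offload layers to a GPU
    )
    print(model("Hello, my name is", max_new_tokens=16))

Note that model_type has to match the quantized model's architecture; a mismatch is a common cause of load failures, which is presumably why this commit pins it explicitly.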
@@ -136,6 +138,14 @@ def load_hotel_docs(hotel_id):
     try:
         with open(path, encoding="utf-8") as f:
             content = f.read().strip()
+
+        # --- ADD DEBUG PRINT ---
+        print(f"DEBUG [load_hotel_docs]: Read {len(content)} chars from {path}. Content starts: '{content[:100]}...'")
+        # --- END DEBUG PRINT ---
+        if not content:
+            print(f"⚠️ WARNING [load_hotel_docs]: File {path} is empty.")
+            return []
+
         return [(hotel_id, content)]
     except Exception as e:
         print(f"❌ Error reading knowledge file {path}: {e}")
@@ -197,6 +207,9 @@ def chat(message, history, hotel_id):
     messages = [{"role": "system", "content": system_prompt_content}]

     hotel_docs = load_hotel_docs(hotel_id)
+    # --- ADD DEBUG PRINT ---
+    print(f"DEBUG [chat]: load_hotel_docs returned: {hotel_docs}")
+    # --- END DEBUG PRINT ---
     if not hotel_docs:
         ui_history.append({"role": "assistant", "content": f"Sorry, I don't have specific information loaded for the hotel '{hotel_id}'."})
         yield ui_history, ""
@@ -219,6 +232,15 @@ def chat(message, history, hotel_id):
     #controls = {"length":"short","originality": "abstractive"}
     controls = {}

+    # --- ADD DEBUG PRINT ---
+    try:
+        import json  # Make sure json is imported at the top
+        print(f"DEBUG [chat]: Messages list BEFORE apply_chat_template:\n{json.dumps(messages, indent=2)}")
+    except Exception as e:
+        print(f"DEBUG [chat]: Error printing messages list: {e}")
+        print(f"DEBUG [chat]: Raw messages list: {messages}")
+    # --- END DEBUG PRINT ---
+
     input_text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
 
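The messages dump in the last hunk sits immediately before tokenizer.apply_chat_template, so the printed JSON is exactly what gets templated into the prompt. A minimal sketch of that call with the standard Transformers API; the checkpoint name is a hypothetical stand-in for whichever tokenizer app.py actually loads (the commented-out controls dict suggests an IBM Granite-family model, but that is a guess):

    # Sketch only: the checkpoint below is a placeholder, not app.py's config.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.0-2b-instruct")
    messages = [
        {"role": "system", "content": "You are a hotel concierge."},
        {"role": "user", "content": "Is late checkout available?"},
    ]
    input_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,              # return the rendered prompt string, not token IDs
        add_generation_prompt=True,  # append the assistant header so the model answers next
    )
    print(input_text)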