looker01202 committed
Commit f0fe889 · 1 Parent(s): 86aae0c

setup local venv to use gguf5

Files changed (1): app.py (+23 -1)
app.py CHANGED

@@ -102,10 +102,12 @@ def load_model():
     # Load GGUF Model using ctransformers, downloading from Hub
     # ctransformers will download the specified model_file from the repo_id
     # if it's not already cached locally.
+
     model = AutoModelForCausalLM_GGUF.from_pretrained(
         GGUF_REPO_ID,              # Pass the Repository ID
         model_file=GGUF_FILENAME,  # Specify the exact file to load/download
-        gpu_layers=0               # CPU-only inference
+        model_type="gpt_neox",     # or "llama"
+        #gpu_layers=0              # CPU-only inference
     )
     print(f"✅ Loaded GGUF model {GGUF_FILENAME} from {GGUF_REPO_ID}")
     # Display GGUF info in UI when running locally
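For context, here is a minimal standalone sketch of the loading pattern this hunk changes, assuming AutoModelForCausalLM_GGUF is ctransformers' AutoModelForCausalLM imported under an alias; the repo and file names below are placeholders, not the app's real configuration:

    # Sketch only: load a GGUF model from the Hub with ctransformers.
    from ctransformers import AutoModelForCausalLM as AutoModelForCausalLM_GGUF

    GGUF_REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"  # placeholder repo
    GGUF_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"   # placeholder file

    model = AutoModelForCausalLM_GGUF.from_pretrained(
        GGUF_REPO_ID,              # Hub repo; the file is cached after first download
        model_file=GGUF_FILENAME,  # exact .gguf file within the repo
        model_type="llama",        # must match the architecture ("gpt_neox", "llama", ...)
        gpu_layers=0,              # 0 = CPU-only; increase to offload layers to a GPU
    )
    print(model("Hello, my name is", max_new_tokens=16))

Note that model_type has to match the quantized model's architecture; a mismatch is a common cause of load failures, which is presumably why this commit pins it explicitly.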
@@ -136,6 +138,14 @@ def load_hotel_docs(hotel_id):
     try:
         with open(path, encoding="utf-8") as f:
             content = f.read().strip()
+
+        # --- ADD DEBUG PRINT ---
+        print(f"DEBUG [load_hotel_docs]: Read {len(content)} chars from {path}. Content starts: '{content[:100]}...'")
+        # --- END DEBUG PRINT ---
+        if not content:
+            print(f"⚠️ WARNING [load_hotel_docs]: File {path} is empty.")
+            return []
+
         return [(hotel_id, content)]
     except Exception as e:
         print(f"❌ Error reading knowledge file {path}: {e}")
@@ -197,6 +207,9 @@ def chat(message, history, hotel_id):
     messages = [{"role": "system", "content": system_prompt_content}]

     hotel_docs = load_hotel_docs(hotel_id)
+    # --- ADD DEBUG PRINT ---
+    print(f"DEBUG [chat]: load_hotel_docs returned: {hotel_docs}")
+    # --- END DEBUG PRINT ---
     if not hotel_docs:
         ui_history.append({"role": "assistant", "content": f"Sorry, I don't have specific information loaded for the hotel '{hotel_id}'."})
         yield ui_history, ""
@@ -219,6 +232,15 @@ def chat(message, history, hotel_id):
     #controls = {"length":"short","originality": "abstractive"}
     controls = {}

+    # --- ADD DEBUG PRINT ---
+    try:
+        import json  # Make sure json is imported at the top
+        print(f"DEBUG [chat]: Messages list BEFORE apply_chat_template:\n{json.dumps(messages, indent=2)}")
+    except Exception as e:
+        print(f"DEBUG [chat]: Error printing messages list: {e}")
+        print(f"DEBUG [chat]: Raw messages list: {messages}")
+    # --- END DEBUG PRINT ---
+
     input_text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
 
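The messages dump in the last hunk sits immediately before tokenizer.apply_chat_template, so the printed JSON is exactly what gets templated into the prompt. A minimal sketch of that call with the standard Transformers API; the checkpoint name is a hypothetical stand-in for whichever tokenizer app.py actually loads (the commented-out controls dict suggests an IBM Granite-family model, but that is a guess):

    # Sketch only: the checkpoint below is a placeholder, not app.py's config.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.0-2b-instruct")
    messages = [
        {"role": "system", "content": "You are a hotel concierge."},
        {"role": "user", "content": "Is late checkout available?"},
    ]
    input_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,              # return the rendered prompt string, not token IDs
        add_generation_prompt=True,  # append the assistant header so the model answers next
    )
    print(input_text)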