looker01202 committed
Commit 7226a27 · 1 Parent(s): 45c767b

setup local venv to use gguf

Files changed (3)
  1. .gitignore +33 -2
  2. app.py +245 -287
  3. requirements.txt +2 -0
.gitignore CHANGED
@@ -1,3 +1,34 @@
- myvenv/
- .cache/
+ # .gitignore
+
+ # Python Virtual Environment
+ venv/
+ .venv/
+
+ # Python cache files
  __pycache__/
+ *.pyc
+ *.pyo
+ *.pyd
+
+ # Local Models (assuming you download GGUF here)
+ models/
+ *.gguf
+
+ # IDE / Editor specific files
+ .vscode/
+ .idea/
+ *.sublime-project
+ *.sublime-workspace
+
+ # OS generated files
+ .DS_Store
+ Thumbs.db
+
+ # Secrets / Environment variables (if you use a .env file later)
+ .env
+
+ # Gradio cache/temp files (optional, but can be useful)
+ gradio_cached_examples/
+ gradio_cached_examples_log.csv
+
+ .cache
app.py CHANGED
@@ -1,214 +1,241 @@
  import os
  import re
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer

  # Detect Space environment by SPACE_ID env var
  env = os.environ
  is_space = env.get("SPACE_ID") is not None
- print("RUNNING IN SPACE?", is_space)

- # Model selection
- if is_space:
-     primary_checkpoint = "ibm-granite/granite-3.3-2b-instruct"
-     fallback_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
- else:
-     primary_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
-     fallback_checkpoint = None
-
- # Device setup
  device = "cuda" if torch.cuda.is_available() else "cpu"

- # Load model with fallback
  def load_model():
-     print(f"🔍 Loading model: {primary_checkpoint}")
-     try:
-         # Use optimized loading settings suitable for Granite
-         load_kwargs = {
-             "use_fast": True,
-             "torch_dtype": torch.float16,
-             "low_cpu_mem_usage": True
-         } if primary_checkpoint.startswith("ibm-granite") else {}
-
-         tokenizer = AutoTokenizer.from_pretrained(
-             primary_checkpoint,
-             **{k: v for k, v in load_kwargs.items() if k == 'use_fast'} # Only pass use_fast to tokenizer
-         )
-         model = AutoModelForCausalLM.from_pretrained(
-             primary_checkpoint,
-             **{k: v for k, v in load_kwargs.items() if k != 'use_fast'} # Pass other kwargs to model
-         ).to(device)
-         print(f"✅ Loaded primary {primary_checkpoint}")
-         return tokenizer, model, primary_checkpoint
-     except Exception as e:
-         print(f"❌ Primary load failed: {e}")
-         if fallback_checkpoint:
-             print(f"🔁 Falling back to {fallback_checkpoint}")
-             tokenizer = AutoTokenizer.from_pretrained(fallback_checkpoint)
-             model = AutoModelForCausalLM.from_pretrained(fallback_checkpoint).to(device)
-             print(f"✅ Loaded fallback {fallback_checkpoint}")
-             return tokenizer, model, fallback_checkpoint
-         raise
-
- tokenizer, model, model_name = load_model()
-
-
- # --- Start: Apply CORRECTED Chat Template from File (if Granite) ---
- if "granite" in model_name.lower():
-     template_filename = "granite3.3_2b_chat_template.jinja" # Use the new filename
-     applied_template = False
-     try:
-         # Assuming the template file is in the same directory as app.py (project root)
-         print(f"Attempting to load corrected chat template from: {template_filename}")
-         with open(template_filename, "r", encoding="utf-8") as f:
-             custom_chat_template_content = f.read()
-
-         # Assign the loaded template content
-         tokenizer.chat_template = custom_chat_template_content
-         applied_template = True
-         print(f"✅ Loaded and applied corrected chat template from: {template_filename}")
-
-     except FileNotFoundError:
-         print(f"⚠️ WARNING: Corrected template file '{template_filename}' not found.")
-     except Exception as e:
-         print(f"❌ ERROR reading corrected template file '{template_filename}': {e}")
-
-     # Fallback / Verification print
-     if not applied_template:
-         print("Falling back to tokenizer's default built-in template (which might be incorrect).")
-     print("--- Final Chat Template Being Used ---")
-     if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template:
-         print(tokenizer.chat_template) # Print the template actually being used
-     else:
-         print("Tokenizer does not have a chat_template attribute or it is empty.")
-     print("------------------------------------")

- else:
-     print("Model is not Granite, using default chat template.")
- # --- End: Apply CORRECTED Chat Template from File ---


- # --- Start: Print Loaded Chat Template ---
- print("--- Tokenizer's Loaded Chat Template ---")
- # Check if the attribute exists before printing
- if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template:
-     print(tokenizer.chat_template)
- else:
-     print("Tokenizer does not have a chat_template attribute or it is empty.")
- print("------------------------------------")
- # --- End: Print Loaded Chat Template ---

- # Load hotel docs
  def load_hotel_docs(hotel_id):
-     path = os.path.join("knowledge", f"{hotel_id}.txt")
      if not os.path.exists(path):
          print(f"⚠️ Knowledge file not found: {path}")
          return []
      try:
          with open(path, encoding="utf-8") as f:
              content = f.read().strip()
          return [(hotel_id, content)]
      except Exception as e:
          print(f"❌ Error reading knowledge file {path}: {e}")
          return []

- # Chat function
  def chat(message, history, hotel_id):
      # Convert incoming UI history (list of dicts) to tuple list
-     if history is None:
-         history_tuples = []
-     else:
-         history_tuples = [(m['role'], m['content']) for m in history]
-     # Append the new user turn
      history_tuples.append(("user", message))

      # Yield user message immediately
      ui_history = [{"role": r, "content": c} for r, c in history_tuples]
      yield ui_history, "" # Update chat, clear textbox

-     # Local Qwen flow
-     if not is_space:
-         # Build messages including the new user turn
-         msgs = [{"role": role, "content": content} for role, content in history_tuples]
-         input_text = tokenizer.apply_chat_template(
-             msgs,
-             tokenize=False,
-             add_generation_prompt=True
-         )
-         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-
-         with torch.no_grad():
-             outputs = model.generate(inputs, max_new_tokens=1024, do_sample=True)
-
-         decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
-         print("--- Qwen Raw Output ---")
-         print(decoded)
-         print("-----------------------")
-
-         # Extract assistant response for Qwen
-         try:
-             response = decoded.split("<|im_start|>assistant")[-1]
-             response = response.split("<|im_end|>")[0].strip()
-             if not response: # Handle potential empty split
-                 response = "Sorry, I encountered an issue generating a response."
-         except IndexError:
-             print("❌ Error splitting Qwen response.")
-             response = "Sorry, I couldn't parse the model's response."
-
-     # IBM Granite RAG flow (Space environment)
-     else:
-         # --- Start: Dynamic System Prompt Loading ---
-         default_system_prompt = (
-             "You are a helpful hotel assistant. Use only the provided documents to answer questions about the hotel. "
-             "Greet guests politely. If the information needed to answer the question is not available in the documents, "
-             "inform the user that the question cannot be answered based on the available data."
-         )
          system_prompt_filename = f"{hotel_id}-system.txt"
          system_prompt_path = os.path.join("knowledge", system_prompt_filename)
-         system_prompt_content = default_system_prompt # Start with default
-
          if os.path.exists(system_prompt_path):
              try:
-                 with open(system_prompt_path, "r", encoding="utf-8") as f:
-                     loaded_prompt = f.read().strip()
-                 if loaded_prompt: # Use file content only if it's not empty
-                     system_prompt_content = loaded_prompt
-                     print(f" Loaded system prompt from: {system_prompt_path}")
-                 else:
-                     print(f"⚠️ System prompt file '{system_prompt_path}' is empty. Using default.")
-             except Exception as e:
-                 print(f"❌ Error reading system prompt file '{system_prompt_path}': {e}. Using default.")
-         else:
-             print(f"⚠️ System prompt file not found: '{system_prompt_path}'. Using default.")
-         # --- End: Dynamic System Prompt Loading ---

          messages = [{"role": "system", "content": system_prompt_content}]

-         # Load and add hotel document(s)
          hotel_docs = load_hotel_docs(hotel_id)
          if not hotel_docs:
              # If no knowledge doc found, inform user and stop
              ui_history.append({"role": "assistant", "content": f"Sorry, I don't have specific information loaded for the hotel '{hotel_id}'."})
-             yield ui_history, "" # Update chat, keep textbox cleared
-             return # Exit the function early

-         for hotel_doc_id, doc_content in hotel_docs: # Assuming hotel_docs might contain multiple docs later
              messages.append({
                  "role": "document",
-                 "text": doc_content,
-                 "doc_id": hotel_doc_id # <<< CHANGED KEY HERE
              })

-
-         # Include full history including the new user message
          for role, content in history_tuples:
              messages.append({"role": role, "content": content})

-         # Set meta data (annotations) which influences the bot behaviour in the controls json
          controls = {"length":"short","originality": "abstractive"}

-         # Apply the template
          input_text = tokenizer.apply_chat_template(
              messages,
              tokenize=False,
@@ -216,178 +243,109 @@ def chat(message, history, hotel_id):
              controls=controls
          )

-         print("--- Granite Templated Input ---")
          print(input_text)
-         print("-----------------------------")
-
-         # --- Tokenize AND get input length/attention mask ---
-         inputs = tokenizer(input_text, return_tensors="pt").to(device) # Use tokenizer()
-         input_length = inputs.input_ids.shape[1] # Define input_length using input_ids
-         print(f"DEBUG: Input token length = {input_length}") # Keep this debug print
-
-         # --- Generate using input_ids and attention_mask ---
-         with torch.no_grad():
-             outputs = model.generate(
-                 inputs.input_ids, # Pass input_ids explicitly
-                 attention_mask=inputs.attention_mask, # Pass attention_mask
-                 max_new_tokens=1024,
-                 do_sample=False
-             )

-         # --- Raw output shape printing (keep) ---
-         print("--- Granite Raw Output Tokens (Shape) ---")
-         print(outputs.shape)
-         print("-----------------------------------------")

-         # --- Start: NEW Decoding Strategy (like IBM example) ---
-         try:
-             # Get only the newly generated token IDs
              new_token_ids = outputs[0][input_length:]
-             print(f"DEBUG: Number of new tokens generated = {len(new_token_ids)}") # Debug print
-
-             # Decode only the new tokens, skipping special tokens like <|end_of_text|>
              response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
-             print(f"DEBUG: Decoded response (skip_special_tokens=True) = {repr(response)}") # Debug print
-
-             if not response:
-                 response = "Sorry, I encountered an issue generating a response (empty)."
-
-         except Exception as e:
-             print(f"❌ Unexpected Error during NEW decoding: {e}")
-             response = "Sorry, an unexpected error occurred during decoding."
-         # --- End: NEW Decoding Strategy ---
-
-         # --- ADD THIS DEBUG LINE (if not already present) ---
-         print(f"DEBUG: Final response variable before UI append = {repr(response)}")
-         # --- END ADD THIS DEBUG LINE ---
-
-
-
-     # Add the final assistant reply to the UI history
-     ui_history.append({"role": "assistant", "content": response})
-
-     # Final yield with assistant reply
-     yield ui_history, "" # Update chat, keep textbox cleared
-
-
- # --- Start: Dynamic Hotel ID Detection ---
- knowledge_dir = "knowledge"
- available_hotels = []
-
- # --- Add Debugging ---
- print(f"DEBUG: Current Working Directory: {os.getcwd()}")
- print(f"DEBUG: Checking for knowledge directory at relative path: '{knowledge_dir}'")
- knowledge_dir_abs = os.path.abspath(knowledge_dir)
- print(f"DEBUG: Absolute path for knowledge directory: '{knowledge_dir_abs}'")
- # --- End Debugging ---

- # Check if the knowledge directory exists and is a directory
- if os.path.isdir(knowledge_dir):
-     # --- Add Debugging ---
-     try:
-         print(f"DEBUG: Listing contents of '{knowledge_dir_abs}':")
-         dir_contents = os.listdir(knowledge_dir)
-         print(f"DEBUG: Found files/dirs: {dir_contents}")
-     except Exception as e:
-         print(f"DEBUG: Error listing directory '{knowledge_dir_abs}': {e}")
-     # --- End Debugging ---
-
-     potential_ids = set()
-     # First pass: collect all potential base names from .txt files
-     for filename in os.listdir(knowledge_dir): # Assuming listdir succeeded if we got here
-         if filename.endswith(".txt") and not filename.startswith('.'): # Ignore hidden files
-             if filename.endswith("-system.txt"):
-                 # Extract base name from system prompt file
-                 base_name = filename[:-len("-system.txt")]
-             else:
-                 # Extract base name from main knowledge file
-                 base_name = filename[:-len(".txt")]
-
-             if base_name: # Ensure we got a non-empty base name
-                 potential_ids.add(base_name)
-
-     # Second pass: check if both files exist for each potential ID
-     # Sort the potential IDs for consistent dropdown order
-     print(f"DEBUG: Potential hotel IDs found: {sorted(list(potential_ids))}") # Debug potential IDs
-     for hotel_id in sorted(list(potential_ids)):
-         main_file = os.path.join(knowledge_dir, f"{hotel_id}.txt")
-         system_file = os.path.join(knowledge_dir, f"{hotel_id}-system.txt")
-
-         # --- Add Debugging ---
-         main_file_abs = os.path.abspath(main_file)
-         system_file_abs = os.path.abspath(system_file)
-         print(f"DEBUG: Checking pair for ID '{hotel_id}':")
-         print(f"DEBUG: Main file: '{main_file_abs}' -> Exists? {os.path.exists(main_file)}")
-         print(f"DEBUG: System file: '{system_file_abs}' -> Exists? {os.path.exists(system_file)}")
-         # --- End Debugging ---
-
-         # Check if BOTH the main knowledge file AND the system prompt file exist
-         if os.path.exists(main_file) and os.path.exists(system_file):
-             available_hotels.append(hotel_id)
-             print(f"✅ Found valid hotel pair: {hotel_id}")
          else:
-             # Optional: Print a warning if one file exists but not the other
-             if os.path.exists(main_file) or os.path.exists(system_file):
-                 print(f"⚠️ Skipping '{hotel_id}': Missing either '{hotel_id}.txt' or '{hotel_id}-system.txt'")
-             # --- Add Debugging ---
-             # Add an else here to catch cases where NEITHER file exists for a potential ID
-             elif not os.path.exists(main_file) and not os.path.exists(system_file):
-                 print(f"DEBUG: Neither file found for potential ID '{hotel_id}' at checked paths.")
-             # --- End Debugging ---

- else:
-     print(f"❌ Error: Knowledge directory '{knowledge_dir}' (abs path: '{knowledge_dir_abs}') not found or is not a directory.")


- # Handle case where no valid hotels were found
- if not available_hotels:
-     print("🚨 CRITICAL: No valid hotels found in the knowledge directory. The dropdown will be empty.")
-     # You might want to add a placeholder or handle this error more gracefully
-     # For now, the dropdown will just be empty or disabled.
- # --- End: Dynamic Hotel ID Detection ---

- # --- The above dynamic detection replaces the old hardcoded list ---
- # hotel_ids = ["cyprus-guesthouse-family", "coastal-villa-family", "village-inn-family"] # Remove or comment out this line


- # Gradio UI
- # Gradio UI
  with gr.Blocks() as demo:
      with gr.Column(variant="panel"):
          gr.Markdown("### 🏨 Multi‑Hotel Chatbot Demo")
-         gr.Markdown(f"**Running:** {model_name}")

          hotel_selector = gr.Dropdown(
-             choices=available_hotels, # Use the dynamically generated list
              label="Hotel",
-             value=available_hotels[0] if available_hotels else None, # Set default to first found hotel, or None if empty
-             interactive=bool(available_hotels) # Disable dropdown if no hotels are found
          )

          with gr.Row():
-             # Use type="messages" for the dictionary format expected by the chat function
-             chatbot = gr.Chatbot(type="messages", label="Chat History")

          msg = gr.Textbox(
              show_label=False,
              placeholder="Ask about the hotel..."
          )

-         # Clear button needs to reset chatbot to None or empty list, and clear textbox
          clear_btn = gr.Button("Clear")
-         clear_btn.click(lambda: (None, ""), None, [chatbot, msg]) # Reset chatbot history to None

-         # Wire the textbox submission
          msg.submit(
              fn=chat,
              inputs=[msg, chatbot, hotel_selector],
-             outputs=[chatbot, msg] # chatbot updates, msg clears
          )

-         gr.Markdown("⚠️ Pause the Space when done to avoid charges.")

  # Enable streaming queue
  demo.queue(default_concurrency_limit=2, max_size=32)

  if __name__ == "__main__":
      demo.launch()

  import os
  import re
+ import json # For debug printing
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
+ try:
+     # Try importing ctransformers for GGUF support
+     from ctransformers import AutoModelForCausalLM as AutoModelForCausalLM_GGUF
+     CTRANSFORMERS_AVAILABLE = True
+ except ImportError:
+     print("⚠️ WARNING: ctransformers library not found. Local GGUF execution will not be available.")
+     print(" To enable local GGUF, run: pip install ctransformers>=0.2.27")
+     AutoModelForCausalLM_GGUF = None # Define as None if import fails
+     CTRANSFORMERS_AVAILABLE = False
+
+ # --- Configuration for Local GGUF ---
+ # Set this environment variable or replace the default path
+ # Download granite-3.3-2b-instruct-Q2_K.gguf (or other) from Hugging Face
+ DEFAULT_GGUF_PATH = "./models/granite-3.3-2b-instruct-Q2_K.gguf" # Example path
+ GGUF_MODEL_PATH = os.environ.get("GGUF_MODEL_PATH", DEFAULT_GGUF_PATH)
+ CORRECTED_TEMPLATE_FILENAME = "corrected_granite_template.jinja" # Name of your corrected template file
+ # --- End Configuration ---

  # Detect Space environment by SPACE_ID env var
  env = os.environ
  is_space = env.get("SPACE_ID") is not None
+ print(f"RUNNING IN SPACE? {is_space}")

+ # Device setup (primarily for HF model)
  device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Using device: {device}")

+ # Load model function (handles HF Space vs Local GGUF)
  def load_model():
+     primary_checkpoint = "ibm-granite/granite-3.3-2b-instruct"
+     model_name_display = primary_checkpoint # Use this for UI display always

+     # --- Function to load and apply template ---
+     def apply_template_from_file(tokenizer, template_filename):
+         applied_template = False
+         try:
+             print(f"Attempting to load corrected chat template from: {template_filename}")
+             # Ensure the template file path is relative to the script location
+             script_dir = os.path.dirname(os.path.abspath(__file__))
+             template_path = os.path.join(script_dir, template_filename)
+
+             if not os.path.exists(template_path):
+                 print(f"⚠️ WARNING: Corrected template file not found at: {template_path}")
+                 return False # Indicate failure
+
+             with open(template_path, "r", encoding="utf-8") as f:
+                 custom_chat_template_content = f.read()
+             tokenizer.chat_template = custom_chat_template_content
+             applied_template = True
+             print(f"✅ Loaded and applied corrected chat template from: {template_filename}")
+         except FileNotFoundError:
+             # This case is handled by the os.path.exists check above
+             pass
+         except Exception as e:
+             print(f"❌ ERROR reading corrected template file '{template_filename}': {e}")
+
+         # Fallback / Verification print
+         if not applied_template:
+             print("Falling back to tokenizer's default built-in template.")
+         print("--- Final Chat Template Being Used ---")
+         print(tokenizer.chat_template if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template else "No template found or template empty.")
+         print("------------------------------------")
+         return applied_template
+     # --- End function ---
+
+     if is_space:
+         print(f"🚀 Running in Space. Loading HF model: {primary_checkpoint}")
+         try:
+             # Load HF Tokenizer
+             tokenizer = AutoTokenizer.from_pretrained(primary_checkpoint, use_fast=True)
+             # Load HF Model
+             model = AutoModelForCausalLM.from_pretrained(
+                 primary_checkpoint,
+                 torch_dtype=torch.float16,
+                 low_cpu_mem_usage=True,
+                 device_map="auto" # Use device_map for HF model
+             )
+             print(f"✅ Loaded HF {primary_checkpoint}")
+             apply_template_from_file(tokenizer, CORRECTED_TEMPLATE_FILENAME)
+             return tokenizer, model, model_name_display

+         except Exception as e:
+             print(f"❌ HF Primary load failed: {e}")
+             raise RuntimeError(f"Failed to load primary HF model {primary_checkpoint} in Space.") from e

+     else: # Running Locally - Load GGUF
+         print(f"💻 Running Locally. Attempting GGUF setup.")
+         if not CTRANSFORMERS_AVAILABLE:
+             raise RuntimeError("ctransformers library is required for local GGUF execution but not installed.")

+         print(f" GGUF model path: {GGUF_MODEL_PATH}")
+         print(f" Using HF tokenizer for template: {primary_checkpoint}")
+         try:
+             # Load HF Tokenizer (needed for apply_chat_template)
+             tokenizer = AutoTokenizer.from_pretrained(primary_checkpoint, use_fast=True)
+             print("✅ Loaded HF Tokenizer for template application.")
+             apply_template_from_file(tokenizer, CORRECTED_TEMPLATE_FILENAME)
+
+             # Check if GGUF file exists before attempting to load
+             if not os.path.exists(GGUF_MODEL_PATH):
+                 raise FileNotFoundError(f"GGUF model file not found at specified path: {GGUF_MODEL_PATH}. Please download the model or set the GGUF_MODEL_PATH environment variable.")
+
+             # Load GGUF Model using ctransformers
+             model = AutoModelForCausalLM_GGUF.from_pretrained(
+                 GGUF_MODEL_PATH,
+                 model_type="llama", # Adjust if needed based on model card
+                 context_length=4096, # Can be adjusted
+                 gpu_layers=0 # CPU-only inference
+             )
+             print(f"✅ Loaded GGUF model {GGUF_MODEL_PATH}")
+             # Display GGUF path in UI when running locally
+             model_name_display = f"GGUF: {os.path.basename(GGUF_MODEL_PATH)}"
+             return tokenizer, model, model_name_display
+
+         except Exception as e:
+             print(f"❌ Local GGUF load failed: {e}")
+             raise RuntimeError(f"Failed to load local GGUF model or its tokenizer.") from e
+
+ # --- Call load_model ---
+ try:
+     tokenizer, model, model_name = load_model()
+ except Exception as load_err:
+     print(f"🚨 CRITICAL ERROR DURING MODEL LOADING: {load_err}")
+     # Optionally, exit or provide a dummy model/tokenizer for Gradio UI to load without crashing
+     # For now, we'll let it potentially crash Gradio if loading fails.
+     raise
+
+ # --- Load hotel docs function ---
  def load_hotel_docs(hotel_id):
+     knowledge_dir = "knowledge"
+     path = os.path.join(knowledge_dir, f"{hotel_id}.txt")
      if not os.path.exists(path):
          print(f"⚠️ Knowledge file not found: {path}")
          return []
      try:
          with open(path, encoding="utf-8") as f:
              content = f.read().strip()
+         # Return as list of tuples: [(doc_id, content)]
+         # Using hotel_id as doc_id here
          return [(hotel_id, content)]
      except Exception as e:
          print(f"❌ Error reading knowledge file {path}: {e}")
          return []

+ # --- Dynamic Hotel ID Detection ---
+ knowledge_dir = "knowledge"
+ available_hotels = []
+ print("\n🔍 Scanning for available hotels...")
+ if os.path.isdir(knowledge_dir):
+     potential_ids = set()
+     for filename in os.listdir(knowledge_dir):
+         if filename.endswith(".txt") and not filename.startswith('.'):
+             if filename.endswith("-system.txt"):
+                 base_name = filename[:-len("-system.txt")]
+             else:
+                 base_name = filename[:-len(".txt")]
+             if base_name: potential_ids.add(base_name)
+
+     for hotel_id in sorted(list(potential_ids)):
+         main_file = os.path.join(knowledge_dir, f"{hotel_id}.txt")
+         system_file = os.path.join(knowledge_dir, f"{hotel_id}-system.txt")
+         if os.path.exists(main_file) and os.path.exists(system_file):
+             available_hotels.append(hotel_id)
+             print(f" ✅ Found valid hotel pair: {hotel_id}")
+         elif os.path.exists(main_file) or os.path.exists(system_file):
+             print(f" ⚠️ Skipping '{hotel_id}': Missing either '{hotel_id}.txt' or '{hotel_id}-system.txt'")
+ else:
+     print(f"❌ Error: Knowledge directory '{knowledge_dir}' not found or is not a directory.")
+ if not available_hotels:
+     print("🚨 CRITICAL: No valid hotels found. Dropdown will be empty/disabled.")
+ print("Hotel scan complete.\n")
+ # --- End Dynamic Hotel ID Detection ---
+
+
+ # --- Chat function ---
  def chat(message, history, hotel_id):
      # Convert incoming UI history (list of dicts) to tuple list
+     if history is None: history = [] # Ensure history is a list
+     history_tuples = [(m['role'], m['content']) for m in history if isinstance(m, dict) and 'role' in m and 'content' in m]
      history_tuples.append(("user", message))

      # Yield user message immediately
      ui_history = [{"role": r, "content": c} for r, c in history_tuples]
      yield ui_history, "" # Update chat, clear textbox

+     # --- Prompt Preparation (Common for both HF/GGUF) ---
+     input_text = "" # Initialize to avoid potential UnboundLocalError
+     try:
+         # --- Load System Prompt ---
+         default_system_prompt = "You are a helpful hotel assistant..." # Define your default
          system_prompt_filename = f"{hotel_id}-system.txt"
          system_prompt_path = os.path.join("knowledge", system_prompt_filename)
+         system_prompt_content = default_system_prompt
          if os.path.exists(system_prompt_path):
              try:
+                 with open(system_prompt_path, "r", encoding="utf-8") as f: loaded_prompt = f.read().strip()
+                 if loaded_prompt: system_prompt_content = loaded_prompt
+                 else: print(f"⚠️ System prompt file '{system_prompt_path}' is empty. Using default.")
+             except Exception as e: print(f"❌ Error reading system prompt file '{system_prompt_path}': {e}. Using default.")
+         else: print(f"⚠️ System prompt file not found: '{system_prompt_path}'. Using default.")
+         # --- End Load System Prompt ---

          messages = [{"role": "system", "content": system_prompt_content}]

+         # --- Load and add hotel document(s) ---
          hotel_docs = load_hotel_docs(hotel_id)
          if not hotel_docs:
              # If no knowledge doc found, inform user and stop
              ui_history.append({"role": "assistant", "content": f"Sorry, I don't have specific information loaded for the hotel '{hotel_id}'."})
+             yield ui_history, ""
+             return

+         for hotel_doc_id, doc_content in hotel_docs:
              messages.append({
                  "role": "document",
+                 "text": doc_content, # Use 'text' key
+                 "doc_id": hotel_doc_id # Use 'doc_id' key
              })
+         # --- End Load Documents ---

+         # --- Include chat history ---
          for role, content in history_tuples:
+             # Exclude the last user message as it's implicitly handled by template
+             if role == "user" and content == message and history_tuples.index((role, content)) == len(history_tuples) - 1:
+                 continue # Skip adding the very last user message again if template adds it
              messages.append({"role": role, "content": content})
+         # --- End Include History ---

+         # --- Set controls ---
          controls = {"length":"short","originality": "abstractive"}

+         # --- Apply the template ---
          input_text = tokenizer.apply_chat_template(
              messages,
              tokenize=False,
              controls=controls
          )

+         print("--- Templated Input ---")
          print(input_text)
+         print("-----------------------")

+     except Exception as e:
+         print(f" Error during prompt preparation: {e}")
+         ui_history.append({"role": "assistant", "content": "Sorry, an error occurred while preparing the prompt."})
+         yield ui_history, ""
+         return

+     # --- Generation Logic: Space (HF) vs Local (GGUF) ---
+     response = "Sorry, an error occurred during generation." # Default error response
+     try:
+         if is_space:
+             # --- HF Model Generation (Space) ---
+             print("🚀 Generating response using HF model...")
+             inputs = tokenizer(input_text, return_tensors="pt").to(device)
+             input_length = inputs.input_ids.shape[1]
+             print(f"DEBUG: Input token length = {input_length}")
+
+             with torch.no_grad():
+                 outputs = model.generate(
+                     inputs.input_ids,
+                     attention_mask=inputs.attention_mask,
+                     max_new_tokens=1024,
+                     do_sample=False,
+                     eos_token_id=tokenizer.eos_token_id # Explicitly use EOS token ID
+                 )
+             print(f"DEBUG: Output tokens shape = {outputs.shape}")
+
+             # Decode using the IBM example strategy
              new_token_ids = outputs[0][input_length:]
+             print(f"DEBUG: Number of new tokens generated = {len(new_token_ids)}")
              response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
+             print(f"DEBUG: Decoded response (skip_special_tokens=True) = {repr(response)}")
+             print("✅ HF Generation complete.")

          else:
+             # --- GGUF Model Generation (Local) ---
+             print("💻 Generating response using GGUF model...")
+             response = model(
+                 input_text,
+                 max_new_tokens=1024,
+                 stop=["<|end_of_text|>"], # Stop sequence for GGUF
+                 temperature=0.3 # Example temperature
+             )
+             response = response.strip()
+             print("✅ GGUF Generation complete.")

+         # Handle empty response after generation
+         if not response:
+             response = "Sorry, I encountered an issue generating a response (empty)."

+     except Exception as e:
+         print(f"❌ Error during model generation or decoding: {e}")
+         # Keep the default error response defined above

+     # --- Final Response Handling ---
+     print(f"DEBUG: Final response variable before UI append = {repr(response)}")

+     # Add the final assistant reply to the UI history
+     ui_history.append({"role": "assistant", "content": response})

+     # Final yield with assistant reply
+     yield ui_history, "" # Update chat, keep textbox cleared

+ # --- Gradio UI ---
  with gr.Blocks() as demo:
      with gr.Column(variant="panel"):
          gr.Markdown("### 🏨 Multi‑Hotel Chatbot Demo")
+         gr.Markdown(f"**Running:** {model_name}") # Displays HF name or GGUF path

          hotel_selector = gr.Dropdown(
+             choices=available_hotels,
              label="Hotel",
+             value=available_hotels[0] if available_hotels else None,
+             interactive=bool(available_hotels)
          )

          with gr.Row():
+             chatbot = gr.Chatbot(type="messages", label="Chat History", height=500)

          msg = gr.Textbox(
              show_label=False,
              placeholder="Ask about the hotel..."
          )

          clear_btn = gr.Button("Clear")
+         clear_btn.click(lambda: (None, ""), None, [chatbot, msg])

          msg.submit(
              fn=chat,
              inputs=[msg, chatbot, hotel_selector],
+             outputs=[chatbot, msg]
          )

+         if is_space:
+             gr.Markdown("⚠️ Pause the Space when done to avoid charges.")

  # Enable streaming queue
  demo.queue(default_concurrency_limit=2, max_size=32)

  if __name__ == "__main__":
+     print("Launching Gradio Interface...")
      demo.launch()
+ print("Gradio Interface closed.")
requirements.txt CHANGED
@@ -1,9 +1,11 @@
 
+ accelerate
  aiofiles==24.1.0
  annotated-types==0.7.0
  anyio==4.9.0
  certifi==2025.1.31
  charset-normalizer==3.4.1
  click==8.1.8
+ ctransformers>=0.2.27
  exceptiongroup==1.2.2
  fastapi==0.115.12
  ffmpy==0.5.0
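
Side note (not part of the commit): a quick environment-check sketch for the local venv, assuming Python 3.8+ so importlib.metadata is available; it only confirms that the two packages added above resolve inside the environment.

import importlib.metadata as md

for pkg in ("accelerate", "ctransformers"):  # the two lines added to requirements.txt
    try:
        print(f"{pkg} {md.version(pkg)} is installed")
    except md.PackageNotFoundError:
        print(f"{pkg} is missing; run: pip install -r requirements.txt")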