Spaces: Paused
looker01202 committed · Commit 7226a27 · Parent(s): 45c767b

setup local venv to use gguf

Browse files:
- .gitignore +33 -2
- app.py +245 -287
- requirements.txt +2 -0
.gitignore CHANGED
@@ -1,3 +1,34 @@
-
-
+# .gitignore
+
+# Python Virtual Environment
+venv/
+.venv/
+
+# Python cache files
__pycache__/
+*.pyc
+*.pyo
+*.pyd
+
+# Local Models (assuming you download GGUF here)
+models/
+*.gguf
+
+# IDE / Editor specific files
+.vscode/
+.idea/
+*.sublime-project
+*.sublime-workspace
+
+# OS generated files
+.DS_Store
+Thumbs.db
+
+# Secrets / Environment variables (if you use a .env file later)
+.env
+
+# Gradio cache/temp files (optional, but can be useful)
+gradio_cached_examples/
+gradio_cached_examples_log.csv
+
+.cache
app.py CHANGED
@@ -1,214 +1,241 @@
import os
import re
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Detect Space environment by SPACE_ID env var
env = os.environ
is_space = env.get("SPACE_ID") is not None
-print("RUNNING IN SPACE?

-#
-if is_space:
-    primary_checkpoint = "ibm-granite/granite-3.3-2b-instruct"
-    fallback_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
-else:
-    primary_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
-    fallback_checkpoint = None
-
-# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"

-# Load model
def load_model():
-
-
-        # Use optimized loading settings suitable for Granite
-        load_kwargs = {
-            "use_fast": True,
-            "torch_dtype": torch.float16,
-            "low_cpu_mem_usage": True
-        } if primary_checkpoint.startswith("ibm-granite") else {}
-
-        tokenizer = AutoTokenizer.from_pretrained(
-            primary_checkpoint,
-            **{k: v for k, v in load_kwargs.items() if k == 'use_fast'} # Only pass use_fast to tokenizer
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            primary_checkpoint,
-            **{k: v for k, v in load_kwargs.items() if k != 'use_fast'} # Pass other kwargs to model
-        ).to(device)
-        print(f"✅ Loaded primary {primary_checkpoint}")
-        return tokenizer, model, primary_checkpoint
-    except Exception as e:
-        print(f"❌ Primary load failed: {e}")
-        if fallback_checkpoint:
-            print(f"🔁 Falling back to {fallback_checkpoint}")
-            tokenizer = AutoTokenizer.from_pretrained(fallback_checkpoint)
-            model = AutoModelForCausalLM.from_pretrained(fallback_checkpoint).to(device)
-            print(f"✅ Loaded fallback {fallback_checkpoint}")
-            return tokenizer, model, fallback_checkpoint
-        raise
-
-tokenizer, model, model_name = load_model()
-
-
-# --- Start: Apply CORRECTED Chat Template from File (if Granite) ---
-if "granite" in model_name.lower():
-    template_filename = "granite3.3_2b_chat_template.jinja" # Use the new filename
-    applied_template = False
-    try:
-        # Assuming the template file is in the same directory as app.py (project root)
-        print(f"Attempting to load corrected chat template from: {template_filename}")
-        with open(template_filename, "r", encoding="utf-8") as f:
-            custom_chat_template_content = f.read()
-
-        # Assign the loaded template content
-        tokenizer.chat_template = custom_chat_template_content
-        applied_template = True
-        print(f"✅ Loaded and applied corrected chat template from: {template_filename}")
-
-    except FileNotFoundError:
-        print(f"⚠️ WARNING: Corrected template file '{template_filename}' not found.")
-    except Exception as e:
-        print(f"❌ ERROR reading corrected template file '{template_filename}': {e}")
-
-    # Fallback / Verification print
-    if not applied_template:
-        print("Falling back to tokenizer's default built-in template (which might be incorrect).")
-    print("--- Final Chat Template Being Used ---")
-    if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template:
-        print(tokenizer.chat_template) # Print the template actually being used
-    else:
-        print("Tokenizer does not have a chat_template attribute or it is empty.")
-    print("------------------------------------")

-
-
-


-#
-print("
-
-
-    print(tokenizer.chat_template)
-else:
-    print("Tokenizer does not have a chat_template attribute or it is empty.")
-print("------------------------------------")
-# --- End: Print Loaded Chat Template ---

-
def load_hotel_docs(hotel_id):
-
    if not os.path.exists(path):
        print(f"⚠️ Knowledge file not found: {path}")
        return []
    try:
        with open(path, encoding="utf-8") as f:
            content = f.read().strip()
        return [(hotel_id, content)]
    except Exception as e:
        print(f"❌ Error reading knowledge file {path}: {e}")
        return []

-#
def chat(message, history, hotel_id):
    # Convert incoming UI history (list of dicts) to tuple list
-    if history is None:
-
-    else:
-        history_tuples = [(m['role'], m['content']) for m in history]
-    # Append the new user turn
    history_tuples.append(("user", message))

    # Yield user message immediately
    ui_history = [{"role": r, "content": c} for r, c in history_tuples]
    yield ui_history, "" # Update chat, clear textbox

-    #
-
-
-
-
-            msgs,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-        inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-
-        with torch.no_grad():
-            outputs = model.generate(inputs, max_new_tokens=1024, do_sample=True)
-
-        decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
-        print("--- Qwen Raw Output ---")
-        print(decoded)
-        print("-----------------------")
-
-        # Extract assistant response for Qwen
-        try:
-            response = decoded.split("<|im_start|>assistant")[-1]
-            response = response.split("<|im_end|>")[0].strip()
-            if not response: # Handle potential empty split
-                response = "Sorry, I encountered an issue generating a response."
-        except IndexError:
-            print("❌ Error splitting Qwen response.")
-            response = "Sorry, I couldn't parse the model's response."
-
-    # IBM Granite RAG flow (Space environment)
-    else:
-        # --- Start: Dynamic System Prompt Loading ---
-        default_system_prompt = (
-            "You are a helpful hotel assistant. Use only the provided documents to answer questions about the hotel. "
-            "Greet guests politely. If the information needed to answer the question is not available in the documents, "
-            "inform the user that the question cannot be answered based on the available data."
-        )
        system_prompt_filename = f"{hotel_id}-system.txt"
        system_prompt_path = os.path.join("knowledge", system_prompt_filename)
-        system_prompt_content = default_system_prompt
-
        if os.path.exists(system_prompt_path):
            try:
-                with open(system_prompt_path, "r", encoding="utf-8") as f:
-
-
-
-
-
-                    print(f"⚠️ System prompt file '{system_prompt_path}' is empty. Using default.")
-            except Exception as e:
-                print(f"❌ Error reading system prompt file '{system_prompt_path}': {e}. Using default.")
-        else:
-            print(f"⚠️ System prompt file not found: '{system_prompt_path}'. Using default.")
-        # --- End: Dynamic System Prompt Loading ---

        messages = [{"role": "system", "content": system_prompt_content}]

-        # Load and add hotel document(s)
        hotel_docs = load_hotel_docs(hotel_id)
        if not hotel_docs:
            # If no knowledge doc found, inform user and stop
            ui_history.append({"role": "assistant", "content": f"Sorry, I don't have specific information loaded for the hotel '{hotel_id}'."})
-            yield ui_history, ""
-            return

-        for hotel_doc_id, doc_content in hotel_docs:
            messages.append({
                "role": "document",
-                "text": doc_content,
-                "doc_id": hotel_doc_id #
            })

-
-        # Include full history including the new user message
        for role, content in history_tuples:
            messages.append({"role": role, "content": content})

-        # Set
        controls = {"length":"short","originality": "abstractive"}

-        # Apply the template
        input_text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
@@ -216,178 +243,109 @@ def chat(message, history, hotel_id):
            controls=controls
        )

-        print("---
        print(input_text)
-        print("
-
-        # --- Tokenize AND get input length/attention mask ---
-        inputs = tokenizer(input_text, return_tensors="pt").to(device) # Use tokenizer()
-        input_length = inputs.input_ids.shape[1] # Define input_length using input_ids
-        print(f"DEBUG: Input token length = {input_length}") # Keep this debug print
-
-        # --- Generate using input_ids and attention_mask ---
-        with torch.no_grad():
-            outputs = model.generate(
-                inputs.input_ids, # Pass input_ids explicitly
-                attention_mask=inputs.attention_mask, # Pass attention_mask
-                max_new_tokens=1024,
-                do_sample=False
-            )

-
-        print("
-
-

-
-
-
            new_token_ids = outputs[0][input_length:]
-            print(f"DEBUG: Number of new tokens generated = {len(new_token_ids)}")
-
-            # Decode only the new tokens, skipping special tokens like <|end_of_text|>
            response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
-            print(f"DEBUG: Decoded response (skip_special_tokens=True) = {repr(response)}")
-
-            if not response:
-                response = "Sorry, I encountered an issue generating a response (empty)."
-
-        except Exception as e:
-            print(f"❌ Unexpected Error during NEW decoding: {e}")
-            response = "Sorry, an unexpected error occurred during decoding."
-        # --- End: NEW Decoding Strategy ---
-
-        # --- ADD THIS DEBUG LINE (if not already present) ---
-        print(f"DEBUG: Final response variable before UI append = {repr(response)}")
-        # --- END ADD THIS DEBUG LINE ---
-
-
-
-    # Add the final assistant reply to the UI history
-    ui_history.append({"role": "assistant", "content": response})
-
-    # Final yield with assistant reply
-    yield ui_history, "" # Update chat, keep textbox cleared
-
-
-# --- Start: Dynamic Hotel ID Detection ---
-knowledge_dir = "knowledge"
-available_hotels = []
-
-# --- Add Debugging ---
-print(f"DEBUG: Current Working Directory: {os.getcwd()}")
-print(f"DEBUG: Checking for knowledge directory at relative path: '{knowledge_dir}'")
-knowledge_dir_abs = os.path.abspath(knowledge_dir)
-print(f"DEBUG: Absolute path for knowledge directory: '{knowledge_dir_abs}'")
-# --- End Debugging ---

-# Check if the knowledge directory exists and is a directory
-if os.path.isdir(knowledge_dir):
-    # --- Add Debugging ---
-    try:
-        print(f"DEBUG: Listing contents of '{knowledge_dir_abs}':")
-        dir_contents = os.listdir(knowledge_dir)
-        print(f"DEBUG: Found files/dirs: {dir_contents}")
-    except Exception as e:
-        print(f"DEBUG: Error listing directory '{knowledge_dir_abs}': {e}")
-    # --- End Debugging ---
-
-    potential_ids = set()
-    # First pass: collect all potential base names from .txt files
-    for filename in os.listdir(knowledge_dir): # Assuming listdir succeeded if we got here
-        if filename.endswith(".txt") and not filename.startswith('.'): # Ignore hidden files
-            if filename.endswith("-system.txt"):
-                # Extract base name from system prompt file
-                base_name = filename[:-len("-system.txt")]
-            else:
-                # Extract base name from main knowledge file
-                base_name = filename[:-len(".txt")]
-
-            if base_name: # Ensure we got a non-empty base name
-                potential_ids.add(base_name)
-
-    # Second pass: check if both files exist for each potential ID
-    # Sort the potential IDs for consistent dropdown order
-    print(f"DEBUG: Potential hotel IDs found: {sorted(list(potential_ids))}") # Debug potential IDs
-    for hotel_id in sorted(list(potential_ids)):
-        main_file = os.path.join(knowledge_dir, f"{hotel_id}.txt")
-        system_file = os.path.join(knowledge_dir, f"{hotel_id}-system.txt")
-
-        # --- Add Debugging ---
-        main_file_abs = os.path.abspath(main_file)
-        system_file_abs = os.path.abspath(system_file)
-        print(f"DEBUG: Checking pair for ID '{hotel_id}':")
-        print(f"DEBUG: Main file: '{main_file_abs}' -> Exists? {os.path.exists(main_file)}")
-        print(f"DEBUG: System file: '{system_file_abs}' -> Exists? {os.path.exists(system_file)}")
-        # --- End Debugging ---
-
-        # Check if BOTH the main knowledge file AND the system prompt file exist
-        if os.path.exists(main_file) and os.path.exists(system_file):
-            available_hotels.append(hotel_id)
-            print(f"✅ Found valid hotel pair: {hotel_id}")
        else:
-            #
-
-
-
-
-
-

-
-


-#
-
-    print("🚨 CRITICAL: No valid hotels found in the knowledge directory. The dropdown will be empty.")
-    # You might want to add a placeholder or handle this error more gracefully
-    # For now, the dropdown will just be empty or disabled.
-# --- End: Dynamic Hotel ID Detection ---

-#
-


-# Gradio UI
-# Gradio UI
with gr.Blocks() as demo:
    with gr.Column(variant="panel"):
        gr.Markdown("### 🏨 Multi‑Hotel Chatbot Demo")
-        gr.Markdown(f"**Running:** {model_name}")

        hotel_selector = gr.Dropdown(
-            choices=available_hotels,
            label="Hotel",
-            value=available_hotels[0] if available_hotels else None,
-            interactive=bool(available_hotels)
        )

        with gr.Row():
-
-            chatbot = gr.Chatbot(type="messages", label="Chat History")

        msg = gr.Textbox(
            show_label=False,
            placeholder="Ask about the hotel..."
        )

-        # Clear button needs to reset chatbot to None or empty list, and clear textbox
        clear_btn = gr.Button("Clear")
-        clear_btn.click(lambda: (None, ""), None, [chatbot, msg])

-        # Wire the textbox submission
        msg.submit(
            fn=chat,
            inputs=[msg, chatbot, hotel_selector],
-            outputs=[chatbot, msg]
        )

-

# Enable streaming queue
demo.queue(default_concurrency_limit=2, max_size=32)

if __name__ == "__main__":
    demo.launch()
import os
import re
+import json # For debug printing
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
+try:
+    # Try importing ctransformers for GGUF support
+    from ctransformers import AutoModelForCausalLM as AutoModelForCausalLM_GGUF
+    CTRANSFORMERS_AVAILABLE = True
+except ImportError:
+    print("⚠️ WARNING: ctransformers library not found. Local GGUF execution will not be available.")
+    print("   To enable local GGUF, run: pip install ctransformers>=0.2.27")
+    AutoModelForCausalLM_GGUF = None # Define as None if import fails
+    CTRANSFORMERS_AVAILABLE = False
+
+# --- Configuration for Local GGUF ---
+# Set this environment variable or replace the default path
+# Download granite-3.3-2b-instruct-Q2_K.gguf (or other) from Hugging Face
+DEFAULT_GGUF_PATH = "./models/granite-3.3-2b-instruct-Q2_K.gguf" # Example path
+GGUF_MODEL_PATH = os.environ.get("GGUF_MODEL_PATH", DEFAULT_GGUF_PATH)
+CORRECTED_TEMPLATE_FILENAME = "corrected_granite_template.jinja" # Name of your corrected template file
+# --- End Configuration ---

# Detect Space environment by SPACE_ID env var
env = os.environ
is_space = env.get("SPACE_ID") is not None
+print(f"RUNNING IN SPACE? {is_space}")

+# Device setup (primarily for HF model)
device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")

+# Load model function (handles HF Space vs Local GGUF)
def load_model():
+    primary_checkpoint = "ibm-granite/granite-3.3-2b-instruct"
+    model_name_display = primary_checkpoint # Use this for UI display always

+    # --- Function to load and apply template ---
+    def apply_template_from_file(tokenizer, template_filename):
+        applied_template = False
+        try:
+            print(f"Attempting to load corrected chat template from: {template_filename}")
+            # Ensure the template file path is relative to the script location
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            template_path = os.path.join(script_dir, template_filename)
+
+            if not os.path.exists(template_path):
+                print(f"⚠️ WARNING: Corrected template file not found at: {template_path}")
+                return False # Indicate failure
+
+            with open(template_path, "r", encoding="utf-8") as f:
+                custom_chat_template_content = f.read()
+            tokenizer.chat_template = custom_chat_template_content
+            applied_template = True
+            print(f"✅ Loaded and applied corrected chat template from: {template_filename}")
+        except FileNotFoundError:
+            # This case is handled by the os.path.exists check above
+            pass
+        except Exception as e:
+            print(f"❌ ERROR reading corrected template file '{template_filename}': {e}")
+
+        # Fallback / Verification print
+        if not applied_template:
+            print("Falling back to tokenizer's default built-in template.")
+        print("--- Final Chat Template Being Used ---")
+        print(tokenizer.chat_template if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template else "No template found or template empty.")
+        print("------------------------------------")
+        return applied_template
+    # --- End function ---
+
+    if is_space:
+        print(f"🚀 Running in Space. Loading HF model: {primary_checkpoint}")
+        try:
+            # Load HF Tokenizer
+            tokenizer = AutoTokenizer.from_pretrained(primary_checkpoint, use_fast=True)
+            # Load HF Model
+            model = AutoModelForCausalLM.from_pretrained(
+                primary_checkpoint,
+                torch_dtype=torch.float16,
+                low_cpu_mem_usage=True,
+                device_map="auto" # Use device_map for HF model
+            )
+            print(f"✅ Loaded HF {primary_checkpoint}")
+            apply_template_from_file(tokenizer, CORRECTED_TEMPLATE_FILENAME)
+            return tokenizer, model, model_name_display

+        except Exception as e:
+            print(f"❌ HF Primary load failed: {e}")
+            raise RuntimeError(f"Failed to load primary HF model {primary_checkpoint} in Space.") from e

+    else: # Running Locally - Load GGUF
+        print(f"💻 Running Locally. Attempting GGUF setup.")
+        if not CTRANSFORMERS_AVAILABLE:
+            raise RuntimeError("ctransformers library is required for local GGUF execution but not installed.")

+        print(f"   GGUF model path: {GGUF_MODEL_PATH}")
+        print(f"   Using HF tokenizer for template: {primary_checkpoint}")
+        try:
+            # Load HF Tokenizer (needed for apply_chat_template)
+            tokenizer = AutoTokenizer.from_pretrained(primary_checkpoint, use_fast=True)
+            print("✅ Loaded HF Tokenizer for template application.")
+            apply_template_from_file(tokenizer, CORRECTED_TEMPLATE_FILENAME)
+
+            # Check if GGUF file exists before attempting to load
+            if not os.path.exists(GGUF_MODEL_PATH):
+                raise FileNotFoundError(f"GGUF model file not found at specified path: {GGUF_MODEL_PATH}. Please download the model or set the GGUF_MODEL_PATH environment variable.")
+
+            # Load GGUF Model using ctransformers
+            model = AutoModelForCausalLM_GGUF.from_pretrained(
+                GGUF_MODEL_PATH,
+                model_type="llama", # Adjust if needed based on model card
+                context_length=4096, # Can be adjusted
+                gpu_layers=0 # CPU-only inference
+            )
+            print(f"✅ Loaded GGUF model {GGUF_MODEL_PATH}")
+            # Display GGUF path in UI when running locally
+            model_name_display = f"GGUF: {os.path.basename(GGUF_MODEL_PATH)}"
+            return tokenizer, model, model_name_display
+
+        except Exception as e:
+            print(f"❌ Local GGUF load failed: {e}")
+            raise RuntimeError(f"Failed to load local GGUF model or its tokenizer.") from e
+
+# --- Call load_model ---
+try:
+    tokenizer, model, model_name = load_model()
+except Exception as load_err:
+    print(f"🚨 CRITICAL ERROR DURING MODEL LOADING: {load_err}")
+    # Optionally, exit or provide a dummy model/tokenizer for Gradio UI to load without crashing
+    # For now, we'll let it potentially crash Gradio if loading fails.
+    raise
+
+# --- Load hotel docs function ---
def load_hotel_docs(hotel_id):
+    knowledge_dir = "knowledge"
+    path = os.path.join(knowledge_dir, f"{hotel_id}.txt")
    if not os.path.exists(path):
        print(f"⚠️ Knowledge file not found: {path}")
        return []
    try:
        with open(path, encoding="utf-8") as f:
            content = f.read().strip()
+        # Return as list of tuples: [(doc_id, content)]
+        # Using hotel_id as doc_id here
        return [(hotel_id, content)]
    except Exception as e:
        print(f"❌ Error reading knowledge file {path}: {e}")
        return []

+# --- Dynamic Hotel ID Detection ---
+knowledge_dir = "knowledge"
+available_hotels = []
+print("\n🔍 Scanning for available hotels...")
+if os.path.isdir(knowledge_dir):
+    potential_ids = set()
+    for filename in os.listdir(knowledge_dir):
+        if filename.endswith(".txt") and not filename.startswith('.'):
+            if filename.endswith("-system.txt"):
+                base_name = filename[:-len("-system.txt")]
+            else:
+                base_name = filename[:-len(".txt")]
+            if base_name: potential_ids.add(base_name)
+
+    for hotel_id in sorted(list(potential_ids)):
+        main_file = os.path.join(knowledge_dir, f"{hotel_id}.txt")
+        system_file = os.path.join(knowledge_dir, f"{hotel_id}-system.txt")
+        if os.path.exists(main_file) and os.path.exists(system_file):
+            available_hotels.append(hotel_id)
+            print(f"   ✅ Found valid hotel pair: {hotel_id}")
+        elif os.path.exists(main_file) or os.path.exists(system_file):
+            print(f"   ⚠️ Skipping '{hotel_id}': Missing either '{hotel_id}.txt' or '{hotel_id}-system.txt'")
+else:
+    print(f"❌ Error: Knowledge directory '{knowledge_dir}' not found or is not a directory.")
+if not available_hotels:
+    print("🚨 CRITICAL: No valid hotels found. Dropdown will be empty/disabled.")
+print("Hotel scan complete.\n")
+# --- End Dynamic Hotel ID Detection ---
+
+
+# --- Chat function ---
def chat(message, history, hotel_id):
    # Convert incoming UI history (list of dicts) to tuple list
+    if history is None: history = [] # Ensure history is a list
+    history_tuples = [(m['role'], m['content']) for m in history if isinstance(m, dict) and 'role' in m and 'content' in m]
    history_tuples.append(("user", message))

    # Yield user message immediately
    ui_history = [{"role": r, "content": c} for r, c in history_tuples]
    yield ui_history, "" # Update chat, clear textbox

+    # --- Prompt Preparation (Common for both HF/GGUF) ---
+    input_text = "" # Initialize to avoid potential UnboundLocalError
+    try:
+        # --- Load System Prompt ---
+        default_system_prompt = "You are a helpful hotel assistant..." # Define your default
        system_prompt_filename = f"{hotel_id}-system.txt"
        system_prompt_path = os.path.join("knowledge", system_prompt_filename)
+        system_prompt_content = default_system_prompt
        if os.path.exists(system_prompt_path):
            try:
+                with open(system_prompt_path, "r", encoding="utf-8") as f: loaded_prompt = f.read().strip()
+                if loaded_prompt: system_prompt_content = loaded_prompt
+                else: print(f"⚠️ System prompt file '{system_prompt_path}' is empty. Using default.")
+            except Exception as e: print(f"❌ Error reading system prompt file '{system_prompt_path}': {e}. Using default.")
+        else: print(f"⚠️ System prompt file not found: '{system_prompt_path}'. Using default.")
+        # --- End Load System Prompt ---

        messages = [{"role": "system", "content": system_prompt_content}]

+        # --- Load and add hotel document(s) ---
        hotel_docs = load_hotel_docs(hotel_id)
        if not hotel_docs:
            # If no knowledge doc found, inform user and stop
            ui_history.append({"role": "assistant", "content": f"Sorry, I don't have specific information loaded for the hotel '{hotel_id}'."})
+            yield ui_history, ""
+            return

+        for hotel_doc_id, doc_content in hotel_docs:
            messages.append({
                "role": "document",
+                "text": doc_content, # Use 'text' key
+                "doc_id": hotel_doc_id # Use 'doc_id' key
            })
+        # --- End Load Documents ---

+        # --- Include chat history ---
        for role, content in history_tuples:
+            # Exclude the last user message as it's implicitly handled by template
+            if role == "user" and content == message and history_tuples.index((role, content)) == len(history_tuples) - 1:
+                continue # Skip adding the very last user message again if template adds it
            messages.append({"role": role, "content": content})
+        # --- End Include History ---

+        # --- Set controls ---
        controls = {"length":"short","originality": "abstractive"}

+        # --- Apply the template ---
        input_text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            controls=controls
        )

+        print("--- Templated Input ---")
        print(input_text)
+        print("-----------------------")

+    except Exception as e:
+        print(f"❌ Error during prompt preparation: {e}")
+        ui_history.append({"role": "assistant", "content": "Sorry, an error occurred while preparing the prompt."})
+        yield ui_history, ""
+        return

+    # --- Generation Logic: Space (HF) vs Local (GGUF) ---
+    response = "Sorry, an error occurred during generation." # Default error response
+    try:
+        if is_space:
+            # --- HF Model Generation (Space) ---
+            print("🚀 Generating response using HF model...")
+            inputs = tokenizer(input_text, return_tensors="pt").to(device)
+            input_length = inputs.input_ids.shape[1]
+            print(f"DEBUG: Input token length = {input_length}")
+
+            with torch.no_grad():
+                outputs = model.generate(
+                    inputs.input_ids,
+                    attention_mask=inputs.attention_mask,
+                    max_new_tokens=1024,
+                    do_sample=False,
+                    eos_token_id=tokenizer.eos_token_id # Explicitly use EOS token ID
+                )
+            print(f"DEBUG: Output tokens shape = {outputs.shape}")
+
+            # Decode using the IBM example strategy
            new_token_ids = outputs[0][input_length:]
+            print(f"DEBUG: Number of new tokens generated = {len(new_token_ids)}")
            response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
+            print(f"DEBUG: Decoded response (skip_special_tokens=True) = {repr(response)}")
+            print("✅ HF Generation complete.")

        else:
+            # --- GGUF Model Generation (Local) ---
+            print("💻 Generating response using GGUF model...")
+            response = model(
+                input_text,
+                max_new_tokens=1024,
+                stop=["<|end_of_text|>"], # Stop sequence for GGUF
+                temperature=0.3 # Example temperature
+            )
+            response = response.strip()
+            print("✅ GGUF Generation complete.")

+        # Handle empty response after generation
+        if not response:
+            response = "Sorry, I encountered an issue generating a response (empty)."

+    except Exception as e:
+        print(f"❌ Error during model generation or decoding: {e}")
+        # Keep the default error response defined above

+    # --- Final Response Handling ---
+    print(f"DEBUG: Final response variable before UI append = {repr(response)}")

+    # Add the final assistant reply to the UI history
+    ui_history.append({"role": "assistant", "content": response})

+    # Final yield with assistant reply
+    yield ui_history, "" # Update chat, keep textbox cleared

+# --- Gradio UI ---
with gr.Blocks() as demo:
    with gr.Column(variant="panel"):
        gr.Markdown("### 🏨 Multi‑Hotel Chatbot Demo")
+        gr.Markdown(f"**Running:** {model_name}") # Displays HF name or GGUF path

        hotel_selector = gr.Dropdown(
+            choices=available_hotels,
            label="Hotel",
+            value=available_hotels[0] if available_hotels else None,
+            interactive=bool(available_hotels)
        )

        with gr.Row():
+            chatbot = gr.Chatbot(type="messages", label="Chat History", height=500)

        msg = gr.Textbox(
            show_label=False,
            placeholder="Ask about the hotel..."
        )

        clear_btn = gr.Button("Clear")
+        clear_btn.click(lambda: (None, ""), None, [chatbot, msg])

        msg.submit(
            fn=chat,
            inputs=[msg, chatbot, hotel_selector],
+            outputs=[chatbot, msg]
        )

+    if is_space:
+        gr.Markdown("⚠️ Pause the Space when done to avoid charges.")

# Enable streaming queue
demo.queue(default_concurrency_limit=2, max_size=32)

if __name__ == "__main__":
+    print("Launching Gradio Interface...")
    demo.launch()
+    print("Gradio Interface closed.")
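The local branch added to app.py boils down to two pieces: the Hugging Face tokenizer is still used only to build the prompt string via its chat template, while ctransformers runs the quantized GGUF file on CPU. Below is a minimal sketch of that flow outside the Gradio app, assuming the GGUF file has been downloaded to the default path used in app.py; the system/user messages are purely illustrative.

```python
# Minimal local GGUF sketch (assumes ./models/granite-3.3-2b-instruct-Q2_K.gguf
# exists and the dependencies from requirements.txt are installed).
import os
from transformers import AutoTokenizer
from ctransformers import AutoModelForCausalLM as AutoModelForCausalLM_GGUF

gguf_path = os.environ.get("GGUF_MODEL_PATH", "./models/granite-3.3-2b-instruct-Q2_K.gguf")

# HF tokenizer supplies the chat template; it never tokenizes for the GGUF model.
tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.3-2b-instruct", use_fast=True)
messages = [
    {"role": "system", "content": "You are a helpful hotel assistant."},  # illustrative prompt
    {"role": "user", "content": "What time is check-in?"},                # illustrative question
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# ctransformers loads the quantized weights and runs them on CPU (gpu_layers=0),
# mirroring the settings app.py uses for local runs.
llm = AutoModelForCausalLM_GGUF.from_pretrained(
    gguf_path,
    model_type="llama",   # as in app.py; adjust if the model card says otherwise
    context_length=4096,
    gpu_layers=0,
)
reply = llm(prompt, max_new_tokens=256, temperature=0.3, stop=["<|end_of_text|>"])
print(reply.strip())
```

Raising `gpu_layers` offloads layers to a GPU when a GPU-enabled ctransformers build is installed; keeping it at 0 matches the CPU-only path this commit targets.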
requirements.txt CHANGED
@@ -1,9 +1,11 @@
+accelerate
aiofiles==24.1.0
annotated-types==0.7.0
anyio==4.9.0
certifi==2025.1.31
charset-normalizer==3.4.1
click==8.1.8
+ctransformers>=0.2.27
exceptiongroup==1.2.2
fastapi==0.115.12
ffmpy==0.5.0
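The commit message refers to setting up a local venv, but the repo itself ships no setup script. As a rough sketch only (a hypothetical helper, not part of this commit), the bootstrap could look like this; the GGUF path shown is the default from app.py, and activation plus `pip install` still happen in your shell afterwards.

```python
# Hypothetical bootstrap helper: create a local virtual environment and print
# the follow-up commands needed before running app.py with a GGUF model.
import os
import sys
import venv

venv.create(".venv", with_pip=True)  # equivalent to `python -m venv .venv`

activate = r".venv\Scripts\activate" if os.name == "nt" else "source .venv/bin/activate"
print("Next steps:")
print(f"  {activate}")
print("  pip install -r requirements.txt")
print("  export GGUF_MODEL_PATH=./models/granite-3.3-2b-instruct-Q2_K.gguf  # or your own path")
print(f"  {sys.executable} app.py")
```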