looker01202 committed
Commit 50aecff
1 Parent(s): c5c9847

Stable Gradio interface, but requires improvement

Files changed (2)
  1. app.py +78 -133
  2. app2.py +196 -0
app.py CHANGED
@@ -1,196 +1,141 @@
  import os
- import getpass
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer

- # Detect execution environment: Spaces runs as user 'gradio'
- is_space = (getpass.getuser() == "user")
- print("RUNNING AS USER:", getpass.getuser())

-
- # Choose model checkpoints based on environment
  if is_space:
      primary_checkpoint = "ibm-granite/granite-3.3-2b-instruct"
      fallback_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
  else:
-     # Local development: use smaller Qwen model only
      primary_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
      fallback_checkpoint = None

  # Device setup
  device = "cuda" if torch.cuda.is_available() else "cpu"

- # Load tokenizer and model (with fallback on Spaces)
  def load_model():
-     print(f"🔍 Trying to load PRIMARY: {primary_checkpoint}")
      try:
-         #tokenizer = AutoTokenizer.from_pretrained(primary_checkpoint)
-         #model = AutoModelForCausalLM.from_pretrained(primary_checkpoint).to(device)
-         # faster loading for large Granite model
          tokenizer = AutoTokenizer.from_pretrained(
              primary_checkpoint,
              use_fast=True
          )
          model = AutoModelForCausalLM.from_pretrained(
              primary_checkpoint,
-             torch_dtype=torch.float16, # 16-bit weights
-             low_cpu_mem_usage=True # memory-efficient
-             #device_map="auto" # auto shard on GPU
          ).to(device)
-         print("✅ Loaded PRIMARY ✓")
          return tokenizer, model, primary_checkpoint
      except Exception as e:
-         print("❌ PRIMARY failed:", e)
          if fallback_checkpoint:
              print(f"🔁 Falling back to {fallback_checkpoint}")
              tokenizer = AutoTokenizer.from_pretrained(fallback_checkpoint)
-             model = AutoModelForCausalLM.from_pretrained(fallback_checkpoint).to(device)
-             print("✅ Loaded FALLBACK ✓")
              return tokenizer, model, fallback_checkpoint
          raise

  tokenizer, model, model_name = load_model()

- # Load hotel-specific documents from disk as (document_id, content) pairs
- def load_hotel_docs(hotel_id: str):
      path = os.path.join("knowledge", f"{hotel_id}.txt")
      if not os.path.exists(path):
          return []
-     content = open(path, "r", encoding="utf-8").read().strip()
-     # Use a single document; document_id can be hotel_id
-     return [(f"{hotel_id}-info", content)]

- # Chat function integrating both local Qwen flow and IBM Granite RAG template with document roles
  def chat(message, history, hotel_id):
      if history is None:
-         history = []
-     # Append user message
-     history.append(("user", message))

-     # ==== Local development flow: simple chat via Qwen ====
-     # ==== Local development flow: simple chat via Qwen ====
-     # ==== Local development flow: simple chat via Qwen ====
-     # ==== Local development flow: simple chat via Qwen ====

      if not is_space:
-         # Build message dict list from history tuples
-         msgs = [{"role": role, "content": content} for role, content in history]
-         # Apply Qwen's chat template
          input_text = tokenizer.apply_chat_template(
              msgs,
              tokenize=False,
              add_generation_prompt=True
          )
-         print("printing templated chat (pre-tokenizes), ready for sending to the model\n")
-         print(input_text)
-
-         # Generate response
          inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-         outputs = model.generate(
-             inputs,
-             max_new_tokens=1024,
-             do_sample=False
          )
          decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
-         print("RAW DECODED:\n", decoded)
-         #response = decoded.split("<|assistant|>")[-1].strip()
-         response = decoded.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0]
-         # history.append(("assistant", f"{response}\n_(Model: {model_name})_"))
-         history.append(("assistant", f"{response}"))
-
-         # Clear textbox by returning empty string as third output
-         return history, history, ""

-
-
-     # ==== Space production flow: IBM Granite RAG ====
-     # ==== Space production flow: IBM Granite RAG ====
-     # ==== Space production flow: IBM Granite RAG ====
-     # ==== Space production flow: IBM Granite RAG ====
-
-     # Prepare system prompt
-     system_prompt = (
-         "Knowledge Cutoff Date: April 2024. Today's Date: April 12, 2025. "
-         "You are Alexander, the front desk assistant at Family Village Inn in Cyprus."
-         "You only know what’s in the provided documents."
-         "Greet guests politely, but only engage in general chit-chat if it helps answer their question about the hotel."
-         "Write the response to the user's questions about the hotel by strictly aligning with the facts in the provided documents. "
-         "If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data."
-     )
-     system_prompt = (
-         "Knowledge Cutoff Date: April 2024. Today's Date: April 12, 2025. "
-         "You are Alexander, the front desk assistant at Family Village Inn in Cyprus. "
-         "You only know what’s in the provided documents. "
-         "Greet guests politely, and only engage in general chit-chat if it helps answer their question about the hotel."
-         "Answer their questions by strictly using the facts in the documents. "
-         "If the information isn’t available, say: "
-         "\"I'm sorry, but I don't have enough information to answer that question.\""
-     )
-
-
-     # Start building message list
-     messages = [{"role": "system", "content": system_prompt}]
-     # Inject each document with role 'document' and metadata
-     for doc_id, doc_content in load_hotel_docs(hotel_id):
-         messages.append({
-             "role": "document",
-             "content": doc_content,
-             "document_id": doc_id
-         })
-     # Finally add the user turn
-     messages.append({"role": "user", "content": message})
-
-     # Apply the model's chat template (IBM-trained template)
-     input_text = tokenizer.apply_chat_template(
-         messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )
-
-     print("printing templated chat (pre-tokenized), ready for sending to the model\n")
-     print(input_text)
-
-     # Tokenize, generate, and decode
-     inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-     outputs = model.generate(
-         inputs,
-         max_new_tokens=1024,
-         do_sample=False
-     )
-     decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
-     print("RAW DECODED:\n", decoded)
-     # Extract the assistant's reply
-     response = decoded.split("<|start_of_role|>assistant")[-1].split("<|end_of_role|>")[0]
-     #history.append(("assistant", f"{response}\n_(Model: {model_name})_"))
-     history.append(("assistant", f"{response}"))
-
-     # Clear textbox by returning empty string as third output
-     return history, history, ""

  # Available hotels
- hotel_ids = [
-     "cyprus-guesthouse-family",
-     "coastal-villa-family",
-     "village-inn-family"
- ]
-
- # Gradio interface setup
- demo = gr.Blocks()
- with demo:
-     gr.Markdown("### 🏨 Hotel Chatbot Demo")
-     gr.Markdown(f"Currently running: **{model_name}**", elem_id="model-status")
-
      with gr.Row():
-         hotel_selector = gr.Dropdown(hotel_ids, label="Choose a hotel", value=hotel_ids[0])
-         chatbot = gr.Chatbot()
-         msg = gr.Textbox(placeholder="Ask me about the hotel...", show_label=False)
      msg.submit(
          fn=chat,
          inputs=[msg, chatbot, hotel_selector],
-         outputs=[chatbot, chatbot, msg]
      )
-     gr.Markdown("⚠️ **Reminder:** Pause the Space when done to avoid GPU charges.")

  if __name__ == "__main__":
      demo.launch()
 
  import os
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer

+ # Detect Space environment by SPACE_ID env var
+ env = os.environ
+ is_space = env.get("SPACE_ID") is not None
+ print("RUNNING IN SPACE?", is_space)

+ # Model selection
  if is_space:
      primary_checkpoint = "ibm-granite/granite-3.3-2b-instruct"
      fallback_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
  else:
      primary_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
      fallback_checkpoint = None

  # Device setup
  device = "cuda" if torch.cuda.is_available() else "cpu"

+ # Load model with fallback
  def load_model():
+     print(f"🔍 Loading model: {primary_checkpoint}")
      try:
          tokenizer = AutoTokenizer.from_pretrained(
              primary_checkpoint,
              use_fast=True
          )
          model = AutoModelForCausalLM.from_pretrained(
              primary_checkpoint,
+             torch_dtype=torch.float16,
+             low_cpu_mem_usage=True
          ).to(device)
+         print(f"✅ Loaded primary {primary_checkpoint}")
          return tokenizer, model, primary_checkpoint
      except Exception as e:
+         print(f"❌ Primary load failed: {e}")
          if fallback_checkpoint:
              print(f"🔁 Falling back to {fallback_checkpoint}")
              tokenizer = AutoTokenizer.from_pretrained(fallback_checkpoint)
+             model = AutoModelForCausalLM.from_pretrained(fallback_checkpoint).to(device)
+             print(f"✅ Loaded fallback {fallback_checkpoint}")
              return tokenizer, model, fallback_checkpoint
          raise

  tokenizer, model, model_name = load_model()

+ # Load hotel docs
+ def load_hotel_docs(hotel_id):
      path = os.path.join("knowledge", f"{hotel_id}.txt")
      if not os.path.exists(path):
          return []
+     content = open(path, encoding="utf-8").read().strip()
+     return [(hotel_id, content)]

+ # Chat function
  def chat(message, history, hotel_id):
+     # Convert incoming UI history (list of dicts) to tuple list
      if history is None:
+         history_tuples = []
+     else:
+         history_tuples = [(m['role'], m['content']) for m in history]
+     # Append the new user turn
+     history_tuples.append(("user", message))

+     # Yield user message immediately
+     ui_history = [{"role": r, "content": c} for r, c in history_tuples]
+     yield ui_history, ""

+     # Local Qwen flow
      if not is_space:
+         # Build messages including the new user turn
+         msgs = [{"role": role, "content": content} for role, content in history_tuples]
          input_text = tokenizer.apply_chat_template(
              msgs,
              tokenize=False,
              add_generation_prompt=True
          )
          inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+         outputs = model.generate(inputs, max_new_tokens=1024, do_sample=False)
+         decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
+         # Extract assistant response
+         response = decoded.split("<|im_start|>assistant")[-1]
+         response = response.split("<|im_end|>")[0].strip()
+     else:
+         # IBM Granite RAG flow
+         system_prompt = (
+             "Knowledge Cutoff Date: April 2024. Today's Date: April 12, 2025. "
+             "You are Alexander, the front desk assistant at Family Village Inn in Cyprus. "
+             "You only know what's in the provided documents. "
+             "Greet guests politely, but only chit-chat when it helps answer hotel questions. "
+             "Answer using only facts from the documents; if unavailable, say you cannot answer."
+         )
+         messages = [{"role": "system", "content": system_prompt}]
+         for doc_id, doc_content in load_hotel_docs(hotel_id):
+             messages.append({"role": "document", "content": doc_content, "document_id": doc_id})
+         # Include full history including the new user message
+         for role, content in history_tuples:
+             messages.append({"role": role, "content": content})
+         input_text = tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True
          )
+         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+         outputs = model.generate(inputs, max_new_tokens=1024, do_sample=False)
          decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
+         response = decoded.split("<|start_of_role|>assistant<|end_of_role|>")[-1]
+         response = response.split("<|end_of_text|>")[0].strip()
+     # Append assistant reply to history
+     history_tuples.append(("assistant", f"{response}"))

+     # Final yield with assistant reply
+     ui_history = [{"role": r, "content": c} for r, c in history_tuples]
+     yield ui_history, ""

  # Available hotels
+ hotel_ids = ["cyprus-guesthouse-family", "coastal-villa-family", "village-inn-family"]
+
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("### 🏨 Multi-Hotel Chatbot Demo")
+     gr.Markdown(f"**Running:** {model_name}")
+     hotel_selector = gr.Dropdown(hotel_ids, label="Hotel", value=hotel_ids[0])
+     #chatbot = gr.Chatbot(type="messages")
      with gr.Row():
+         chatbot = gr.Chatbot(type="messages")
+         msg = gr.Textbox(show_label=False, placeholder="Ask about the hotel...")
      msg.submit(
          fn=chat,
          inputs=[msg, chatbot, hotel_selector],
+         outputs=[chatbot, msg]
      )
+     gr.Markdown("⚠️ Pause the Space when done to avoid charges.")
+
+ # Enable streaming queue for generator-based chat
+ demo.queue()

  if __name__ == "__main__":
      demo.launch()
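The reworked chat() above is a generator: it yields the messages-format history once to echo the user's turn and clear the textbox, then a second time with the assistant reply, which is why the Blocks app enables the queue. A minimal, self-contained sketch of that same pattern (the echo reply is a stand-in for the model call, and the widget names are illustrative, not taken from this repo):

import gradio as gr

def chat(message, history):
    # With type="messages", history arrives as a list of {"role": ..., "content": ...} dicts
    history = (history or []) + [{"role": "user", "content": message}]
    yield history, ""  # first yield: show the user turn and clear the textbox
    reply = f"You said: {message}"  # stand-in for the tokenizer/model.generate(...) step
    history = history + [{"role": "assistant", "content": reply}]
    yield history, ""  # second yield: append the assistant reply

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(show_label=False, placeholder="Type a message...")
    msg.submit(fn=chat, inputs=[msg, chatbot], outputs=[chatbot, msg])

demo.queue()  # enable the request queue so intermediate yields stream to the UI

if __name__ == "__main__":
    demo.launch()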
app2.py ADDED
@@ -0,0 +1,196 @@
+ import os
+ import getpass
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Detect execution environment: Spaces runs as user 'gradio'
+ is_space = (getpass.getuser() == "user")
+ print("RUNNING AS USER:", getpass.getuser())
+
+
+ # Choose model checkpoints based on environment
+ if is_space:
+     primary_checkpoint = "ibm-granite/granite-3.3-2b-instruct"
+     fallback_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
+ else:
+     # Local development: use smaller Qwen model only
+     primary_checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"
+     fallback_checkpoint = None
+
+ # Device setup
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Load tokenizer and model (with fallback on Spaces)
+ def load_model():
+     print(f"🔍 Trying to load PRIMARY: {primary_checkpoint}")
+     try:
+         #tokenizer = AutoTokenizer.from_pretrained(primary_checkpoint)
+         #model = AutoModelForCausalLM.from_pretrained(primary_checkpoint).to(device)
+         # faster loading for large Granite model
+         tokenizer = AutoTokenizer.from_pretrained(
+             primary_checkpoint,
+             use_fast=True
+         )
+         model = AutoModelForCausalLM.from_pretrained(
+             primary_checkpoint,
+             torch_dtype=torch.float16, # 16-bit weights
+             low_cpu_mem_usage=True # memory-efficient
+             #device_map="auto" # auto shard on GPU
+         ).to(device)
+         print("✅ Loaded PRIMARY ✓")
+         return tokenizer, model, primary_checkpoint
+     except Exception as e:
+         print("❌ PRIMARY failed:", e)
+         if fallback_checkpoint:
+             print(f"🔁 Falling back to {fallback_checkpoint}")
+             tokenizer = AutoTokenizer.from_pretrained(fallback_checkpoint)
+             model = AutoModelForCausalLM.from_pretrained(fallback_checkpoint).to(device)
+             print("✅ Loaded FALLBACK ✓")
+             return tokenizer, model, fallback_checkpoint
+         raise
+
+ tokenizer, model, model_name = load_model()
+
+ # Load hotel-specific documents from disk as (document_id, content) pairs
+ def load_hotel_docs(hotel_id: str):
+     path = os.path.join("knowledge", f"{hotel_id}.txt")
+     if not os.path.exists(path):
+         return []
+     content = open(path, "r", encoding="utf-8").read().strip()
+     # Use a single document; document_id can be hotel_id
+     return [(f"{hotel_id}-info", content)]
+
+ # Chat function integrating both local Qwen flow and IBM Granite RAG template with document roles
+ def chat(message, history, hotel_id):
+     if history is None:
+         history = []
+     # Append user message
+     history.append(("user", message))
+
+     # ==== Local development flow: simple chat via Qwen ====
+     # ==== Local development flow: simple chat via Qwen ====
+     # ==== Local development flow: simple chat via Qwen ====
+     # ==== Local development flow: simple chat via Qwen ====
+
+     if not is_space:
+         # Build message dict list from history tuples
+         msgs = [{"role": role, "content": content} for role, content in history]
+         # Apply Qwen's chat template
+         input_text = tokenizer.apply_chat_template(
+             msgs,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+         print("printing templated chat (pre-tokenizes), ready for sending to the model\n")
+         print(input_text)
+
+         # Generate response
+         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+         outputs = model.generate(
+             inputs,
+             max_new_tokens=1024,
+             do_sample=False
+         )
+         decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
+         print("RAW DECODED:\n", decoded)
+         #response = decoded.split("<|assistant|>")[-1].strip()
+         response = decoded.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0]
+         # history.append(("assistant", f"{response}\n_(Model: {model_name})_"))
+         history.append(("assistant", f"{response}"))
+
+         # Clear textbox by returning empty string as third output
+         return history, history, ""
+
+
+
+     # ==== Space production flow: IBM Granite RAG ====
+     # ==== Space production flow: IBM Granite RAG ====
+     # ==== Space production flow: IBM Granite RAG ====
+     # ==== Space production flow: IBM Granite RAG ====
+
+     # Prepare system prompt
+     system_prompt = (
+         "Knowledge Cutoff Date: April 2024. Today's Date: April 12, 2025. "
+         "You are Alexander, the front desk assistant at Family Village Inn in Cyprus."
+         "You only know what’s in the provided documents."
+         "Greet guests politely, but only engage in general chit-chat if it helps answer their question about the hotel."
+         "Write the response to the user's questions about the hotel by strictly aligning with the facts in the provided documents. "
+         "If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data."
+     )
+     system_prompt = (
+         "Knowledge Cutoff Date: April 2024. Today's Date: April 12, 2025. "
+         "You are Alexander, the front desk assistant at Family Village Inn in Cyprus. "
+         "You only know what’s in the provided documents. "
+         "Greet guests politely, and only engage in general chit-chat if it helps answer their question about the hotel."
+         "Answer their questions by strictly using the facts in the documents. "
+         "If the information isn’t available, say: "
+         "\"I'm sorry, but I don't have enough information to answer that question.\""
+     )
+
+
+     # Start building message list
+     messages = [{"role": "system", "content": system_prompt}]
+     # Inject each document with role 'document' and metadata
+     for doc_id, doc_content in load_hotel_docs(hotel_id):
+         messages.append({
+             "role": "document",
+             "content": doc_content,
+             "document_id": doc_id
+         })
+     # Finally add the user turn
+     messages.append({"role": "user", "content": message})
+
+     # Apply the model's chat template (IBM-trained template)
+     input_text = tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     print("printing templated chat (pre-tokenized), ready for sending to the model\n")
+     print(input_text)
+
+     # Tokenize, generate, and decode
+     inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+     outputs = model.generate(
+         inputs,
+         max_new_tokens=1024,
+         do_sample=False
+     )
+     decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
+     print("RAW DECODED:\n", decoded)
+     # Extract the assistant's reply
+     response = decoded.split("<|start_of_role|>assistant")[-1].split("<|end_of_role|>")[0]
+     #history.append(("assistant", f"{response}\n_(Model: {model_name})_"))
+     history.append(("assistant", f"{response}"))
+
+     # Clear textbox by returning empty string as third output
+     return history, history, ""
+
+ # Available hotels
+ hotel_ids = [
+     "cyprus-guesthouse-family",
+     "coastal-villa-family",
+     "village-inn-family"
+ ]
+
+ # Gradio interface setup
+ demo = gr.Blocks()
+ with demo:
+     gr.Markdown("### 🏨 Hotel Chatbot Demo")
+     gr.Markdown(f"Currently running: **{model_name}**", elem_id="model-status")
+
+     with gr.Row():
+         hotel_selector = gr.Dropdown(hotel_ids, label="Choose a hotel", value=hotel_ids[0])
+         chatbot = gr.Chatbot()
+         msg = gr.Textbox(placeholder="Ask me about the hotel...", show_label=False)
+     msg.submit(
+         fn=chat,
+         inputs=[msg, chatbot, hotel_selector],
+         outputs=[chatbot, chatbot, msg]
+     )
+     gr.Markdown("⚠️ **Reminder:** Pause the Space when done to avoid GPU charges.")
+
+ if __name__ == "__main__":
+     demo.launch()