Spaces:

Copain22
/

Cafe-Chatbot

Running on Zero

App Files Files Community

Cafe-Chatbot / app.py

Copain22

Update app.py

8ca6b5f verified 1 day ago

raw

history blame contribute delete

4.52 kB

	# 0. Install custom transformers and imports
	#
	# The runtime installs must finish BEFORE `transformers` and `docx` are
	# imported further down, which is why statements and imports are interleaved.
	import os
	import subprocess
	import sys


	def _pip_install(requirement):
	    """Install ``requirement`` with pip for the running interpreter.

	    pip is invoked as ``<this python> -m pip`` with an argument list (no
	    shell), so the package lands in the environment this script runs in.
	    The install is best effort: a failure is reported on stderr and the
	    script continues, so an already-provisioned environment still starts.

	    Args:
	        requirement: A pip requirement specifier or VCS URL.

	    Returns:
	        True if pip exited with status 0, otherwise False.
	    """
	    result = subprocess.run(
	        [sys.executable, "-m", "pip", "install", requirement],
	        check=False,
	    )
	    if result.returncode != 0:
	        print(
	            f"WARNING: pip install failed for {requirement!r} "
	            f"(exit code {result.returncode})",
	            file=sys.stderr,
	        )
	    return result.returncode == 0


	_pip_install("git+https://github.com/shumingma/transformers.git")
	_pip_install("python-docx")

	import threading
	import torch
	import torch._dynamo
	# Let torch._dynamo suppress its own errors instead of raising them.
	torch._dynamo.config.suppress_errors = True

	from transformers import (
	    AutoModelForCausalLM,
	    AutoTokenizer,
	    TextIteratorStreamer,
	)
	import gradio as gr
	import spaces
	from docx import Document

	# 1. System prompt
	# Default instructions for the assistant. The text is passed through
	# `.strip()` wherever it is used (example rows and the default value of the
	# "System message" textbox), so the leading/trailing newlines of the
	# triple-quoted literal do not reach the model.
	SYSTEM_PROMPT = """
	You are a friendly café assistant for Café Eleven. Your job is to:
	1. Greet the customer warmly.
	2. Help them order food and drinks from our menu.
	3. Ask the customer for their desired pickup time.
	4. Confirm the pickup time before ending the conversation.
	5. Answer questions about ingredients, preparation, etc.
	6. Handle special requests (allergies, modifications) politely.
	7. Provide calorie information if asked.
	Always be polite, helpful, and ensure the customer feels welcomed and cared for!
	"""

	# Identifier passed to `from_pretrained` for both the tokenizer and the model.
	MODEL_ID = "microsoft/bitnet-b1.58-2B-4T"

	# 2. Load tokenizer and model
	# Both are created from the same MODEL_ID. The model is requested in
	# bfloat16 with device placement delegated to `device_map="auto"`.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto")

	# Report the device the loaded model reports for itself.
	print(f"Model loaded on device: {model.device}")

	# 3. Load Menu Text from Word document
	def load_menu_text(docx_path):
	    """Return the non-blank paragraphs of a Word document as one string.

	    Args:
	        docx_path: Path of the ``.docx`` file, handed to ``Document``.

	    Returns:
	        The whitespace-trimmed text of every paragraph that is not empty
	        after trimming, joined with single newlines in document order.
	    """
	    trimmed = (paragraph.text.strip() for paragraph in Document(docx_path).paragraphs)
	    return "\n".join(text for text in trimmed if text)


	# Read the menu once at import time.
	MENU_TEXT = load_menu_text("menu.docx")
	print("Loaded menu text from Word document.")

	# 4. Simple retrieval function (search inside MENU_TEXT)
	def retrieve_context(question, top_k=3, menu_text=None):
	    """Return the menu lines most relevant to ``question``.

	    The question is lower-cased and split into words on any
	    non-alphanumeric character, so trailing punctuation ("burgers?") does
	    not prevent a match. A menu line matches when it contains at least one
	    query word as a case-insensitive substring. Matching lines are ranked
	    by how many distinct query words they contain; ties keep menu order.

	    Args:
	        question: Free-text user question.
	        top_k: Maximum number of menu lines to return.
	        menu_text: Newline-separated menu text to search. Defaults to the
	            module-level ``MENU_TEXT``.

	    Returns:
	        Up to ``top_k`` matching lines joined by blank lines, or a fixed
	        apology sentence when nothing matches.
	    """
	    if menu_text is None:
	        menu_text = MENU_TEXT

	    # Turn punctuation into separators, then de-duplicate the query words
	    # once instead of re-splitting the question for every menu line.
	    normalized = "".join(ch if ch.isalnum() else " " for ch in question.lower())
	    words = set(normalized.split())

	    scored = []
	    for line in menu_text.split("\n"):
	        lowered = line.lower()
	        hits = sum(1 for word in words if word in lowered)
	        if hits:
	            scored.append((hits, line))

	    if not scored:
	        return "Sorry, I couldn't find relevant menu information."

	    # list.sort is stable (also with reverse=True), so lines with equal
	    # hit counts stay in their original menu order.
	    scored.sort(key=lambda pair: pair[0], reverse=True)
	    return "\n\n".join(line for _, line in scored[:top_k])

	# 5. Chat respond function
	@spaces.GPU
	def respond(
	    message: str,
	    history: list[tuple[str, str]],
	    system_message: str,
	    max_tokens: int,
	    temperature: float,
	    top_p: float,
	):
	    """Stream the assistant's reply to ``message`` as a growing string.

	    Args:
	        message: Latest user message.
	        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
	            falsy entries are skipped.
	        system_message: Content of the leading system message.
	        max_tokens: Forwarded to ``generate`` as ``max_new_tokens``.
	        temperature: Forwarded to ``generate`` as ``temperature``.
	        top_p: Forwarded to ``generate`` as ``top_p``.

	    Yields:
	        The reply text accumulated so far, once per chunk the streamer
	        produces.
	    """
	    menu_context = retrieve_context(message)

	    # System message first, prior turns in order, then the new question
	    # with the retrieved menu lines appended to it.
	    conversation = [{"role": "system", "content": system_message}]
	    for user_text, assistant_text in history:
	        for role, content in (("user", user_text), ("assistant", assistant_text)):
	            if content:
	                conversation.append({"role": role, "content": content})
	    conversation.append(
	        {"role": "user", "content": f"{message}\n\nRelevant menu info:\n{menu_context}"}
	    )

	    prompt_text = tokenizer.apply_chat_template(
	        conversation, tokenize=False, add_generation_prompt=True
	    )
	    model_inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

	    token_stream = TextIteratorStreamer(
	        tokenizer, skip_prompt=True, skip_special_tokens=True
	    )
	    generation_args = dict(
	        **model_inputs,
	        streamer=token_stream,
	        max_new_tokens=max_tokens,
	        temperature=temperature,
	        top_p=top_p,
	        do_sample=True,
	    )

	    # Run generation on a worker thread so this generator can consume the
	    # streamer while generation is in progress.
	    worker = threading.Thread(target=model.generate, kwargs=generation_args)
	    worker.start()

	    chunks = []
	    for chunk in token_stream:
	        chunks.append(chunk)
	        yield "".join(chunks)

	# 6. Gradio ChatInterface
	# The additional inputs are listed in the same order as respond()'s
	# parameters after (message, history): system message, max new tokens,
	# temperature, top-p. Each example row is
	# [message, system message, max new tokens, temperature, top-p].
	demo = gr.ChatInterface(
	    fn=respond,
	    title="Café Eleven Assistant",
	    description="Friendly café assistant based on real menu loaded from Word document!",
	    examples=[
	        [sample_question, SYSTEM_PROMPT.strip(), 512, 0.7, 0.95]
	        for sample_question in (
	            "What kinds of burgers do you have?",
	            "Do you have gluten-free pastries?",
	        )
	    ],
	    additional_inputs=[
	        gr.Textbox(value=SYSTEM_PROMPT.strip(), label="System message"),
	        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
	        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
	        gr.Slider(
	            minimum=0.1,
	            maximum=1.0,
	            value=0.95,
	            step=0.05,
	            label="Top-p (nucleus sampling)",
	        ),
	    ],
	)

	# 7. Launch
	# Start the UI only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(share=True)