import torch
import gradio as gr
from pathlib import Path

from llama_index.core import (
    PromptTemplate,
    Settings,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.langchain import LangchainEmbedding
# On newer LangChain releases this import lives in langchain_community.embeddings.
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# ---------- 1. Load the menu documents ----------
pdf_files = [str(p) for p in Path(".").glob("*.pdf")]
docs = SimpleDirectoryReader(input_files=pdf_files).load_data()
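# Optional sanity check (an addition, not part of the original flow): fail fast
# when no menu PDFs were found, since an empty index makes the assistant useless.
if not docs:
    raise FileNotFoundError("No PDF menus found in the working directory.")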
# ---------- 2. Configure the LLM & embeddings ----------
SYSTEM_PROMPT = """
You are a friendly café assistant for Café Eleven. Your job is to:
1. Greet the customer warmly
2. Help them place their order
3. Ask for the pickup time
4. Suggest add-ons/extras from our menu
5. Confirm the complete order

Menu items are embedded in the document. Always:
- Be polite and professional
- Confirm order details clearly
- Suggest popular combinations
- Never make up items that are not on our menu
"""
wrapper_prompt = PromptTemplate(
    """<s>[INST] <<SYS>>
{system_prompt}

Current conversation:
{chat_history}
<</SYS>>

{query_str} [/INST]"""
)
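# The <s>[INST] ... [/INST] and <<SYS>> ... <</SYS>> markers are the instruction
# and system delimiters Llama-2-chat was trained on; {system_prompt},
# {chat_history}, and {query_str} are PromptTemplate placeholders.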
llm = HuggingFaceLLM(
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    context_window=3900,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.2, "do_sample": True},
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
    system_prompt=SYSTEM_PROMPT,
    query_wrapper_prompt=wrapper_prompt,
)
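# Note: the Llama-2 checkpoints are gated on the Hugging Face Hub, so this assumes
# you have accepted Meta's license and are authenticated (e.g. `huggingface-cli login`
# locally, or an HF_TOKEN secret on Spaces). load_in_4bit=True additionally assumes
# `bitsandbytes` and `accelerate` are installed and a CUDA GPU is available.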
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)

Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 512
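# The 512-token chunk size is an assumption tuned for short menu entries: small
# enough that a chunk rarely spans unrelated menu sections, large enough to keep
# an item's description and price together.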
# ---------- 3. Build the chat engine ----------
memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
index = VectorStoreIndex.from_documents(docs)
chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    memory=memory,
    system_prompt=SYSTEM_PROMPT,
)
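# Quick smoke test (a sketch; assumes the indexed PDF actually contains the menu):
# print(chat_engine.chat("What drinks do you have?").response)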
# ---------- 4. Gradio UI ----------
with gr.Blocks(title="Café Eleven Chat") as demo:
    gr.Markdown("## ☕ Café Eleven Ordering Assistant  \nType *quit* to end the chat.")
    chatbot = gr.Chatbot()
    user_txt = gr.Textbox(show_label=False, placeholder="Hi, I’d like a latte…")
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        # On a farewell keyword, post the goodbye into the chat history;
        # returning it as the first output would drop it into the textbox
        # instead of the chat window.
        if message.lower().strip() in {"quit", "exit", "done"}:
            chat_history.append((message, "Thank you for your order! We'll see you soon."))
            return "", chat_history
        response = chat_engine.chat(message).response
        chat_history.append((message, response))
        return "", chat_history

    user_txt.submit(respond, [user_txt, chatbot], [user_txt, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)
if __name__ == "__main__":
    # `concurrency_count` is the Gradio 3.x queue argument; on Gradio 4+
    # use `default_concurrency_limit` instead.
    demo.queue(concurrency_count=3).launch()