# Café Eleven ordering assistant — a Gradio chat UI backed by a LlamaIndex
# RAG pipeline (Llama-2-7b-chat) over the PDF menu files in this directory.
import torch, os
import gradio as gr
from pathlib import Path
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.core import PromptTemplate
# ---------- 1. Load the menu documents ----------
# Collect every PDF in the working directory (the menu the assistant sells from).
pdf_files = [str(p) for p in Path(".").glob("*.pdf")]
# Guard the empty case: SimpleDirectoryReader raises ValueError when
# input_files is empty, which would crash the whole app at import time.
docs = SimpleDirectoryReader(input_files=pdf_files).load_data() if pdf_files else []
# Persona/instructions for the LLM. Passed both to HuggingFaceLLM below and to
# the chat engine, so every turn is constrained to the café-ordering workflow
# and to items found in the indexed menu documents.
SYSTEM_PROMPT = """
You are a friendly café assistant for Café Eleven. Your job is to:
1. Greet the customer warmly
2. Help them place their order
3. Ask for pickup time
4. Suggest add-ons/extras from our menu
5. Confirm the complete order
Menu items are embedded in the document. Always:
- Be polite and professional
- Confirm order details clearly
- Suggest popular combinations
- Never make up items not in our menu
"""
# Query wrapper in the Llama-2 chat format: the <s>[INST] ... [/INST] and
# <<SYS>> ... <</SYS>> tags are what meta-llama/Llama-2-*-chat models were
# fine-tuned on. {system_prompt}, {chat_history} and {query_str} are template
# slots filled in by llama_index at generation time.
wrapper_prompt = PromptTemplate(
"""<s>[INST] <<SYS>>
{system_prompt}
Current conversation:
{chat_history}
<</SYS>>
{query_str} [/INST]"""
)
# ---------- 2. Configure the local LLM ----------
# Llama-2 7B chat, loaded in 4-bit (NF4 via bitsandbytes) with fp16 compute so
# it fits on a single consumer GPU; device_map="auto" lets accelerate place it.
llm = HuggingFaceLLM(
tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
model_name="meta-llama/Llama-2-7b-chat-hf",
context_window=3900,  # slightly under the model's 4096 limit to leave headroom
max_new_tokens=256,  # short replies — this is a turn-by-turn ordering dialog
generate_kwargs={"temperature": 0.2, "do_sample": True},  # low temp: stay close to the menu
device_map="auto",
model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
system_prompt=SYSTEM_PROMPT,
query_wrapper_prompt=wrapper_prompt,
)
# Sentence-transformer embeddings (via langchain) wrapped for llama_index;
# used to embed the menu chunks and the user queries for retrieval.
embed_model = LangchainEmbedding(
HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
# Register global defaults consumed by VectorStoreIndex / the chat engine below.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 512  # target chunk size when splitting documents into index nodes
# ---------- 3. Build the chat engine ----------
memory = ChatMemoryBuffer.from_defaults(token_limit=2000)  # rolling cap on retained chat history
index = VectorStoreIndex.from_documents(docs)  # embeds the PDF menu into an in-memory vector index
chat_engine = index.as_chat_engine(
chat_mode="condense_plus_context",  # rewrite follow-ups as standalone queries, then retrieve context
memory=memory,
system_prompt=SYSTEM_PROMPT,
)
# ---------- 4. Gradio UI ----------
with gr.Blocks(title="Café Eleven Chat") as demo:
    gr.Markdown("## ☕ Café Eleven Ordering Assistant \nType *quit* to end the chat.")
    chatbot = gr.Chatbot()
    user_txt = gr.Textbox(show_label=False, placeholder="Hi, I’d like a latte…")
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Handle one chat turn.

        Returns (textbox_value, chat_history): the textbox is always cleared,
        and the (user, assistant) pair is appended to the chat history.
        """
        if message.lower().strip() in {"quit", "exit", "done"}:
            # Bug fix: the farewell used to be returned as the FIRST output,
            # which wrote it into the user's textbox instead of showing it in
            # the chat window. Append it to the history like any other reply.
            chat_history.append((message, "Thank you for your order! We'll see you soon."))
            return "", chat_history
        response = chat_engine.chat(message).response
        chat_history.append((message, response))
        return "", chat_history

    user_txt.submit(respond, [user_txt, chatbot], [user_txt, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)
if __name__ == "__main__":
    # Queue requests so up to 3 chats are served concurrently.
    # NOTE(review): `concurrency_count` was removed in Gradio 4.x
    # (replaced by queue(default_concurrency_limit=...)) — confirm the
    # pinned gradio version before upgrading.
    demo.queue(concurrency_count=3).launch()