# Cafe-Chatbot / app.py
import os
from pathlib import Path

import torch
from huggingface_hub import login
from llama_index.core import (
    VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
)
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain_community.embeddings import HuggingFaceEmbeddings
# ---------- Constants ----------
SYSTEM_PROMPT = """
You are a friendly café assistant for Café Eleven. Your job is to:
1. Greet the customer warmly
2. Help them order food/drinks from our menu
3. Answer questions about ingredients, preparation, etc.
4. Process special requests (allergies, modifications)
5. Provide a friendly farewell
Always be polite and helpful!
"""
WRAPPER_PROMPT = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "\n<</SYS>>\n\n{query_str} [/INST]"
)
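
# Rendered, the wrapper produces a prompt in the Llama-2 chat format, with the
# system prompt inside a <<SYS>> block in the first [INST] turn, e.g.:
#
#   [INST]<<SYS>>
#   You are a friendly café assistant for Café Eleven. ...
#   <</SYS>>
#
#   What pastries do you have? [/INST]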
# ---------- 1. Login & Load Data ----------
login(token=os.environ["HF_TOKEN"])
# Load every PDF in the repo root as source documents.
docs = SimpleDirectoryReader(
    input_files=[str(p) for p in Path(".").glob("*.pdf")]
).load_data()
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
Settings.embed_model = embed_model
Settings.chunk_size = 512  # split documents into ~512-token chunks before embedding
index = VectorStoreIndex.from_documents(docs)
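
# Sanity-check sketch: once Settings.llm has been configured (it is set inside
# get_chat_engine below), the index can be queried directly, without the chat
# machinery:
#   query_engine = index.as_query_engine()
#   print(query_engine.query("What drinks are on the menu?"))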
# ---------- 2. Initialize Chat Engine ----------
_state = {"chat_engine": None}  # module-level cache so the model loads only once
def get_chat_engine():
    # Build the LLM and chat engine lazily on first use, then cache them.
    if _state["chat_engine"] is None:
        llm = HuggingFaceLLM(
            tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
            model_name="meta-llama/Llama-2-7b-chat-hf",
            context_window=3900,
            max_new_tokens=256,
            generate_kwargs={"temperature": 0.2, "do_sample": True},
            device_map="auto",
            model_kwargs={
                "torch_dtype": torch.float16,
                "load_in_4bit": True,  # 4-bit quantization via bitsandbytes
                "use_auth_token": os.environ["HF_TOKEN"],  # legacy name; newer transformers use token=
            },
            system_prompt=SYSTEM_PROMPT,
            query_wrapper_prompt=WRAPPER_PROMPT,
        )
        Settings.llm = llm
        # Keep roughly the last 2000 tokens of conversation as chat history.
        memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
        _state["chat_engine"] = index.as_chat_engine(
            chat_mode="condense_plus_context",
            memory=memory,
            system_prompt=SYSTEM_PROMPT,
        )
    return _state["chat_engine"]
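
# For a new customer, the cached engine's conversation memory can be cleared
# with the chat engine's reset() method, e.g.:
#   get_chat_engine().reset()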
# ---------- 3. Simple Chat Function ----------
def chat_with_cafe_eleven(message: str) -> str:
    # Treat quit/exit/done as the end of the order and return a farewell.
    if message.lower().strip() in {"quit", "exit", "done"}:
        return "Thank you for your order! We'll see you soon."
    engine = get_chat_engine()
    return engine.chat(message).response
# ---------- Example usage ----------
if __name__ == "__main__":
    while True:
        user_message = input("You: ")
        bot_response = chat_with_cafe_eleven(user_message)
        print("Café Eleven:", bot_response)
        if user_message.lower().strip() in {"quit", "exit", "done"}:
            break
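
# On a Hugging Face Space the same function could back a small web UI instead
# of this CLI loop. A minimal sketch, assuming gradio is installed (the lambda
# adapts the single-argument function to ChatInterface's (message, history)
# signature):
#
#   import gradio as gr
#   gr.ChatInterface(lambda message, history: chat_with_cafe_eleven(message)).launch()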