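"""Café Eleven ordering assistant.

A RAG chatbot built with LlamaIndex: menu PDFs in the working directory are
embedded into a vector index, and Llama-2-7b-chat answers customer questions
against that context while keeping conversation memory.
"""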
import os
from pathlib import Path

import torch
from huggingface_hub import login
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import (
    VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
)
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
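# Approximate dependencies (pin versions for your environment):
#   pip install llama-index llama-index-llms-huggingface \
#       llama-index-embeddings-langchain langchain-community \
#       sentence-transformers transformers accelerate bitsandbytes torch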
# ---------- Constants ----------
SYSTEM_PROMPT = """
You are a friendly café assistant for Café Eleven. Your job is to:
1. Greet the customer warmly
2. Help them order food/drinks from our menu
3. Answer questions about ingredients, preparation, etc.
4. Process special requests (allergies, modifications)
5. Provide a friendly farewell
Always be polite and helpful!
"""

# Llama-2-chat expects the [INST] ... [/INST] wrapper with an optional
# <<SYS>> system block; this template reproduces that format.
WRAPPER_PROMPT = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "\n<</SYS>>\n\n{query_str} [/INST]"
)
# ---------- 1. Login & Load Data ----------
login(token=os.environ["HF_TOKEN"])  # HF_TOKEN must be set (Llama-2 weights are gated)

# Index every PDF in the working directory (the café menu documents).
docs = SimpleDirectoryReader(
    input_files=[str(p) for p in Path(".").glob("*.pdf")]
).load_data()

embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
Settings.embed_model = embed_model
Settings.chunk_size = 512
index = VectorStoreIndex.from_documents(docs)
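# Optional sketch: persist the index so restarts skip re-embedding. The
# persist_dir path is an assumption; adjust it to your storage layout.
#
#   from llama_index.core import StorageContext, load_index_from_storage
#   index.storage_context.persist(persist_dir="./storage")
#   index = load_index_from_storage(
#       StorageContext.from_defaults(persist_dir="./storage")
#   )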
# ---------- 2. Initialize Chat Engine ----------
# Lazy singleton: defer loading the 7B model until the first message
# instead of paying the cost at import time.
_state = {"chat_engine": None}

def get_chat_engine():
    if _state["chat_engine"] is None:
        llm = HuggingFaceLLM(
            tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
            model_name="meta-llama/Llama-2-7b-chat-hf",
            context_window=3900,
            max_new_tokens=256,
            generate_kwargs={"temperature": 0.2, "do_sample": True},
            device_map="auto",
            model_kwargs={
                "torch_dtype": torch.float16,
                # Newer transformers releases prefer
                # quantization_config=BitsAndBytesConfig(load_in_4bit=True).
                "load_in_4bit": True,
                # login() above already authenticates; "token" replaces the
                # deprecated "use_auth_token" kwarg.
                "token": os.environ["HF_TOKEN"],
            },
            system_prompt=SYSTEM_PROMPT,
            query_wrapper_prompt=WRAPPER_PROMPT,
        )
        Settings.llm = llm
        # Cap conversation history so the prompt stays within context_window.
        memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
        _state["chat_engine"] = index.as_chat_engine(
            chat_mode="condense_plus_context",
            memory=memory,
            system_prompt=SYSTEM_PROMPT,
        )
    return _state["chat_engine"]
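# To start a fresh conversation (e.g., a new customer), clear the engine's
# memory; .reset() is the standard llama_index chat-engine hook:
#
#   get_chat_engine().reset()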
# ---------- 3. Simple Chat Function ----------
def chat_with_cafe_eleven(message: str) -> str:
    """Return the assistant's reply, or a farewell on quit/exit/done."""
    if message.lower().strip() in {"quit", "exit", "done"}:
        return "Thank you for your order! We'll see you soon."
    engine = get_chat_engine()
    return engine.chat(message).response
# ---------- Example usage ----------
if __name__ == "__main__":
    while True:
        user_message = input("You: ")
        bot_response = chat_with_cafe_eleven(user_message)
        print("Café Eleven:", bot_response)
        if user_message.lower().strip() in {"quit", "exit", "done"}:
            break
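# Run with `python app.py` (filename is an assumption; Spaces conventionally
# use app.py). Requires HF_TOKEN in the environment and the menu PDFs
# alongside the script.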