import os
import torch
from pathlib import Path
from huggingface_hub import login
from llama_index.core import (
    VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
)
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain_community.embeddings import HuggingFaceEmbeddings
# ---------- Constants ----------
SYSTEM_PROMPT = """
You are a friendly café assistant for Café Eleven. Your job is to:
1. Greet the customer warmly
2. Help them order food/drinks from our menu
3. Answer questions about ingredients, preparation, etc.
4. Process special requests (allergies, modifications)
5. Provide a friendly farewell
Always be polite and helpful!
"""
WRAPPER_PROMPT = PromptTemplate(
"[INST]<<SYS>>\n" + SYSTEM_PROMPT + "\n<</SYS>>\n\n{query_str} [/INST]"
)
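# For reference, the wrapper renders each query into Llama-2's chat format.
# An illustrative example of a rendered prompt (query text is made up):
#   [INST]<<SYS>>
#   You are a friendly café assistant for Café Eleven. ...
#   <</SYS>>
#
#   What drinks do you have? [/INST]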
# ---------- 1. Login & Load Data ----------
# Authenticate with the Hugging Face Hub (the Llama-2 weights are gated).
login(token=os.environ["HF_TOKEN"])
# Load the café's menu PDFs from the working directory into Document objects.
docs = SimpleDirectoryReader(
    input_files=[str(p) for p in Path(".").glob("*.pdf")]
).load_data()
# Sentence-transformers MPNet embeddings, wrapped for use with LlamaIndex.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
Settings.embed_model = embed_model
Settings.chunk_size = 512  # split documents into ~512-token chunks for indexing
# Build an in-memory vector index over the menu documents.
index = VectorStoreIndex.from_documents(docs)
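# Optional retrieval sanity check (illustrative sketch; the query text is
# just an example, not part of the app's flow):
# query_engine = index.as_query_engine(similarity_top_k=3)
# print(query_engine.query("What drinks are on the menu?"))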
# ---------- 2. Initialize Chat Engine ----------
_state = {"chat_engine": None}
def get_chat_engine():
if _state["chat_engine"] is None:
llm = HuggingFaceLLM(
tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
model_name="meta-llama/Llama-2-7b-chat-hf",
context_window=3900,
max_new_tokens=256,
generate_kwargs={"temperature": 0.2, "do_sample": True},
device_map="auto",
model_kwargs={
"torch_dtype": torch.float16,
"load_in_4bit": True,
"use_auth_token": os.environ["HF_TOKEN"]
},
system_prompt=SYSTEM_PROMPT,
query_wrapper_prompt=WRAPPER_PROMPT,
)
Settings.llm = llm
memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
_state["chat_engine"] = index.as_chat_engine(
chat_mode="condense_plus_context",
memory=memory,
system_prompt=SYSTEM_PROMPT,
)
return _state["chat_engine"]
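# Illustrative usage (the first call loads the model; later calls reuse it):
# engine = get_chat_engine()
# print(engine.chat("Do you have oat milk?").response)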
# ---------- 3. Simple Chat Function ----------
def chat_with_cafe_eleven(message: str) -> str:
    # Closing phrases end the conversation with a canned farewell.
    if message.lower().strip() in {"quit", "exit", "done"}:
        return "Thank you for your order! We'll see you soon."
    engine = get_chat_engine()
    return engine.chat(message).response
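# Note (assumption): conversation memory persists across calls; the chat
# engine's reset() method can clear it if a fresh session is needed.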
# ---------- Example usage ----------
if __name__ == "__main__":
    while True:
        user_message = input("You: ")
        bot_response = chat_with_cafe_eleven(user_message)
        print("Café Eleven:", bot_response)
        if user_message.lower().strip() in {"quit", "exit", "done"}:
            break