"""Café Eleven RAG chatbot: index the local menu PDFs and chat over them.

Builds a vector index from every ``*.pdf`` in the working directory using a
sentence-transformers embedding model, and defines the system/query prompts
for a Llama-2-chat model.  Requires the ``HF_TOKEN`` environment variable.
"""

import os
from pathlib import Path

import torch
from huggingface_hub import login
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import (
    PromptTemplate,
    Settings,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

# ---------- Constants ----------
SYSTEM_PROMPT = """
You are a friendly café assistant for Café Eleven. Your job is to:
1. Greet the customer warmly
2. Help them order food/drinks from our menu
3. Answer questions about ingredients, preparation, etc.
4. Process special requests (allergies, modifications)
5. Provide a friendly farewell
Always be polite and helpful!
"""

# Llama-2-chat instruction template.  The system prompt must be wrapped in
# <<SYS>> ... <</SYS>> tags inside the [INST] block; the bare "<>" markers in
# the previous version were a garbled copy of these tags and produced a
# malformed prompt.
WRAPPER_PROMPT = PromptTemplate(
    "[INST] <<SYS>>\n" + SYSTEM_PROMPT + "\n<</SYS>>\n\n{query_str} [/INST]"
)

# ---------- 1. Login & Load Data ----------
# Authenticate against the Hugging Face Hub (needed for the gated Llama-2
# weights).  Raises KeyError early if HF_TOKEN is not set.
login(token=os.environ["HF_TOKEN"])

# Ingest every PDF in the current directory (menu documents).
docs = SimpleDirectoryReader(
    input_files=[str(p) for p in Path(".").glob("*.pdf")]
).load_data()

# Embedding model used for both indexing and query-time retrieval.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
Settings.embed_model = embed_model
Settings.chunk_size = 512

# Build the in-memory vector index over the loaded documents.
index = VectorStoreIndex.from_documents(docs)
# ---------- 2. Initialize Chat Engine ----------
# Module-level cache so the LLM is only loaded once, on first use.
_state = {"chat_engine": None}

# Messages that end the conversation (compared case-insensitively, stripped).
_EXIT_WORDS = {"quit", "exit", "done"}


def get_chat_engine():
    """Return the shared chat engine, building it lazily on first call.

    Loads the Llama-2-7b-chat model (4-bit, fp16, auto device placement),
    registers it as the global LlamaIndex LLM, and wraps the module-level
    ``index`` in a condense-plus-context chat engine with a bounded memory
    buffer.  Subsequent calls return the cached engine.
    """
    if _state["chat_engine"] is not None:
        return _state["chat_engine"]

    language_model = HuggingFaceLLM(
        tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
        model_name="meta-llama/Llama-2-7b-chat-hf",
        context_window=3900,
        max_new_tokens=256,
        generate_kwargs={"temperature": 0.2, "do_sample": True},
        device_map="auto",
        model_kwargs={
            "torch_dtype": torch.float16,
            "load_in_4bit": True,
            "use_auth_token": os.environ["HF_TOKEN"]
        },
        system_prompt=SYSTEM_PROMPT,
        query_wrapper_prompt=WRAPPER_PROMPT,
    )
    Settings.llm = language_model

    engine = index.as_chat_engine(
        chat_mode="condense_plus_context",
        memory=ChatMemoryBuffer.from_defaults(token_limit=2000),
        system_prompt=SYSTEM_PROMPT,
    )
    _state["chat_engine"] = engine
    return engine


# ---------- 3. Simple Chat Function ----------
def chat_with_cafe_eleven(message: str) -> str:
    """Answer one customer message; exit words get a canned farewell."""
    if message.lower().strip() in _EXIT_WORDS:
        return "Thank you for your order! We'll see you soon."
    return get_chat_engine().chat(message).response


# ---------- Example usage ----------
if __name__ == "__main__":
    while True:
        user_message = input("You: ")
        print("Café Eleven:", chat_with_cafe_eleven(user_message))
        if user_message.lower().strip() in _EXIT_WORDS:
            break