import os
import torch
from pathlib import Path
from huggingface_hub import login

from llama_index.core import (
    VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
)
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain_community.embeddings import HuggingFaceEmbeddings
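
# Assumed dependencies (not pinned by the original script):
#   pip install llama-index llama-index-llms-huggingface \
#       llama-index-embeddings-langchain langchain-community \
#       sentence-transformers transformers accelerate bitsandbytes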

# ---------- Constants ----------
SYSTEM_PROMPT = """
You are a friendly café assistant for Café Eleven. Your job is to:
1. Greet the customer warmly
2. Help them order food/drinks from our menu
3. Answer questions about ingredients, preparation, etc.
4. Process special requests (allergies, modifications)
5. Provide a friendly farewell
Always be polite and helpful!
"""

WRAPPER_PROMPT = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "\n<</SYS>>\n\n{query_str} [/INST]"
)

# ---------- 1. Login & Load Data ----------
# Authenticate with Hugging Face; requires the HF_TOKEN environment variable
# and access to the gated meta-llama/Llama-2-7b-chat-hf repository
login(token=os.environ["HF_TOKEN"])

# Index every PDF in the current working directory (the reader raises an
# error if the glob matches no files)
docs = SimpleDirectoryReader(
    input_files=[str(p) for p in Path(".").glob("*.pdf")]
).load_data()

# Local sentence-transformers embeddings, wrapped for llama_index via the
# LangChain adapter
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
Settings.embed_model = embed_model
Settings.chunk_size = 512  # split documents into ~512-token chunks for retrieval

index = VectorStoreIndex.from_documents(docs)
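
# Optional: persist the index so the PDFs are not re-embedded on every run.
# A sketch using llama_index's storage API; "./storage" is an arbitrary
# directory name, not something the original script defines.
#
# from llama_index.core import StorageContext, load_index_from_storage
# index.storage_context.persist(persist_dir="./storage")
# index = load_index_from_storage(
#     StorageContext.from_defaults(persist_dir="./storage")
# )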

# ---------- 2. Initialize Chat Engine ----------
# Lazily-constructed singleton so the 7B model is only loaded on first use
_state = {"chat_engine": None}

def get_chat_engine():
    if _state["chat_engine"] is None:
        llm = HuggingFaceLLM(
            tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
            model_name="meta-llama/Llama-2-7b-chat-hf",
            context_window=3900,
            max_new_tokens=256,
            generate_kwargs={"temperature": 0.2, "do_sample": True},
            device_map="auto",
            model_kwargs={
                "torch_dtype": torch.float16,
                "load_in_4bit": True,  # requires bitsandbytes; see note below
                # Redundant with login() above and deprecated in recent
                # transformers releases in favor of "token"; kept as written.
                "use_auth_token": os.environ["HF_TOKEN"],
            },
            system_prompt=SYSTEM_PROMPT,
            query_wrapper_prompt=WRAPPER_PROMPT,
        )
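        # Note: recent transformers releases configure 4-bit loading with a
        # BitsAndBytesConfig rather than a bare "load_in_4bit" flag. A hedged
        # equivalent (assumes bitsandbytes is installed):
        #
        # from transformers import BitsAndBytesConfig
        # model_kwargs = {
        #     "torch_dtype": torch.float16,
        #     "quantization_config": BitsAndBytesConfig(load_in_4bit=True),
        # }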
        Settings.llm = llm

        # Keep roughly the last 2,000 tokens of conversation history
        memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
        # condense_plus_context rewrites each follow-up into a standalone
        # question, then retrieves matching index chunks as context
        _state["chat_engine"] = index.as_chat_engine(
            chat_mode="condense_plus_context",
            memory=memory,
            system_prompt=SYSTEM_PROMPT,
        )
    return _state["chat_engine"]

# ---------- 3. Simple Chat Function ----------
def chat_with_cafe_eleven(message: str) -> str:
    """Send one user message to the chat engine and return its reply."""
    if message.lower().strip() in {"quit", "exit", "done"}:
        return "Thank you for your order! We'll see you soon."

    engine = get_chat_engine()
    return engine.chat(message).response
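
# Optional streaming variant (a sketch, not part of the original script):
# llama_index chat engines also expose stream_chat(), whose response yields
# tokens incrementally via response_gen, assuming the underlying LLM
# supports streaming.
def stream_chat_with_cafe_eleven(message: str) -> None:
    engine = get_chat_engine()
    streaming_response = engine.stream_chat(message)
    for token in streaming_response.response_gen:
        print(token, end="", flush=True)
    print()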

# ---------- Example usage ----------
if __name__ == "__main__":
    while True:
        user_message = input("You: ")
        bot_response = chat_with_cafe_eleven(user_message)
        print("Café Eleven:", bot_response)
        
        if user_message.lower().strip() in {"quit", "exit", "done"}:
            break