Yoxas committed
Commit fe3ee42 · verified · 1 Parent(s): ee9bc42

Update app.py

Files changed (1)
  1. app.py +17 -18
app.py CHANGED
@@ -18,7 +18,6 @@ data = dataset["train"]
 
 # Convert the string embeddings to numerical arrays and ensure they are 2D
 def convert_and_ensure_2d_embeddings(example):
-    # Clean the embedding string
     embedding_str = example['embedding']
     embedding_str = embedding_str.replace('\n', ' ').replace('...', '')
     embedding_list = list(map(float, embedding_str.strip("[]").split()))
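The hunk above cuts off before the function returns. For context, a minimal sketch of the full conversion, assuming the reshape implied by the function name (the reshape and return are not shown in this diff):

import numpy as np

def convert_and_ensure_2d_embeddings_sketch(example):
    # Parse a stringified numpy array such as "[0.1 0.2 ...]" back into floats
    embedding_str = example['embedding'].replace('\n', ' ').replace('...', '')
    embedding = np.array(list(map(float, embedding_str.strip("[]").split())), dtype=np.float32)
    if embedding.ndim == 1:
        embedding = embedding.reshape(1, -1)  # Assumed: force 2D, as the name suggests
    example['embedding'] = embedding
    return example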
@@ -44,11 +43,11 @@ data = data.map(flatten_embeddings)
 embeddings = np.array([example['embedding'] for example in data], dtype=np.float32)
 
 # Add FAISS index
-data.add_faiss_index_from_external_arrays("embedding", embeddings)
+data = data.add_faiss_index_from_external_arrays("embedding", embeddings)
 
 model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
-# use quantization to lower GPU usage
+# Use quantization to lower GPU usage
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
 )
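The hunk ends before the model itself is loaded. A typical way this bnb_config is consumed by transformers (the tokenizer line and device_map below are assumptions, not part of the commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # As in the commit
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # Store weights in 4-bit to cut GPU memory
    bnb_4bit_use_double_quant=True,         # Also quantize the quantization constants
    bnb_4bit_quant_type="nf4",              # NormalFloat4 weight format
    bnb_4bit_compute_dtype=torch.bfloat16,  # Run matmuls in bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map="auto"
)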
@@ -71,16 +70,16 @@ You are given the extracted parts of a long document and a question. Provide a c
 If you don't know the answer, just say "I do not know." Don't make up an answer."""
 
 def search(query: str, k: int = 3):
-    """a function that embeds a new query and returns the most probable results"""
-    embedded_query = ST.encode(query) # embed new query
-    scores, retrieved_examples = data.get_nearest_examples( # retrieve results
-        "embedding", embedded_query, # compare our new embedded query with the dataset embeddings
-        k=k # get only top k results
+    """A function that embeds a new query and returns the most probable results."""
+    embedded_query = ST.encode(query) # Embed new query
+    scores, retrieved_examples = data.get_nearest_examples( # Retrieve results
+        "embedding", embedded_query, # Compare our new embedded query with the dataset embeddings
+        k=k # Get only top k results
     )
     return scores, retrieved_examples
 
 def format_prompt(prompt, retrieved_documents, k):
-    """using the retrieved documents we will prompt the model to generate our responses"""
+    """Using the retrieved documents we will prompt the model to generate our responses."""
     PROMPT = f"Question:{prompt}\nContext:"
     for idx in range(k):
         PROMPT += f"{retrieved_documents['text'][idx]}\n"
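To see how the two helpers compose, a hypothetical call (the prompt layout follows directly from the f-string above; the return of format_prompt is assumed, since the hunk truncates before it):

scores, docs = search("what's anarchy?", k=3)       # FAISS scores + nearest rows
prompt = format_prompt("what's anarchy?", docs, 3)  # Assumes format_prompt returns PROMPT
# The prompt then reads:
# Question:what's anarchy?
# Context:<text of doc 0>
# <text of doc 1>
# <text of doc 2>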
@@ -88,12 +87,12 @@ def format_prompt(prompt, retrieved_documents, k):
 
 @spaces.GPU(duration=150)
 def talk(prompt, history):
-    k = 1 # number of retrieved documents
+    k = 1 # Number of retrieved documents
     scores, retrieved_documents = search(prompt, k)
     formatted_prompt = format_prompt(prompt, retrieved_documents, k)
-    formatted_prompt = formatted_prompt[:2000] # to avoid GPU OOM
+    formatted_prompt = formatted_prompt[:2000] # To avoid GPU OOM
     messages = [{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": formatted_prompt}]
-    # tell the model to generate
+    # Tell the model to generate
     input_ids = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
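This hunk also truncates mid-call. A common completion of the apply_chat_template pattern for an instruct model like Llama 3 (the generation arguments here are illustrative, not taken from the commit):

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # Append the assistant header so the model answers
    return_tensors="pt",
).to(model.device)
outputs = model.generate(input_ids, max_new_tokens=512)  # Illustrative token budget
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)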
@@ -131,12 +130,12 @@ def talk(prompt, history):
 TITLE = "# RAG"
 
 DESCRIPTION = """
-A rag pipeline with a chatbot feature
+A RAG pipeline with a chatbot feature
 Resources used to build this project :
-* embedding model : https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
-* dataset : https://huggingface.co/datasets/not-lain/wikipedia
-* faiss docs : https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/main_classes#datasets.Dataset.add_faiss_index
-* chatbot : https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
+* Embedding model : https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
+* Dataset : https://huggingface.co/datasets/not-lain/wikipedia
+* FAISS docs : https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/main_classes#datasets.Dataset.add_faiss_index
+* Chatbot : https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
 """
 
 demo = gr.ChatInterface(
@@ -150,7 +149,7 @@ demo = gr.ChatInterface(
         bubble_full_width=False,
     ),
     theme="Soft",
-    examples=[["what's anarchy ? "]],
+    examples=[["what's anarchy?"]],
     title=TITLE,
     description=DESCRIPTION,
 )
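The diff stops at the interface definition; a Gradio Space of this shape normally ends by launching it (a guess, not shown in the commit):

demo.launch()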
 