Ankerkraut committed on
Commit
d0fc0e2
·
1 Parent(s): 785cd03

use cached model fix

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -62,7 +62,7 @@ _model_cache = None
62
 
63
  @spaces.GPU
64
  def get_model():
65
- global _model_cache
66
  if _model_cache is None:
67
  # Load model only if it's not already loaded
68
  print("Loading model for the first time...")
@@ -157,7 +157,7 @@ def search_qdrant_with_context(query_text, collection_name, top_k=3):
157
 
158
  @spaces.GPU
159
  def interactive_chat(query):
160
-
161
  collection_name = "products"
162
  if "rezept" in query.lower() or "gericht" in query.lower():
163
  collection_name = "recipes"
@@ -166,7 +166,7 @@ def interactive_chat(query):
166
  print(collection_name)
167
  print(query)
168
  if len(query.split()) < 3:
169
- return generate_response(query, "Der Kunde muss womöglich detailliertere Angaben machen, entscheide, was du sagst.", last_messages, 512, 0.2, 0.95, generator)
170
  context = [document["document"] for document in search_qdrant_with_context(query, collection_name)]
171
 
172
  system_message = f"""<|im_start|>system Rolle: Du bist ein KI-Assistent der die Informationen in Relation zum Kontext bewertet.
@@ -179,12 +179,12 @@ def interactive_chat(query):
179
  {query}
180
  <|im_end|>
181
  <|im_start|>assistant"""
182
- refined_context = generator_mini(system_message, do_sample=True, padding=True, truncation=True, top_p=0.95, max_new_tokens=100)
183
  # Retrieve relevant context from Qdrant
184
  print(f"""Refined context: {refined_context[0]["generated_text"].split("assistant").pop()}""")
185
 
186
  context = [document["document"] for document in search_qdrant_with_context(query + " " + refined_context[0]["generated_text"].split("assistant\n").pop(), collection_name)]
187
- answer = generate_response(query, context, last_messages, 512, 0.2, 0.95, generator)
188
  full_conv = f"<|im_start|>user {query}<|im_end|><|im_start|>assistent {answer}<|im_end|>"
189
  # if len(last_messages) > 5:
190
  # last_messages.pop(0)
 
62
 
63
  @spaces.GPU
64
  def get_model():
65
+ print(f"Model: {_model_cache}")
66
  if _model_cache is None:
67
  # Load model only if it's not already loaded
68
  print("Loading model for the first time...")
 
157
 
158
  @spaces.GPU
159
  def interactive_chat(query):
160
+ generator = get_model()
161
  collection_name = "products"
162
  if "rezept" in query.lower() or "gericht" in query.lower():
163
  collection_name = "recipes"
 
166
  print(collection_name)
167
  print(query)
168
  if len(query.split()) < 3:
169
+ return generate_response(query, "Der Kunde muss womöglich detailliertere Angaben machen, entscheide, was du sagst.", last_messages, 512, 0.2, 0.95, generator[0])
170
  context = [document["document"] for document in search_qdrant_with_context(query, collection_name)]
171
 
172
  system_message = f"""<|im_start|>system Rolle: Du bist ein KI-Assistent der die Informationen in Relation zum Kontext bewertet.
 
179
  {query}
180
  <|im_end|>
181
  <|im_start|>assistant"""
182
+ refined_context = generator[1](system_message, do_sample=True, padding=True, truncation=True, top_p=0.95, max_new_tokens=100)
183
  # Retrieve relevant context from Qdrant
184
  print(f"""Refined context: {refined_context[0]["generated_text"].split("assistant").pop()}""")
185
 
186
  context = [document["document"] for document in search_qdrant_with_context(query + " " + refined_context[0]["generated_text"].split("assistant\n").pop(), collection_name)]
187
+ answer = generate_response(query, context, last_messages, 512, 0.2, 0.95, generator[0])
188
  full_conv = f"<|im_start|>user {query}<|im_end|><|im_start|>assistent {answer}<|im_end|>"
189
  # if len(last_messages) > 5:
190
  # last_messages.pop(0)