Spaces:
Sleeping
Sleeping
Commit
·
d0fc0e2
1
Parent(s):
785cd03
use cached model fix
Browse files
app.py
CHANGED
@@ -62,7 +62,7 @@ _model_cache = None
|
|
62 |
|
63 |
@spaces.GPU
|
64 |
def get_model():
|
65 |
-
|
66 |
if _model_cache is None:
|
67 |
# Load model only if it's not already loaded
|
68 |
print("Loading model for the first time...")
|
@@ -157,7 +157,7 @@ def search_qdrant_with_context(query_text, collection_name, top_k=3):
|
|
157 |
|
158 |
@spaces.GPU
|
159 |
def interactive_chat(query):
|
160 |
-
|
161 |
collection_name = "products"
|
162 |
if "rezept" in query.lower() or "gericht" in query.lower():
|
163 |
collection_name = "recipes"
|
@@ -166,7 +166,7 @@ def interactive_chat(query):
|
|
166 |
print(collection_name)
|
167 |
print(query)
|
168 |
if len(query.split()) < 3:
|
169 |
-
return generate_response(query, "Der Kunde muss womöglich detailliertere Angaben machen, entscheide, was du sagst.", last_messages, 512, 0.2, 0.95, generator)
|
170 |
context = [document["document"] for document in search_qdrant_with_context(query, collection_name)]
|
171 |
|
172 |
system_message = f"""<|im_start|>system Rolle: Du bist ein KI-Assistent der die Informationen in Relation zum Kontext bewertet.
|
@@ -179,12 +179,12 @@ def interactive_chat(query):
|
|
179 |
{query}
|
180 |
<|im_end|>
|
181 |
<|im_start|>assistant"""
|
182 |
-
refined_context =
|
183 |
# Retrieve relevant context from Qdrant
|
184 |
print(f"""Refined context: {refined_context[0]["generated_text"].split("assistant").pop()}""")
|
185 |
|
186 |
context = [document["document"] for document in search_qdrant_with_context(query + " " + refined_context[0]["generated_text"].split("assistant\n").pop(), collection_name)]
|
187 |
-
answer = generate_response(query, context, last_messages, 512, 0.2, 0.95, generator)
|
188 |
full_conv = f"<|im_start|>user {query}<|im_end|><|im_start|>assistent {answer}<|im_end|>"
|
189 |
# if len(last_messages) > 5:
|
190 |
# last_messages.pop(0)
|
|
|
62 |
|
63 |
@spaces.GPU
|
64 |
def get_model():
|
65 |
+
print(f"Model: {_model_cache}")
|
66 |
if _model_cache is None:
|
67 |
# Load model only if it's not already loaded
|
68 |
print("Loading model for the first time...")
|
|
|
157 |
|
158 |
@spaces.GPU
|
159 |
def interactive_chat(query):
|
160 |
+
generator = get_model()
|
161 |
collection_name = "products"
|
162 |
if "rezept" in query.lower() or "gericht" in query.lower():
|
163 |
collection_name = "recipes"
|
|
|
166 |
print(collection_name)
|
167 |
print(query)
|
168 |
if len(query.split()) < 3:
|
169 |
+
return generate_response(query, "Der Kunde muss womöglich detailliertere Angaben machen, entscheide, was du sagst.", last_messages, 512, 0.2, 0.95, generator[0])
|
170 |
context = [document["document"] for document in search_qdrant_with_context(query, collection_name)]
|
171 |
|
172 |
system_message = f"""<|im_start|>system Rolle: Du bist ein KI-Assistent der die Informationen in Relation zum Kontext bewertet.
|
|
|
179 |
{query}
|
180 |
<|im_end|>
|
181 |
<|im_start|>assistant"""
|
182 |
+
refined_context = generator[1](system_message, do_sample=True, padding=True, truncation=True, top_p=0.95, max_new_tokens=100)
|
183 |
# Retrieve relevant context from Qdrant
|
184 |
print(f"""Refined context: {refined_context[0]["generated_text"].split("assistant").pop()}""")
|
185 |
|
186 |
context = [document["document"] for document in search_qdrant_with_context(query + " " + refined_context[0]["generated_text"].split("assistant\n").pop(), collection_name)]
|
187 |
+
answer = generate_response(query, context, last_messages, 512, 0.2, 0.95, generator[0])
|
188 |
full_conv = f"<|im_start|>user {query}<|im_end|><|im_start|>assistent {answer}<|im_end|>"
|
189 |
# if len(last_messages) > 5:
|
190 |
# last_messages.pop(0)
|