q8 model
Browse files
app.py
CHANGED
@@ -2,9 +2,14 @@ import gradio as gr
|
|
2 |
|
3 |
from llama_cpp import Llama, LlamaTokenizer
|
4 |
|
|
|
|
|
|
|
|
|
|
|
5 |
llm = Llama.from_pretrained(
|
6 |
-
repo_id="ID2223JR/gguf_model",
|
7 |
-
filename="unsloth.Q4_K_M.gguf",
|
8 |
)
|
9 |
|
10 |
|
@@ -56,21 +61,11 @@ def submit_to_model():
|
|
56 |
content = ""
|
57 |
|
58 |
for partial_response in response:
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
elif "message" in partial_response["choices"][0]:
|
65 |
-
content += partial_response["choices"][0]["message"].get(
|
66 |
-
"content", ""
|
67 |
-
)
|
68 |
-
else:
|
69 |
-
content += partial_response["choices"][0].get("text", "")
|
70 |
-
if content:
|
71 |
-
yield content
|
72 |
-
else:
|
73 |
-
yield "Unexpected response structure."
|
74 |
|
75 |
ingredients_list.clear() # Reset list after generation
|
76 |
|
|
|
2 |
|
3 |
from llama_cpp import Llama, LlamaTokenizer
|
4 |
|
5 |
+
# llm = Llama.from_pretrained(
|
6 |
+
# repo_id="ID2223JR/gguf_model",
|
7 |
+
# filename="unsloth.Q4_K_M.gguf",
|
8 |
+
# )
|
9 |
+
|
10 |
llm = Llama.from_pretrained(
|
11 |
+
repo_id="ID2223JR/gguf_model_q8",
|
12 |
+
filename="unsloth.Q8_0.gguf",
|
13 |
)
|
14 |
|
15 |
|
|
|
61 |
content = ""
|
62 |
|
63 |
for partial_response in response:
|
64 |
+
|
65 |
+
content += partial_response["choices"][0]["delta"].get("content", "")
|
66 |
+
|
67 |
+
if content:
|
68 |
+
yield content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
ingredients_list.clear() # Reset list after generation
|
71 |
|