Update app.py
app.py CHANGED
@@ -23,6 +23,9 @@ from exception import CustomExceptionHandling
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 
 # Download gguf model files
+if not os.path.exists("./models"):
+    os.makedirs("./models")
+
 hf_hub_download(
     repo_id="bartowski/google_gemma-3-1b-it-GGUF",
     filename="google_gemma-3-1b-it-Q6_K.gguf",
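The new guard creates ./models before hf_hub_download writes into it. As a standalone sketch of the same step (the local_dir and token arguments are assumptions here, since the rest of the call sits outside this hunk):

    import os
    from huggingface_hub import hf_hub_download

    # One-call equivalent of the guarded makedirs added above
    os.makedirs("./models", exist_ok=True)

    hf_hub_download(
        repo_id="bartowski/google_gemma-3-1b-it-GGUF",
        filename="google_gemma-3-1b-it-Q6_K.gguf",
        local_dir="./models",                  # assumed: download target under ./models
        token=os.getenv("HUGGINGFACE_TOKEN"),  # assumed: token for gated repos
    )

os.makedirs("./models", exist_ok=True) would also fold the two-line existence check into a single idempotent call.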
@@ -66,13 +69,13 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    model: str,
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    top_k: int,
-    repeat_penalty: float,
+    model: str = "google_gemma-3-1b-it-Q5_K_M.gguf",  # Set default model
+    system_message: str = "You are a helpful assistant.",
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 40,
+    repeat_penalty: float = 1.1,
 ):
     """
     Respond to a message using the Gemma3 model via Llama.cpp.
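With defaults on every sampling parameter, respond() can be invoked with just a message and a history, which also keeps the Gradio callback working when optional inputs arrive unset. A hypothetical smoke test (assuming respond streams chunks, as the yield in the next hunk suggests):

    # Hypothetical local check: all sampling knobs fall back to the defaults above
    for chunk in respond("Hello!", history=[]):
        print(chunk, end="", flush=True)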
@@ -96,8 +99,18 @@ def respond(
     global llm
     global llm_model
 
+    # Ensure model is not None
+    if model is None:
+        model = "google_gemma-3-1b-it-Q5_K_M.gguf"
+
     # Load the model
     if llm is None or llm_model != model:
+        # Check if model file exists
+        model_path = f"models/{model}"
+        if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
+            return
+
         llm = Llama(
             model_path=f"models/{model}",
             flash_attn=False,
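Because respond() is a generator, it cannot hand an error string back with a plain return; the added check yields the message and then exits with a bare return. The same pattern in isolation (load_or_fail is a hypothetical name, not part of the app):

    import os
    from typing import Iterator, Optional

    def load_or_fail(model: Optional[str]) -> Iterator[str]:
        # Mirrors the None guard above: fall back to the default GGUF file
        model = model or "google_gemma-3-1b-it-Q5_K_M.gguf"
        model_path = f"models/{model}"
        if not os.path.exists(model_path):
            yield f"Error: Model file not found at {model_path}."
            return  # bare return ends the stream; the yielded text reaches the UI
        yield f"Loading {model_path}..."

Note that `model = model or default` would fold the four-line None guard into one line, at the cost of also catching empty strings.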
@@ -232,9 +245,16 @@ demo = gr.ChatInterface(
     chatbot=gr.Chatbot(scale=1, show_copy_button=True, resizable=True),
     flagging_mode="never",
     editable=True,
+    cache_examples=False,
 )
 
 
 # Launch the chat interface
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch(
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False,
+        ssr_mode=False
+    )
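server_name="0.0.0.0" makes the app reachable from outside the Space's container, and 7860 is the port Hugging Face Spaces routes traffic to. A sketch of an equivalent launch on the app's demo object that defers the port to the environment (the GRADIO_SERVER_PORT fallback is an assumption, not part of this commit):

    import os

    demo.launch(
        share=False,
        server_name="0.0.0.0",  # bind all interfaces inside the container
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),  # Spaces expect 7860
        show_api=False,
    )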