sitammeur committed
Commit 26affc2 · verified · 1 parent: 62d72b4

Update app.py

Files changed (1): app.py (+28, -8)
app.py CHANGED
@@ -23,6 +23,9 @@ from exception import CustomExceptionHandling
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 
 # Download gguf model files
+if not os.path.exists("./models"):
+    os.makedirs("./models")
+
 hf_hub_download(
     repo_id="bartowski/google_gemma-3-1b-it-GGUF",
     filename="google_gemma-3-1b-it-Q6_K.gguf",
@@ -66,13 +69,13 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    model: str,
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    top_k: int,
-    repeat_penalty: float,
+    model: str = "google_gemma-3-1b-it-Q5_K_M.gguf",  # Set default model
+    system_message: str = "You are a helpful assistant.",
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 40,
+    repeat_penalty: float = 1.1,
 ):
     """
     Respond to a message using the Gemma3 model via Llama.cpp.
@@ -96,8 +99,18 @@ def respond(
     global llm
     global llm_model
 
+    # Ensure model is not None
+    if model is None:
+        model = "google_gemma-3-1b-it-Q5_K_M.gguf"
+
     # Load the model
     if llm is None or llm_model != model:
+        # Check if model file exists
+        model_path = f"models/{model}"
+        if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
+            return
+
         llm = Llama(
             model_path=f"models/{model}",
             flash_attn=False,
@@ -232,9 +245,16 @@ demo = gr.ChatInterface(
     chatbot=gr.Chatbot(scale=1, show_copy_button=True, resizable=True),
     flagging_mode="never",
     editable=True,
+    cache_examples=False,
 )
 
 
 # Launch the chat interface
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False,
+        ssr=False
+    )
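
A note on the launch settings: in Gradio, server_name="0.0.0.0" binds the server to all network interfaces and port 7860 is the port Hugging Face Spaces expects, so these values keep the app reachable inside a Space container. The keyword defaults added to respond() also make the handler callable without the UI supplying every control value; a minimal usage sketch (the call below is illustrative, not part of the commit):

    # Illustrative call: message and history are now the only required
    # arguments; model, system_message, and the sampling parameters fall
    # back to the new keyword defaults.
    for chunk in respond(message="Hello!", history=[]):
        print(chunk)  # respond() is a generator and yields response text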