Tonic committed
Commit 2749bcc · unverified · 1 parent: 5b6abbe

model loading fix

Replace the module-level load_model() with load_model_with_fallback(), which picks CUDA when available, retries on CPU if GPU loading raises, and returns a (model, device) pair; the per-call device setup and the cuda_error_handler decorator are dropped from generate_text() and calculate_similarity().

Files changed (1)
app.py +62 -45

app.py CHANGED
@@ -174,16 +174,39 @@ class PixtralModel(nn.Module):
         else:
             return vision_output
 
-def load_model(params, model_path):
-    model = PixtralModel(params)
-    with safe_open(f'{model_path}/consolidated.safetensors', framework="pt", device="cpu") as f:
-        for name, param in model.named_parameters():
-            if name in f.keys():
-                param.data = f.get_tensor(name)
-    model.eval()
-    return model
-
-model = load_model(params, model_path)
+
+@contextmanager
+def gpu_memory_manager():
+    try:
+        torch.cuda.empty_cache()
+        yield
+    finally:
+        torch.cuda.empty_cache()
+        gc.collect()
+
+def load_model_with_fallback(params, model_path):
+    try:
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model = PixtralModel(params)
+        with safe_open(f'{model_path}/consolidated.safetensors', framework="pt", device="cpu") as f:
+            for name, param in model.named_parameters():
+                if name in f.keys():
+                    param.data = f.get_tensor(name)
+        model.eval()
+        model.to(device)
+        return model, device
+    except RuntimeError as e:
+        print(f"Error loading model on GPU: {str(e)}")
+        print("Falling back to CPU...")
+        model = PixtralModel(params)
+        with safe_open(f'{model_path}/consolidated.safetensors', framework="pt", device="cpu") as f:
+            for name, param in model.named_parameters():
+                if name in f.keys():
+                    param.data = f.get_tensor(name)
+        model.eval()
+        return model, torch.device("cpu")
+
+model, device = load_model_with_fallback(params, model_path)
 tokenizer = MistralTokenizer.from_model("pixtral")
 
 def preprocess_image(image):
@@ -206,39 +229,12 @@ def gpu_memory_manager():
         torch.cuda.empty_cache()
         gc.collect()
 
-def cuda_error_handler(func):
-    def wrapper(*args, **kwargs):
-        try:
-            return func(*args, **kwargs)
-        except RuntimeError as e:
-            if "CUDA" in str(e):
-                print(f"CUDA error occurred: {str(e)}")
-                print("Attempting to recover...")
-                torch.cuda.empty_cache()
-                gc.collect()
-                try:
-                    return func(*args, **kwargs)
-                except Exception as e2:
-                    print(f"Recovery failed. Error: {str(e2)}")
-                    return f"An error occurred: {str(e2)}", 0, 0
-            else:
-                raise
-        except Exception as e:
-            print(f"An unexpected error occurred: {str(e)}")
-            traceback.print_exc()
-            return f"An unexpected error occurred: {str(e)}", 0, 0
-    return wrapper
-
 @spaces.GPU()
-@cuda_error_handler
 def generate_text(image, prompt, max_tokens):
     try:
         with gpu_memory_manager():
-            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            # Use load_img here
             image_pil = load_img(image, output_type="pil", input_type="auto")
             image_tensor = preprocess_image(image_pil).to(device)
-            model.to(device)
 
             tokenized = tokenizer.encode_chat_completion(
                 ChatCompletionRequest(
@@ -260,8 +256,6 @@ def generate_text(image, prompt, max_tokens):
 
             generated_text = tokenizer.decode(generated_ids[0].tolist())
 
-            # # Move model back to CPU and clear CUDA memory
-            # model.to("cpu")
             torch.cuda.empty_cache()
 
             return generated_text, len(generated_ids[0]), 1
@@ -271,17 +265,13 @@ def generate_text(image, prompt, max_tokens):
         return f"Error: {str(e)}", 0, 0
 
 @spaces.GPU()
-@cuda_error_handler
 def calculate_similarity(image1, image2):
     try:
         with gpu_memory_manager():
-            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            # Use load_img for both images
             pil_image1 = load_img(image1, output_type="pil", input_type="auto")
             pil_image2 = load_img(image2, output_type="pil", input_type="auto")
             tensor1 = preprocess_image(pil_image1).to(device)
             tensor2 = preprocess_image(pil_image2).to(device)
-            model.to(device)
 
             with torch.no_grad():
                 embedding1 = model(tensor1).mean(dim=1)
@@ -289,8 +279,6 @@ def calculate_similarity(image1, image2):
 
             similarity = F.cosine_similarity(embedding1, embedding2).item()
 
-            # # Move model back to CPU and clear CUDA memory
-            # model.to("cpu")
            torch.cuda.empty_cache()
 
             return similarity
@@ -298,6 +286,35 @@ def calculate_similarity(image1, image2):
         print(f"Error in calculate_similarity: {str(e)}")
         traceback.print_exc()
         return f"Error: {str(e)}"
+
+# @spaces.GPU()
+# @cuda_error_handler
+# def calculate_similarity(image1, image2):
+#     try:
+#         with gpu_memory_manager():
+#             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#             # Use load_img for both images
+#             pil_image1 = load_img(image1, output_type="pil", input_type="auto")
+#             pil_image2 = load_img(image2, output_type="pil", input_type="auto")
+#             tensor1 = preprocess_image(pil_image1).to(device)
+#             tensor2 = preprocess_image(pil_image2).to(device)
+#             model.to(device)
+
+#             with torch.no_grad():
+#                 embedding1 = model(tensor1).mean(dim=1)
+#                 embedding2 = model(tensor2).mean(dim=1)
+
+#             similarity = F.cosine_similarity(embedding1, embedding2).item()
+
+#             # Move model back to CPU and clear CUDA memory
+#             # model.to("cpu")
+#             torch.cuda.empty_cache()
+
+#             return similarity
+#     except Exception as e:
+#         print(f"Error in calculate_similarity: {str(e)}")
+#         traceback.print_exc()
+#         return f"Error: {str(e)}"
 
 with gr.Blocks() as demo:
     gr.Markdown(title)
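
Review note: on the fallback path, load_model_with_fallback() rebuilds the model and re-reads the safetensors file, so the loading block appears twice. A minimal sketch of an equivalent single-pass variant, assuming the same PixtralModel, params, and model_path names from app.py and that only the device move can raise:

import torch
from safetensors import safe_open

def load_model_with_fallback(params, model_path):
    # Build the model and read the weights once, on CPU; only the
    # device move differs between the GPU and CPU outcomes.
    model = PixtralModel(params)  # assumed: the PixtralModel class defined above in app.py
    with safe_open(f'{model_path}/consolidated.safetensors', framework="pt", device="cpu") as f:
        for name, param in model.named_parameters():
            if name in f.keys():
                param.data = f.get_tensor(name)
    model.eval()
    if torch.cuda.is_available():
        try:
            return model.to("cuda"), torch.device("cuda")
        except RuntimeError as e:  # e.g. CUDA OOM while copying weights
            print(f"Error loading model on GPU: {str(e)}")
            print("Falling back to CPU...")
            model.to("cpu")  # undo any partial move
            torch.cuda.empty_cache()
    return model, torch.device("cpu")

Callers still receive the (model, device) pair consumed by generate_text() and calculate_similarity(), but the weight-loading pass runs once instead of twice when the GPU is unavailable.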
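
The relocated gpu_memory_manager() relies on @contextmanager semantics: the finally block after yield runs whether the wrapped block returns or raises, so cached CUDA memory is released even when a handler fails mid-block. A self-contained sketch of the pattern (standalone toy usage, not app code):

import gc
from contextlib import contextmanager

import torch

@contextmanager
def gpu_memory_manager():
    try:
        torch.cuda.empty_cache()  # no-op on CPU-only hosts
        yield
    finally:
        # Reached on success and on exceptions alike, so the allocator
        # cache is drained before any error propagates to the caller.
        torch.cuda.empty_cache()
        gc.collect()

# Cleanup still runs even though the block raises.
try:
    with gpu_memory_manager():
        raise RuntimeError("CUDA out of memory")  # simulated failure
except RuntimeError:
    pass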