m-ric HF Staff committed on
Commit
13ad481
·
1 Parent(s): 5749cc3

Re-add images in inference. Inference endpoint model is still broken (context length issue).

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. e2bqwen.py +128 -168
app.py CHANGED
@@ -29,8 +29,8 @@ if not os.path.exists(TMP_DIR):
29
  hf_token = os.getenv("HUGGINGFACE_API_KEY")
30
  login(token=hf_token)
31
  model = QwenVLAPIModel(
 
32
  hf_token = hf_token,
33
- hf_base_url="https://s41ydkv0iyjeokyj.us-east-1.aws.endpoints.huggingface.cloud"
34
  )
35
 
36
 
 
29
  hf_token = os.getenv("HUGGINGFACE_API_KEY")
30
  login(token=hf_token)
31
  model = QwenVLAPIModel(
32
+ hf_base_url="https://s41ydkv0iyjeokyj.us-east-1.aws.endpoints.huggingface.cloud",
33
  hf_token = hf_token,
 
34
  )
35
 
36
 
e2bqwen.py CHANGED
@@ -346,7 +346,47 @@ class E2BVisionAgent(CodeAgent):
346
  self.desktop.kill()
347
  print("E2B sandbox terminated")
348
 
349
- from smolagents import HfApiModel
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
  # class QwenVLAPIModel(Model):
352
  # """Model wrapper for Qwen2.5VL API with fallback mechanism"""
@@ -359,16 +399,25 @@ from smolagents import HfApiModel
359
  # hf_base_url: str = "https://n5wr7lfx6wp94tvl.us-east-1.aws.endpoints.huggingface.cloud"
360
  # ):
361
  # super().__init__()
 
362
  # self.model_id = model_path
 
 
363
  # self.hf_base_url = hf_base_url
364
- # self.dedicated_endpoint_model = HfApiModel(
365
- # hf_base_url,
366
- # token=hf_token
 
367
  # )
368
- # self.fallback_model = HfApiModel(
369
- # model_path,
370
- # provider=provider,
371
- # token=hf_token,
 
 
 
 
 
372
  # )
373
 
374
  # def __call__(
@@ -377,15 +426,27 @@ from smolagents import HfApiModel
377
  # stop_sequences: Optional[List[str]] = None,
378
  # **kwargs
379
  # ) -> ChatMessage:
 
380
 
 
 
 
 
381
  # try:
382
- # return self.dedicated_endpoint_model(messages, stop_sequences, **kwargs)
 
 
 
 
 
 
383
  # except Exception as e:
384
  # print(f"HF endpoint failed with error: {e}. Falling back to hyperbolic.")
385
-
386
- # # Continue to fallback
 
387
  # try:
388
- # return self.fallback_model(messages, stop_sequences, **kwargs)
389
  # except Exception as e:
390
  # raise Exception(f"Both endpoints failed. Last error: {e}")
391
 
@@ -411,7 +472,6 @@ from smolagents import HfApiModel
411
  # else:
412
  # # Image is a PIL image or similar object
413
  # img_byte_arr = BytesIO()
414
- # item["image"].save(img_byte_arr, format="PNG")
415
  # base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
416
 
417
  # content.append({
@@ -428,167 +488,67 @@ from smolagents import HfApiModel
428
 
429
  # return formatted_messages
430
 
431
- class QwenVLAPIModel(Model):
432
- """Model wrapper for Qwen2.5VL API with fallback mechanism"""
433
-
434
- def __init__(
435
- self,
436
- model_path: str = "Qwen/Qwen2.5-VL-72B-Instruct",
437
- provider: str = "hyperbolic",
438
- hf_token: str = None,
439
- hf_base_url: str = "https://n5wr7lfx6wp94tvl.us-east-1.aws.endpoints.huggingface.cloud"
440
- ):
441
- super().__init__()
442
- self.model_path = model_path
443
- self.model_id = model_path
444
- self.provider = provider
445
- self.hf_token = hf_token
446
- self.hf_base_url = hf_base_url
447
-
448
- # Initialize hyperbolic client
449
- self.hyperbolic_client = InferenceClient(
450
- provider=self.provider,
451
- )
452
-
453
- assert not self.hf_base_url.endswith("/v1/"), "Enter your base url without '/v1/' suffix."
454
-
455
- # Initialize HF OpenAI-compatible client if token is provided
456
- self.hf_client = None
457
- from openai import OpenAI
458
- self.hf_client = OpenAI(
459
- base_url=self.hf_base_url + "/v1/",
460
- api_key=self.hf_token
461
- )
462
-
463
- def __call__(
464
- self,
465
- messages: List[Dict[str, Any]],
466
- stop_sequences: Optional[List[str]] = None,
467
- **kwargs
468
- ) -> ChatMessage:
469
- """Convert a list of messages to an API request with fallback mechanism"""
470
-
471
- # Format messages once for both APIs
472
- formatted_messages = self._format_messages(messages)
473
-
474
- # First try the HF endpoint if available - THIS ALWAYS FAILS SO SKIPPING
475
- try:
476
- completion = self._call_hf_endpoint(
477
- formatted_messages,
478
- stop_sequences,
479
- **kwargs
480
- )
481
- return ChatMessage(role=MessageRole.ASSISTANT, content=completion)
482
- except Exception as e:
483
- print(f"HF endpoint failed with error: {e}. Falling back to hyperbolic.")
484
- # Continue to fallback
485
-
486
- # Fallback to hyperbolic
487
- try:
488
- return self._call_hyperbolic(formatted_messages, stop_sequences, **kwargs)
489
- except Exception as e:
490
- raise Exception(f"Both endpoints failed. Last error: {e}")
491
-
492
- def _format_messages(self, messages: List[Dict[str, Any]]):
493
- """Format messages for API requests - works for both endpoints"""
494
-
495
- formatted_messages = []
496
-
497
- for msg in messages:
498
- role = msg["role"]
499
- content = []
500
-
501
- if isinstance(msg["content"], list):
502
- for item in msg["content"]:
503
- if item["type"] == "text":
504
- content.append({"type": "text", "text": item["text"]})
505
- elif item["type"] == "image":
506
- # # Handle image path or direct image object
507
- # if isinstance(item["image"], str):
508
- # # Image is a path
509
- # with open(item["image"], "rb") as image_file:
510
- # base64_image = base64.b64encode(image_file.read()).decode("utf-8")
511
- # else:
512
- # # Image is a PIL image or similar object
513
- # img_byte_arr = BytesIO()
514
- # base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
515
-
516
- # content.append({
517
- # "type": "image_url",
518
- # "image_url": {
519
- # "url": f"data:image/png;base64,{base64_image}"
520
- # }
521
- # })
522
- pass
523
- else:
524
- # Plain text message
525
- content = [{"type": "text", "text": msg["content"]}]
526
-
527
- formatted_messages.append({"role": role, "content": content})
528
-
529
- return formatted_messages
530
-
531
- def _call_hf_endpoint(self, formatted_messages, stop_sequences=None, **kwargs):
532
- """Call the Hugging Face OpenAI-compatible endpoint"""
533
 
534
- # Extract parameters with defaults
535
- max_tokens = kwargs.get("max_new_tokens", 4096)
536
- temperature = kwargs.get("temperature", 0.7)
537
- top_p = kwargs.get("top_p", 0.9)
538
- stream = kwargs.get("stream", False)
539
 
540
- completion = self.hf_client.chat.completions.create(
541
- model="tgi", # Model name for the endpoint
542
- messages=formatted_messages,
543
- max_tokens=max_tokens,
544
- temperature=temperature,
545
- top_p=top_p,
546
- stream=stream,
547
- stop=stop_sequences
548
- )
549
 
550
- if stream:
551
- # For streaming responses, return a generator
552
- def stream_generator():
553
- for chunk in completion:
554
- yield chunk.choices[0].delta.content or ""
555
- return stream_generator()
556
- else:
557
- # For non-streaming, return the full text
558
- return completion.choices[0].message.content
559
 
560
- def _call_hyperbolic(self, formatted_messages, stop_sequences=None, **kwargs):
561
- """Call the hyperbolic API"""
562
 
563
- completion = self.hyperbolic_client.chat.completions.create(
564
- model=self.model_path,
565
- messages=formatted_messages,
566
- max_tokens=kwargs.get("max_new_tokens", 4096),
567
- temperature=kwargs.get("temperature", 0.7),
568
- top_p=kwargs.get("top_p", 0.9),
569
- stop=stop_sequences
570
- )
571
 
572
- # Extract the response text
573
- output_text = completion.choices[0].message.content
574
 
575
- return ChatMessage(role=MessageRole.ASSISTANT, content=output_text)
576
 
577
- def to_dict(self) -> Dict[str, Any]:
578
- """Convert the model to a dictionary"""
579
- return {
580
- "class": self.__class__.__name__,
581
- "model_path": self.model_path,
582
- "provider": self.provider,
583
- "hf_base_url": self.hf_base_url,
584
- # We don't save the API keys for security reasons
585
- }
586
 
587
- @classmethod
588
- def from_dict(cls, data: Dict[str, Any]) -> "QwenVLAPIModel":
589
- """Create a model from a dictionary"""
590
- return cls(
591
- model_path=data.get("model_path", "Qwen/Qwen2.5-VL-72B-Instruct"),
592
- provider=data.get("provider", "hyperbolic"),
593
- hf_base_url=data.get("hf_base_url", "https://s41ydkv0iyjeokyj.us-east-1.aws.endpoints.huggingface.cloud"),
594
- )
 
346
  self.desktop.kill()
347
  print("E2B sandbox terminated")
348
 
349
+
350
+ class QwenVLAPIModel(Model):
351
+ """Model wrapper for Qwen2.5VL API with fallback mechanism"""
352
+
353
+ def __init__(
354
+ self,
355
+ hf_base_url,
356
+ model_path: str = "Qwen/Qwen2.5-VL-72B-Instruct",
357
+ provider: str = "hyperbolic",
358
+ hf_token: str = None,
359
+ ):
360
+ super().__init__()
361
+ self.model_id = model_path
362
+ self.hf_base_url = hf_base_url
363
+ self.dedicated_endpoint_model = HfApiModel(
364
+ hf_base_url,
365
+ token=hf_token
366
+ )
367
+ self.fallback_model = HfApiModel(
368
+ model_path,
369
+ provider=provider,
370
+ token=hf_token,
371
+ )
372
+
373
+ def __call__(
374
+ self,
375
+ messages: List[Dict[str, Any]],
376
+ stop_sequences: Optional[List[str]] = None,
377
+ **kwargs
378
+ ) -> ChatMessage:
379
+
380
+ try:
381
+ return self.dedicated_endpoint_model(messages, stop_sequences, **kwargs)
382
+ except Exception as e:
383
+ print(f"HF endpoint failed with error: {e}. Falling back to hyperbolic.")
384
+
385
+ # Continue to fallback
386
+ try:
387
+ return self.fallback_model(messages, stop_sequences, **kwargs)
388
+ except Exception as e:
389
+ raise Exception(f"Both endpoints failed. Last error: {e}")
390
 
391
  # class QwenVLAPIModel(Model):
392
  # """Model wrapper for Qwen2.5VL API with fallback mechanism"""
 
399
  # hf_base_url: str = "https://n5wr7lfx6wp94tvl.us-east-1.aws.endpoints.huggingface.cloud"
400
  # ):
401
  # super().__init__()
402
+ # self.model_path = model_path
403
  # self.model_id = model_path
404
+ # self.provider = provider
405
+ # self.hf_token = hf_token
406
  # self.hf_base_url = hf_base_url
407
+
408
+ # # Initialize hyperbolic client
409
+ # self.hyperbolic_client = InferenceClient(
410
+ # provider=self.provider,
411
  # )
412
+
413
+ # assert not self.hf_base_url.endswith("/v1/"), "Enter your base url without '/v1/' suffix."
414
+
415
+ # # Initialize HF OpenAI-compatible client if token is provided
416
+ # self.hf_client = None
417
+ # from openai import OpenAI
418
+ # self.hf_client = OpenAI(
419
+ # base_url=self.hf_base_url + "/v1/",
420
+ # api_key=self.hf_token
421
  # )
422
 
423
  # def __call__(
 
426
  # stop_sequences: Optional[List[str]] = None,
427
  # **kwargs
428
  # ) -> ChatMessage:
429
+ # """Convert a list of messages to an API request with fallback mechanism"""
430
 
431
+ # # Format messages once for both APIs
432
+ # formatted_messages = self._format_messages(messages)
433
+
434
+ # # First try the HF endpoint if available - THIS ALWAYS FAILS SO SKIPPING
435
  # try:
436
+ # completion = self._call_hf_endpoint(
437
+ # formatted_messages,
438
+ # stop_sequences,
439
+ # **kwargs
440
+ # )
441
+ # print("SUCCESSFUL call of inference endpoint")
442
+ # return ChatMessage(role=MessageRole.ASSISTANT, content=completion)
443
  # except Exception as e:
444
  # print(f"HF endpoint failed with error: {e}. Falling back to hyperbolic.")
445
+ # # Continue to fallback
446
+
447
+ # # Fallback to hyperbolic
448
  # try:
449
+ # return self._call_hyperbolic(formatted_messages, stop_sequences, **kwargs)
450
  # except Exception as e:
451
  # raise Exception(f"Both endpoints failed. Last error: {e}")
452
 
 
472
  # else:
473
  # # Image is a PIL image or similar object
474
  # img_byte_arr = BytesIO()
 
475
  # base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
476
 
477
  # content.append({
 
488
 
489
  # return formatted_messages
490
 
491
+ # def _call_hf_endpoint(self, formatted_messages, stop_sequences=None, **kwargs):
492
+ # """Call the Hugging Face OpenAI-compatible endpoint"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
 
494
+ # # Extract parameters with defaults
495
+ # max_tokens = kwargs.get("max_new_tokens", 4096)
496
+ # temperature = kwargs.get("temperature", 0.7)
497
+ # top_p = kwargs.get("top_p", 0.9)
498
+ # stream = kwargs.get("stream", False)
499
 
500
+ # completion = self.hf_client.chat.completions.create(
501
+ # model="tgi", # Model name for the endpoint
502
+ # messages=formatted_messages,
503
+ # max_tokens=max_tokens,
504
+ # temperature=temperature,
505
+ # top_p=top_p,
506
+ # stream=stream,
507
+ # stop=stop_sequences
508
+ # )
509
 
510
+ # if stream:
511
+ # # For streaming responses, return a generator
512
+ # def stream_generator():
513
+ # for chunk in completion:
514
+ # yield chunk.choices[0].delta.content or ""
515
+ # return stream_generator()
516
+ # else:
517
+ # # For non-streaming, return the full text
518
+ # return completion.choices[0].message.content
519
 
520
+ # def _call_hyperbolic(self, formatted_messages, stop_sequences=None, **kwargs):
521
+ # """Call the hyperbolic API"""
522
 
523
+ # completion = self.hyperbolic_client.chat.completions.create(
524
+ # model=self.model_path,
525
+ # messages=formatted_messages,
526
+ # max_tokens=kwargs.get("max_new_tokens", 4096),
527
+ # temperature=kwargs.get("temperature", 0.7),
528
+ # top_p=kwargs.get("top_p", 0.9),
529
+ # stop=stop_sequences
530
+ # )
531
 
532
+ # # Extract the response text
533
+ # output_text = completion.choices[0].message.content
534
 
535
+ # return ChatMessage(role=MessageRole.ASSISTANT, content=output_text)
536
 
537
+ # def to_dict(self) -> Dict[str, Any]:
538
+ # """Convert the model to a dictionary"""
539
+ # return {
540
+ # "class": self.__class__.__name__,
541
+ # "model_path": self.model_path,
542
+ # "provider": self.provider,
543
+ # "hf_base_url": self.hf_base_url,
544
+ # # We don't save the API keys for security reasons
545
+ # }
546
 
547
+ # @classmethod
548
+ # def from_dict(cls, data: Dict[str, Any]) -> "QwenVLAPIModel":
549
+ # """Create a model from a dictionary"""
550
+ # return cls(
551
+ # model_path=data.get("model_path", "Qwen/Qwen2.5-VL-72B-Instruct"),
552
+ # provider=data.get("provider", "hyperbolic"),
553
+ # hf_base_url=data.get("hf_base_url", "https://s41ydkv0iyjeokyj.us-east-1.aws.endpoints.huggingface.cloud"),
554
+ # )