File size: 2,940 Bytes
22507c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
## supporting functions
import base64, io
import os
from typing import Any, Union, List
from PIL import Image  # Pillow image library

# thanks to https://community.openai.com/t/how-to-load-a-local-image-to-gpt4-vision-using-api/533090/5

def resize_image(image, max_dimension):
    """Shrink *image* so that neither side exceeds *max_dimension* pixels.

    Palette-mode ("P") images are always converted to true colour first
    (RGBA when the image info declares a ``transparency`` entry, RGB
    otherwise), even when no resizing is needed. Images that already fit
    are otherwise returned as-is; larger ones are scaled down with the
    aspect ratio preserved using the LANCZOS filter.

    Args:
        image: A Pillow image (anything exposing ``size``, ``mode``,
            ``info``, ``convert`` and ``resize``).
        max_dimension: Largest allowed width or height, in pixels.

    Returns:
        The (possibly converted and/or resized) image.
    """
    width, height = image.size

    # Expand palette images to true colour, keeping alpha if one is declared.
    if image.mode == "P":
        image = image.convert("RGBA" if "transparency" in image.info else "RGB")

    # Nothing to do when the image already fits inside the bounding square.
    if width <= max_dimension and height <= max_dimension:
        return image

    # Pin the longer side to max_dimension and scale the shorter side by the
    # same ratio. Truncation can drive the shorter side to 0 for very thin
    # images (e.g. 5000x1), and Pillow refuses to resize to a zero-sized
    # target, so the shorter side is clamped to at least one pixel.
    if width > height:
        new_width = max_dimension
        new_height = max(1, int(height * (max_dimension / width)))
    else:
        new_height = max_dimension
        new_width = max(1, int(width * (max_dimension / height)))

    return image.resize((new_width, new_height), Image.LANCZOS)

def convert_to_png(image):
    """Encode *image* as PNG and return the raw encoded bytes.

    Args:
        image: An object with a Pillow-style ``save(fp, format=...)`` method.

    Returns:
        The PNG-encoded content as ``bytes``.
    """
    buffer = io.BytesIO()
    try:
        image.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    finally:
        # Release the in-memory buffer whether or not encoding succeeded.
        buffer.close()
    return png_bytes

def create_image_content(image, mime_type="image/png"):
    """Wrap base64 image text in an ``image_url`` message content part.

    Args:
        image: Base64-encoded image data as text (without any ``data:``
            prefix).
        mime_type: Media type declared in the generated data URL. Defaults
            to ``"image/png"`` because the encoding helpers in this module
            always produce PNG bytes; pass another value (for example
            ``"image/jpeg"``) when supplying data encoded elsewhere.

    Returns:
        A dict of the form
        ``{"type": "image_url", "image_url": {"url": "data:<mime>;base64,<data>"}}``.
    """
    return {
        "type": "image_url",
        "image_url": {"url": f"data:{mime_type};base64,{image}"},
    }




def get_attr(attr: str, kwargs: dict, cls: Any = None) -> Any:
    """Look up *attr* in *kwargs*, falling back to an attribute of *cls*.

    Args:
        attr: Name of the key / attribute to look up.
        kwargs: Mapping searched first; may be empty or ``None``.
        cls: Optional object whose attribute of the same name is used when
            *kwargs* has no non-``None`` value for *attr*.

    Returns:
        The value from *kwargs* when it is present and not ``None``;
        otherwise the attribute of *cls* if it exists; otherwise ``None``.
    """
    if kwargs:
        value = kwargs.get(attr)
        if value is not None:
            return value

    if cls is None:
        return None

    # A missing attribute yields None, same as an absent key.
    return getattr(cls, attr, None)



def process_image(image: Union[str, os.PathLike, Image.Image], max_size: int) -> str:
    """Load (if necessary) and encode an image via ``process_pillow_image``.

    Args:
        image: Either a filesystem path (``str`` or any ``os.PathLike`` such
            as ``pathlib.Path``) or an already-loaded Pillow image.
        max_size: Size limit in pixels forwarded to ``process_pillow_image``.

    Returns:
        The string returned by ``process_pillow_image`` for the image.

    Raises:
        ValueError: If *image* is neither a path nor a Pillow image.
    """
    if isinstance(image, Image.Image):
        return process_pillow_image(image, max_size)

    if isinstance(image, (str, os.PathLike)):
        # The context manager closes the underlying file once encoding is done.
        with Image.open(image) as img:
            return process_pillow_image(img, max_size)

    raise ValueError("Input must be either a file path (str) or a Pillow Image object")


def process_pillow_image(image: Image.Image, max_size: int) -> str:
    """Return *image* as base64 text of its PNG encoding.

    An image that reports the ``image/png`` MIME type and already fits
    within *max_size* on both sides is saved directly. Anything else goes
    through ``resize_image`` and ``convert_to_png`` first. Objects without a
    ``get_format_mimetype`` method are treated as PNG.

    Args:
        image: The Pillow image to encode.
        max_size: Maximum width/height in pixels.

    Returns:
        UTF-8 decoded base64 string of the PNG bytes.
    """
    width, height = image.size

    if hasattr(image, 'get_format_mimetype'):
        mimetype = image.get_format_mimetype()
    else:
        mimetype = "image/png"

    if mimetype == "image/png" and width <= max_size and height <= max_size:
        # Fast path: already a small PNG, so encode without resizing.
        buffer = io.BytesIO()
        image.save(buffer, format='PNG')
        png_bytes = buffer.getvalue()
    else:
        png_bytes = convert_to_png(resize_image(image, max_size))

    return base64.b64encode(png_bytes).decode('utf-8')

def user_message_with_images(
    user_msg_str: str,
    images: List[Union[str, Image.Image]]|None = None,
    max_size_px: int = 1024,
) -> dict:
    """Build a ``user`` chat message holding text plus optional images.

    Args:
        user_msg_str: Text placed in the first content part.
        images: Optional images; each entry is passed to ``process_image``
            together with *max_size_px*. ``None`` means no images.
        max_size_px: Size limit in pixels forwarded to ``process_image``.

    Returns:
        ``{"role": "user", "content": [...]}`` where the content list starts
        with the text part, followed by one ``create_image_content`` part
        per image, in input order.
    """
    parts = [{"type": "text", "text": user_msg_str}]

    if images is not None:
        for picture in images:
            encoded = process_image(picture, max_size_px)
            parts.append(create_image_content(encoded))

    return {"role": "user", "content": parts}