File size: 1,174 Bytes
8af6af2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import json

import numpy as np
from PIL import Image
from google import genai
from google.genai import types

from .base import LLMBase

# Function declaration (tool schema) handed to Gemini, loaded once at import.
# The path is relative to the process working directory. A context manager
# closes the file handle promptly; JSON is read explicitly as UTF-8 so the
# result does not depend on the platform's locale encoding.
with open("tools/schema_gemini.json", "r", encoding="utf-8") as _schema_file:
    FUNCTION_CALL = json.load(_schema_file)

class GeminiAPI(LLMBase):
    """Gemini backend that forces a function call and returns its arguments."""

    @classmethod
    def call(cls, image: np.ndarray, model: str, token: str) -> dict:
        """Send ``image`` to a Gemini model and return the function-call args.

        Args:
            image: Image array; converted with ``PIL.Image.fromarray`` before
                being passed as the request content.
            model: Model name forwarded to ``generate_content``.
            token: API key used to create the ``genai.Client``.

        Returns:
            The ``args`` of the first function call found in the first
            candidate's parts, or an empty dict when the response carries no
            function call (no candidates, no content, no parts, or no
            function-call part).
        """
        client = genai.Client(api_key=token)
        pil_image = Image.fromarray(image)

        # Mode "ANY" restricted to the single declared function asks the
        # model to answer with a call to FUNCTION_CALL rather than free text.
        config = types.GenerateContentConfig(
            tools=[types.Tool(function_declarations=[FUNCTION_CALL])],
            tool_config={
                "function_calling_config": {
                    "mode": "ANY",
                    "allowed_function_names": [FUNCTION_CALL["name"]],
                }
            },
        )
        response = client.models.generate_content(
            model=model,
            contents=[pil_image],
            config=config,
        )

        # The response may come back without candidates, content, or parts;
        # treat each missing level as "no function call" instead of letting
        # an IndexError/AttributeError escape.
        candidates = response.candidates or []
        if not candidates:
            return {}

        content = candidates[0].content
        parts = (content.parts if content is not None else None) or []

        # The function call is not guaranteed to be the first part, so scan
        # all parts and return the first call found.
        for part in parts:
            function_call = part.function_call
            if function_call:
                # ``args`` may be None; normalise to the annotated dict type.
                return function_call.args or {}

        return {}