Spaces:
Running
Running
import json | |
import numpy as np | |
from PIL import Image | |
from google import genai | |
from google.genai import types | |
from .base import LLMBase | |
# Gemini function declaration used to force a structured tool call.
# Loaded once at import time; the path is relative to the process's current
# working directory. The context manager closes the file handle promptly, and
# the explicit encoding keeps parsing independent of the platform default.
with open("tools/schema_gemini.json", "r", encoding="utf-8") as _schema_file:
    FUNCTION_CALL = json.load(_schema_file)
class GeminiAPI(LLMBase):
    """LLM backend that sends an image to Gemini and returns tool-call arguments."""

    # NOTE(review): the first parameter is named ``cls`` but no ``@classmethod``
    # decorator is visible here, so this is bound like a regular method.
    # Confirm against how ``LLMBase`` declares/invokes ``call``.
    def call(cls, image: np.ndarray, model: str, token: str) -> dict:
        """Ask a Gemini model to call the declared function for ``image``.

        Args:
            image: Image array, converted with ``PIL.Image.fromarray`` before
                being sent as the only content item.
            model: Model name passed through to ``generate_content``.
            token: API key used to construct the ``genai.Client``.

        Returns:
            The arguments of the first function call found in the first
            candidate of the response, or an empty dict when the response
            carries no candidates, no content parts, or no function call.
        """
        client = genai.Client(api_key=token)
        pil_image = Image.fromarray(image)

        # Mode "ANY" restricted to the single declared function asks the model
        # to answer with a call to that function rather than free text.
        config = types.GenerateContentConfig(
            tools=[types.Tool(function_declarations=[FUNCTION_CALL])],
            tool_config={
                "function_calling_config": {
                    "mode": "ANY",
                    "allowed_function_names": [FUNCTION_CALL["name"]],
                }
            },
        )
        response = client.models.generate_content(
            model=model,
            contents=[pil_image],
            config=config,
        )

        # Every level below may be missing or empty (e.g. when the request is
        # blocked), so guard each step instead of indexing blindly and raising.
        candidates = response.candidates or []
        if not candidates:
            return {}
        content = candidates[0].content
        parts = (content.parts if content is not None else None) or []

        # The function call is not guaranteed to be the first part, so scan
        # all parts of the candidate and return the first call found.
        for part in parts:
            function_call = part.function_call
            if function_call:
                # ``args`` may be unset; normalise to a dict for callers.
                return function_call.args or {}
        return {}