"""Thin wrapper around the Google Generative AI (Gemini) chat API."""

from typing import Dict, List

import google.generativeai as genai


class GeminiLLMConfig:
    """Plain settings holder consumed by :class:`GeminiLLM`.

    Attributes:
        api_key: Key handed to ``genai.configure``.
        max_output_tokens: Forwarded to ``GenerationConfig.max_output_tokens``.
        model: Model name handed to ``genai.GenerativeModel``.
        prompt: Text used as the model's ``system_instruction``.
        temperature: Forwarded to ``GenerationConfig.temperature``.
        top_k: Forwarded to ``GenerationConfig.top_k``.
        top_p: Forwarded to ``GenerationConfig.top_p``.
    """

    def __init__(
        self,
        api_key: str,
        max_output_tokens: int,
        model: str,
        prompt: str,
        temperature: float,
        top_k: int,
        top_p: float,
    ):
        self.api_key = api_key
        self.max_output_tokens = max_output_tokens
        self.model = model
        self.prompt = prompt
        self.temperature = temperature
        self.top_k = top_k
        self.top_p = top_p

    @classmethod
    def default_config(cls):
        """Return a config pre-filled with the built-in defaults.

        The ``api_key`` default is an empty string, so callers are expected
        to set a real key on the returned object before using it.
        """
        return cls(
            api_key="",
            max_output_tokens=512,
            model="gemini-1.5-flash",
            # Adjacent literals are concatenated at compile time; the
            # resulting text is identical to the original one-line prompt.
            prompt=(
                "You are a voice assistant who talks in a conversational way and can chat with me like my friends. "
                "I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. "
                "Don’t talk like a robot, instead I would like you to talk like a real human with emotions. "
                "I will use your answer for text-to-speech, so don’t return me any meaningless characters. "
                "I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. "
                "When giving me a list of options, express the options in a narrative way instead of bullet points."
            ),
            temperature=1.0,
            top_k=40,
            top_p=0.95,
        )


class GeminiLLM:
    """Chat client bound to one Gemini model and one system prompt."""

    def __init__(self, config: GeminiLLMConfig):
        """Configure the ``genai`` package and build the model handle.

        Args:
            config: Settings to use; kept on the instance as ``self.config``.
        """
        self.config = config
        # NOTE: genai.configure is a package-level call, not per-instance;
        # presumably the most recently constructed GeminiLLM decides the key
        # used by all instances -- confirm against the SDK if several keys
        # are ever needed in one process.
        genai.configure(api_key=self.config.api_key)
        self.model = genai.GenerativeModel(
            model_name=self.config.model,
            system_instruction=self.config.prompt,
        )

    def get_chat_completions_stream(self, messages: List[Dict[str, str]]):
        """Send the latest message with prior history and stream the reply.

        All messages except the last are passed as chat history; the
        ``"parts"`` value of the last message is sent as the new turn.

        Args:
            messages: Conversation so far, oldest first. Must contain at
                least one entry. Entries are presumably in the SDK's
                ``{"role": ..., "parts": ...}`` shape -- only the ``"parts"``
                key of the final entry is read here.

        Returns:
            The streaming response object returned by
            ``ChatSession.send_message(..., stream=True)``.

        Raises:
            RuntimeError: If ``messages`` is empty, or if any step of
                starting the chat or sending the message fails (the
                underlying exception is chained as ``__cause__``).
        """
        # Fail early with a readable reason instead of letting messages[-1]
        # surface as a wrapped "list index out of range".
        if not messages:
            raise RuntimeError(
                "get_chat_completions_stream failed, err: messages must not be empty"
            )

        try:
            chat = self.model.start_chat(history=messages[0:-1])
            response = chat.send_message(
                messages[-1].get("parts"),
                generation_config=genai.types.GenerationConfig(
                    max_output_tokens=self.config.max_output_tokens,
                    temperature=self.config.temperature,
                    top_k=self.config.top_k,
                    top_p=self.config.top_p,
                ),
                stream=True,
            )
            return response
        except Exception as e:
            # Boundary wrapper: callers only need to handle RuntimeError.
            raise RuntimeError(f"get_chat_completions_stream failed, err: {e}") from e