import gradio as gr
from huggingface_hub import InferenceClient

# Initialize the inference client with the Mixtral instruct model
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# Define the system prompt
SYSTEM_PROMPT = (
    "You are an expert programming assistant with deep knowledge of software development, debugging, "
    "and code optimization. Your responses should be clear, concise, and include code examples when appropriate. "
    "Always explain your reasoning step by step to help the user understand the solution."
)


def respond(message, history: list[tuple[str, str]]):
    # Rebuild the full conversation, starting with the system prompt
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""

    # Static parameters for generation
    max_tokens = 512
    temperature = 0.7
    top_p = 0.95

    # Stream the completion, yielding the accumulated text so the UI updates incrementally
    for token in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        content = token.choices[0].delta.content
        if content:  # the final streamed chunk's delta may carry no content
            response += content
        yield response


# Create the Gradio ChatInterface without additional inputs
demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch()
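# Usage note (a sketch of a typical setup, not part of the app itself):
# InferenceClient typically authenticates via the HF_TOKEN environment variable
# or a cached `huggingface-cli login`, so for gated or rate-limited models log
# in before launching. Assuming this file is saved as app.py (a hypothetical
# filename), a run might look like:
#
#   HF_TOKEN=hf_xxx python app.py
#
# demo.launch() serves the chat UI locally, by default at http://127.0.0.1:7860.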