import gradio as gr
from huggingface_hub import InferenceClient

# Initialize the inference client with the Mixtral instruct model
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# Define the system prompt
SYSTEM_PROMPT = (
    "You are an expert programming assistant with deep knowledge of software development, debugging, "
    "and code optimization. Your responses should be clear, concise, and include code examples when appropriate. "
    "Always explain your reasoning step by step to help the user understand the solution."
)


def respond(message, history: list[tuple[str, str]]):
    # Rebuild the full conversation, starting with the system prompt
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""

    # Static parameters for generation
    max_tokens = 512
    temperature = 0.7
    top_p = 0.95

    # Stream the completion, yielding the accumulated text so the UI updates incrementally
    for token in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        content = token.choices[0].delta.content
        if content:  # the final streamed chunk's delta may carry no content
            response += content
        yield response


# Create the Gradio ChatInterface without additional inputs
demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch()
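# Usage note (a sketch of a typical setup, not part of the app itself):
# InferenceClient typically authenticates via the HF_TOKEN environment variable
# or a cached `huggingface-cli login`, so for gated or rate-limited models log
# in before launching. Assuming this file is saved as app.py (a hypothetical
# filename), a run might look like:
#
#   HF_TOKEN=hf_xxx python app.py
#
# demo.launch() serves the chat UI locally, by default at http://127.0.0.1:7860.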