import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM


def create_chat_interface():
    # Initialize model and tokenizer. Qwen2.5 is supported natively by recent
    # transformers releases, so trust_remote_code is not required.
    model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
    )

    # Chat function. With type="messages" below, `history` arrives as a list of
    # {"role": ..., "content": ...} dicts, which maps directly onto the
    # tokenizer's chat template, so the model sees the full conversation.
    def chat(message, history):
        messages = history + [{"role": "user", "content": message}]

        # Format the conversation with the model's chat template
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Generate a response; do_sample=True is required for temperature
        # to take effect
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
        )

        # Decode only the newly generated tokens, not the echoed prompt
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return response

    # Create Gradio interface
    interface = gr.ChatInterface(
        fn=chat,
        type="messages",
        title="Code Assistant Chat",
        description="Ask coding questions or get help with programming tasks.",
        theme=gr.themes.Soft(),
        examples=[
            "Write a Python function to sort a list",
            "How do I read a CSV file in pandas?",
            "Explain object-oriented programming concepts",
        ],
    )
    return interface


if __name__ == "__main__":
    # Launch the interface
    chat_app = create_chat_interface()
    chat_app.launch(share=True)
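
# --- Optional: streaming responses (a sketch, not wired into the app above) ---
# gr.ChatInterface also accepts a generator function: yielding progressively
# longer strings makes the reply appear token by token. A minimal sketch using
# transformers' TextIteratorStreamer is shown below, commented out because it
# assumes `tokenizer` and `model` are in scope (i.e., it would replace chat()
# inside create_chat_interface). The name chat_stream is illustrative.
#
# from threading import Thread
# from transformers import TextIteratorStreamer
#
# def chat_stream(message, history):
#     messages = history + [{"role": "user", "content": message}]
#     prompt = tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
#     streamer = TextIteratorStreamer(
#         tokenizer, skip_prompt=True, skip_special_tokens=True
#     )
#     # generate() runs in a background thread while this generator yields text
#     thread = Thread(
#         target=model.generate,
#         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512,
#                     do_sample=True, temperature=0.7),
#     )
#     thread.start()
#     partial = ""
#     for chunk in streamer:
#         partial += chunk
#         yield partial  # ChatInterface re-renders the reply on each yield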