import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM


def create_chat_interface():
    # Initialize model and tokenizer. Qwen2.5 is supported natively by recent
    # transformers releases, so trust_remote_code is not required.
    model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
    )

    # Chat function. With type="messages" below, `history` arrives as a list of
    # {"role": ..., "content": ...} dicts, which maps directly onto the
    # tokenizer's chat template, so the model sees the full conversation.
    def chat(message, history):
        messages = history + [{"role": "user", "content": message}]

        # Format the conversation with the model's chat template
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Generate a response; do_sample=True is required for temperature
        # to take effect
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
        )

        # Decode only the newly generated tokens, not the echoed prompt
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return response

    # Create Gradio interface
    interface = gr.ChatInterface(
        fn=chat,
        type="messages",
        title="Code Assistant Chat",
        description="Ask coding questions or get help with programming tasks.",
        theme=gr.themes.Soft(),
        examples=[
            "Write a Python function to sort a list",
            "How do I read a CSV file in pandas?",
            "Explain object-oriented programming concepts",
        ],
    )
    return interface


if __name__ == "__main__":
    # Launch the interface
    chat_app = create_chat_interface()
    chat_app.launch(share=True)
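
# --- Optional: streaming responses (a sketch, not wired into the app above) ---
# gr.ChatInterface also accepts a generator function: yielding progressively
# longer strings makes the reply appear token by token. A minimal sketch using
# transformers' TextIteratorStreamer is shown below, commented out because it
# assumes `tokenizer` and `model` are in scope (i.e., it would replace chat()
# inside create_chat_interface). The name chat_stream is illustrative.
#
# from threading import Thread
# from transformers import TextIteratorStreamer
#
# def chat_stream(message, history):
#     messages = history + [{"role": "user", "content": message}]
#     prompt = tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
#     streamer = TextIteratorStreamer(
#         tokenizer, skip_prompt=True, skip_special_tokens=True
#     )
#     # generate() runs in a background thread while this generator yields text
#     thread = Thread(
#         target=model.generate,
#         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512,
#                     do_sample=True, temperature=0.7),
#     )
#     thread.start()
#     partial = ""
#     for chunk in streamer:
#         partial += chunk
#         yield partial  # ChatInterface re-renders the reply on each yield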