import gradio as gr
from huggingface_hub import InferenceClient

# Use InferenceClient to interact with the model through the Inference API
client = InferenceClient(model="modelsmafia/punjabi_Gemma-2B")


def chat_with_model(message, history):
    # Rebuild the conversation in OpenAI-style message format.
    # ChatInterface passes history as a list of [user, assistant] pairs.
    messages = []
    for h in history:
        messages.append({"role": "user", "content": h[0]})
        messages.append({"role": "assistant", "content": h[1]})
    messages.append({"role": "user", "content": message})

    try:
        # chat_completion applies the model's chat template server-side,
        # so the full conversation in `messages` is actually sent to the
        # model. If the endpoint only supports raw text generation, fall
        # back to client.text_generation() with a manually formatted prompt.
        response = client.chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            top_p=0.9,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}\n\nThe model might not be properly configured for inference yet."


# Create the Gradio chat interface
demo = gr.ChatInterface(
    chat_with_model,
    title="Chat with Punjabi Gemma 2B",
    description="A bilingual chat model for English and Punjabi",
    examples=[
        ["ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਤੁਸੀਂ ਕਿਵੇਂ ਹੋ?"],  # Punjabi: "Hello, how are you?"
        ["Tell me about Punjab in a few sentences."],
    ],
    theme="soft",
)

# Launch the interface
if __name__ == "__main__":
    demo.launch()