import gradio as gr
import requests
import os

# Get the API token from an environment variable
# (add HF_API_TOKEN as a secret in your Space settings).
API_TOKEN = os.getenv("HF_API_TOKEN", "")

# Hosted Inference API endpoint for the model
API_URL = "https://api-inference.huggingface.co/models/modelsmafia/punjabi_Gemma-2B"


def chat_with_model(message, history):
    # Flatten the conversation history into a single prompt,
    # alternating "user" and "model" turns.
    full_prompt = ""
    for user_turn, model_turn in history:
        full_prompt += f"user\n{user_turn}\n\n"
        full_prompt += f"model\n{model_turn}\n\n"
    full_prompt += f"user\n{message}\n\nmodel\n"

    try:
        headers = {
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "application/json",
        }
        payload = {
            "inputs": full_prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "do_sample": True,
            },
        }
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)

        if response.status_code == 200:
            # The API echoes the prompt back, so strip it from the generated text.
            return response.json()[0]["generated_text"].replace(full_prompt, "")
        else:
            return (
                f"Error: Status code {response.status_code}\n{response.text}\n\n"
                "You need to configure your model for inference on Hugging Face."
            )
    except Exception as e:
        return (
            f"Error: {str(e)}\n\n"
            "The model might not be properly configured for inference yet."
        )


# Create the Gradio chat interface
demo = gr.ChatInterface(
    chat_with_model,
    title="Chat with Punjabi Gemma 2B",
    description="A bilingual chat model for English and Punjabi",
    examples=[
        ["ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਤੁਸੀਂ ਕਿਵੇਂ ਹੋ?"],
        ["Tell me about Punjab in a few sentences."],
    ],
    theme="soft",
)

# Launch the interface
if __name__ == "__main__":
    demo.launch()
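
# --- Optional local smoke test (a minimal sketch, not part of the Space app) ---
# Uncomment to verify the Inference API endpoint responds before wiring up
# Gradio. Assumes HF_API_TOKEN is exported in your environment; the prompt
# below is a hypothetical single-turn example in the same plain-text turn
# format used by chat_with_model above.
#
# import os, requests
# resp = requests.post(
#     "https://api-inference.huggingface.co/models/modelsmafia/punjabi_Gemma-2B",
#     headers={"Authorization": f"Bearer {os.getenv('HF_API_TOKEN', '')}"},
#     json={"inputs": "user\nHello!\n\nmodel\n", "parameters": {"max_new_tokens": 32}},
#     timeout=60,
# )
# print(resp.status_code, resp.json())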