import gradio as gr
import requests
import json
import os

# Get API token from environment variable (set this in your Space settings)
API_TOKEN = os.getenv("HF_API_TOKEN", "")  # Make sure to add your token in Space settings
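
# Optional guard (not required for the app to start): Gemma-based models on the
# Inference API are gated, so an empty token will likely be rejected; warning
# early is friendlier than failing at request time.
if not API_TOKEN:
    print("Warning: HF_API_TOKEN is not set; API requests may fail.")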


def chat_with_model(message, history):
    # Build the prompt from prior turns using Gemma's chat template;
    # history is a list of [user_message, bot_response] pairs
    full_prompt = ""
    for h in history:
        full_prompt += f"<start_of_turn>user\n{h[0]}\n<end_of_turn>\n"
        full_prompt += f"<start_of_turn>model\n{h[1]}\n<end_of_turn>\n"
    full_prompt += f"<start_of_turn>user\n{message}\n<end_of_turn>\n<start_of_turn>model\n"

    try:
        headers = {
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "application/json"
        }
        payload = {
            "inputs": full_prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "do_sample": True
            }
        }
        API_URL = "https://api-inference.huggingface.co/models/modelsmafia/punjabi_Gemma-2B"
        response = requests.post(API_URL, headers=headers, json=payload)
        if response.status_code == 200:
            # generated_text echoes the prompt followed by the completion,
            # so strip the prompt to return only the model's new reply
            return response.json()[0]["generated_text"].replace(full_prompt, "")
        else:
            return f"Error: Status code {response.status_code}\n{response.text}\n\nYou need to configure your model for inference on Hugging Face."
    except Exception as e:
        return f"Error: {str(e)}\n\nThe model might not be properly configured for inference yet."

# Create Gradio interface
demo = gr.ChatInterface(
    chat_with_model,
    title="Chat with Punjabi Gemma 2B",
    description="A bilingual chat model for English and Punjabi",
    examples=[
        ["ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਤੁਸੀਂ ਕਿਵੇਂ ਹੋ?"],
        ["Tell me about Punjab in a few sentences."]
    ],
    theme="soft"
)
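
# Note: the pairs-style history above matches Gradio 4.x defaults; newer
# Gradio releases prefer gr.ChatInterface(..., type="messages"), where each
# history item is a dict with "role" and "content" keys instead of a pair.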

# Launch the interface
if __name__ == "__main__":
    demo.launch()
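
# To run outside a Space (assumes the file is saved as app.py and the token
# has access to the gated Gemma weights):
#   export HF_API_TOKEN=hf_xxx
#   python app.py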