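"""Gradio Space for chatting with modelsmafia/punjabi_Gemma-2B through the
Hugging Face Inference API."""
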
import os

import gradio as gr
import requests

# Get the API token from an environment variable (add HF_API_TOKEN in your Space settings).
API_TOKEN = os.getenv("HF_API_TOKEN", "")
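
# A minimal guard (a sketch, not in the original code): anonymous Inference API
# calls are heavily rate-limited, so warn up front if no token is configured.
if not API_TOKEN:
    print("Warning: HF_API_TOKEN is not set; requests may be rate-limited or rejected.")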


def chat_with_model(message, history):
    # Rebuild the conversation in Gemma's chat format, wrapping each turn in
    # <start_of_turn>...<end_of_turn> markers.
    full_prompt = ""
    for user_msg, bot_msg in history:
        full_prompt += f"<start_of_turn>user\n{user_msg}\n<end_of_turn>\n"
        full_prompt += f"<start_of_turn>model\n{bot_msg}\n<end_of_turn>\n"
    full_prompt += f"<start_of_turn>user\n{message}\n<end_of_turn>\n<start_of_turn>model\n"
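    # For history [["Hello", "Hi!"]] and message "How are you?", full_prompt
    # now looks like (illustrative):
    #   <start_of_turn>user
    #   Hello
    #   <end_of_turn>
    #   <start_of_turn>model
    #   Hi!
    #   <end_of_turn>
    #   <start_of_turn>user
    #   How are you?
    #   <end_of_turn>
    #   <start_of_turn>model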
    try:
        headers = {
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "application/json",
        }
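        # Generation settings: do_sample with temperature/top_p yields varied
        # replies; max_new_tokens caps the length of each completion.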
        payload = {
            "inputs": full_prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "do_sample": True,
            },
        }
        API_URL = "https://api-inference.huggingface.co/models/modelsmafia/punjabi_Gemma-2B"
        # Time out rather than hang the UI if the endpoint stalls (e.g., during a cold start).
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
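        # On success the text-generation endpoint returns a JSON list like
        # [{"generated_text": "..."}], where generated_text echoes the prompt;
        # strip the prompt so only the new completion is shown.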
        if response.status_code == 200:
            return response.json()[0]["generated_text"].replace(full_prompt, "")
        else:
            return (
                f"Error: status code {response.status_code}\n{response.text}\n\n"
                "You may need to configure the model for inference on Hugging Face."
            )
    except Exception as e:
        return f"Error: {e}\n\nThe model might not be properly configured for inference yet."
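
# Note: while the hosted model is cold-starting, the Inference API commonly
# returns HTTP 503 until it is loaded. A possible mitigation (a sketch, not in
# the original code) is to add "options": {"wait_for_model": True} to the
# payload so the request blocks until the model is ready.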


# Create the Gradio chat interface.
demo = gr.ChatInterface(
    chat_with_model,
    title="Chat with Punjabi Gemma 2B",
    description="A bilingual chat model for English and Punjabi",
    examples=[
        ["ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਤੁਸੀਂ ਕਿਵੇਂ ਹੋ?"],
        ["Tell me about Punjab in a few sentences."],
    ],
    theme="soft",
)

# Launch the interface when this file is run as a script.
if __name__ == "__main__":
    demo.launch()