import gradio as gr
import requests
import os

# Get the API token from an environment variable
# (add HF_API_TOKEN as a secret in your Space settings).
API_TOKEN = os.getenv("HF_API_TOKEN", "")

# Hosted Inference API endpoint for the model
API_URL = "https://api-inference.huggingface.co/models/modelsmafia/punjabi_Gemma-2B"


def chat_with_model(message, history):
    # Flatten the conversation history into a single prompt,
    # alternating "user" and "model" turns.
    full_prompt = ""
    for user_turn, model_turn in history:
        full_prompt += f"user\n{user_turn}\n\n"
        full_prompt += f"model\n{model_turn}\n\n"
    full_prompt += f"user\n{message}\n\nmodel\n"

    try:
        headers = {
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "application/json",
        }
        payload = {
            "inputs": full_prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "do_sample": True,
            },
        }
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)

        if response.status_code == 200:
            # The API echoes the prompt back, so strip it from the generated text.
            return response.json()[0]["generated_text"].replace(full_prompt, "")
        else:
            return (
                f"Error: Status code {response.status_code}\n{response.text}\n\n"
                "You need to configure your model for inference on Hugging Face."
            )
    except Exception as e:
        return (
            f"Error: {str(e)}\n\n"
            "The model might not be properly configured for inference yet."
        )


# Create the Gradio chat interface
demo = gr.ChatInterface(
    chat_with_model,
    title="Chat with Punjabi Gemma 2B",
    description="A bilingual chat model for English and Punjabi",
    examples=[
        ["ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਤੁਸੀਂ ਕਿਵੇਂ ਹੋ?"],
        ["Tell me about Punjab in a few sentences."],
    ],
    theme="soft",
)

# Launch the interface
if __name__ == "__main__":
    demo.launch()
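
# --- Optional local smoke test (a minimal sketch, not part of the Space app) ---
# Uncomment to verify the Inference API endpoint responds before wiring up
# Gradio. Assumes HF_API_TOKEN is exported in your environment; the prompt
# below is a hypothetical single-turn example in the same plain-text turn
# format used by chat_with_model above.
#
# import os, requests
# resp = requests.post(
#     "https://api-inference.huggingface.co/models/modelsmafia/punjabi_Gemma-2B",
#     headers={"Authorization": f"Bearer {os.getenv('HF_API_TOKEN', '')}"},
#     json={"inputs": "user\nHello!\n\nmodel\n", "parameters": {"max_new_tokens": 32}},
#     timeout=60,
# )
# print(resp.status_code, resp.json())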