Spaces:

modelsmafia
/

models_mafia_llm

Sleeping

File size: 1,362 Bytes

a529014
6d5782e
45c31be
6d5782e
 
45c31be
 
6d5782e
 
45c31be
6d5782e
 
 
45c31be
6d5782e
bda7ad7
 
 
6d5782e
 
 
 
bda7ad7
6d5782e
 
45c31be
6d5782e
a529014
45c31be
 
 
 
 
 
a529014
45c31be
a529014
 
45c31be
a529014
45c31be

import gradio as gr
from huggingface_hub import InferenceClient

# Hub repository ID of the model that the inference client talks to.
MODEL_ID = "modelsmafia/punjabi_Gemma-2B"

# Single module-level client, reused by the chat handler on every request.
client = InferenceClient(model=MODEL_ID)

def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Two history shapes are accepted:

    * pair style: each entry is ``[user_text, assistant_text]``;
    * message style: each entry is already a ``{"role": ..., "content": ...}``
      dict.

    Entries are kept only when the role is ``"user"`` or ``"assistant"`` and
    the content is a non-empty string, so pending turns (``None`` replies) or
    non-text payloads never reach the model.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            candidates = [(turn.get("role"), turn.get("content"))]
        else:
            candidates = [("user", turn[0]), ("assistant", turn[1])]
        for role, content in candidates:
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
    return messages


def chat_with_model(message, history):
    """Return the model's reply to ``message`` given the prior ``history``.

    The whole conversation (history plus the new message) is sent through the
    client's chat-completion call so the model sees earlier turns. If that
    call fails for any reason, the handler falls back to a plain
    text-generation request containing only the latest message.

    Args:
        message: The user's newest message.
        history: Earlier turns, either as ``[user, assistant]`` pairs or as
            ``{"role", "content"}`` dicts; ``None`` or empty means no context.

    Returns:
        The generated reply, or a human-readable ``"Error: ..."`` string when
        the request cannot be completed. This function does not raise, so the
        chat UI always has something to display.
    """
    try:
        # History conversion lives inside the error boundary so an unexpected
        # history shape yields an error message instead of crashing the UI.
        messages = _history_to_messages(history)
        messages.append({"role": "user", "content": message})

        try:
            completion = client.chat_completion(
                messages=messages,
                max_tokens=512,
                temperature=0.7,
                top_p=0.9,
            )
            return completion.choices[0].message.content
        except Exception:
            # Chat endpoint unavailable for this model/deployment: degrade to
            # the single-prompt request (no conversational context).
            return client.text_generation(
                prompt=message,
                max_new_tokens=512,
                temperature=0.7,
                top_p=0.9,
            )
    except Exception as e:
        return f"Error: {e}\n\nThe model might not be properly configured for inference yet."

# Sample prompts offered in the UI, one in Punjabi and one in English.
EXAMPLE_PROMPTS = [
    ["ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਤੁਸੀਂ ਕਿਵੇਂ ਹੋ?"],
    ["Tell me about Punjab in a few sentences."],
]

# Chat UI wired to the chat handler defined in this file.
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="Chat with Punjabi Gemma 2B",
    description="A bilingual chat model for English and Punjabi",
    examples=EXAMPLE_PROMPTS,
    theme="soft",
)

# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    demo.launch()