import gradio as gr
from huggingface_hub import InferenceClient

# Use InferenceClient to talk to the hosted model through the Inference API.
client = InferenceClient(model="modelsmafia/punjabi_Gemma-2B")
def chat_with_model(message, history):
    # Rebuild the conversation in the OpenAI-style message format that
    # InferenceClient.chat_completion() expects, so the model sees the
    # prior turns and not just the latest message.
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        # Generate a response via the Inference API's chat-completion task.
        # This requires the model repo to define a chat template; see the
        # text_generation fallback below if it does not.
        response = client.chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            top_p=0.9,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}\n\nThe model might not be properly configured for inference yet."
# Create the Gradio chat interface.
demo = gr.ChatInterface(
    chat_with_model,
    title="Chat with Punjabi Gemma 2B",
    description="A bilingual chat model for English and Punjabi",
    examples=[
        ["ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਤੁਸੀਂ ਕਿਵੇਂ ਹੋ?"],  # "Sat Sri Akal (greetings), how are you?"
        ["Tell me about Punjab in a few sentences."],
    ],
    theme="soft",
)
# Launch the interface.
if __name__ == "__main__":
    demo.launch()
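
# A minimal local-run sketch (assumes `pip install gradio huggingface_hub`):
#   python app.py
# If the model or endpoint requires authentication, pass a Hugging Face
# access token explicitly, e.g. InferenceClient(model=..., token="hf_...").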