Spaces:

ikraamkb
/

qtAnswering

Running

qtAnswering / appImage.py

Update appImage.py

297dd8a verified about 1 month ago

940 Bytes

	# appImage.py
	from transformers import pipeline
	import tempfile, os
	from PIL import Image
	from gtts import gTTS

	# Build the image-captioning pipeline once at import time; caption_image()
	# reuses this single module-level instance on every call.
	captioner = pipeline(
	    task="image-to-text",
	    model="nlpconnect/vit-gpt2-image-captioning",
	)

	async def caption_image(file):
	    """Caption an uploaded image and synthesize the caption as speech.

	    Args:
	        file: Upload object exposing an awaitable ``read()`` that returns
	            the raw image bytes.

	    Returns:
	        A dict with a ``"caption"`` key and, when speech synthesis produced
	        a file, an ``"audioUrl"`` key of the form ``/files/<basename>``.
	    """
	    contents = await file.read()

	    # delete=False: the captioner reopens the file by path, so it must
	    # outlive this handle. Leaving the ``with`` block flushes and closes it
	    # before inference reads it back.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
	        tmp.write(contents)
	        image_path = tmp.name

	    try:
	        captions = captioner(image_path)
	    finally:
	        # The image is only needed for inference; remove it so repeated
	        # requests do not pile up files in the temp directory.
	        try:
	            os.remove(image_path)
	        except OSError:
	            pass  # best-effort cleanup; never mask the inference result

	    caption = captions[0]['generated_text'] if captions else "No caption generated."

	    audio_path = text_to_speech(caption)

	    result = {"caption": caption}
	    if audio_path:
	        # NOTE(review): presumably a "/files" route defined elsewhere serves
	        # the temp directory by basename -- confirm against the app setup.
	        result["audioUrl"] = f"/files/{os.path.basename(audio_path)}"
	    return result

	def text_to_speech(text: str) -> str:
	    """Synthesize ``text`` to an MP3 temp file using gTTS.

	    Args:
	        text: The text to speak.

	    Returns:
	        The path of the generated ``.mp3`` file, or ``""`` when synthesis
	        fails for any reason (the caller treats an empty path as "no audio").
	    """
	    import logging

	    audio_path = ""
	    try:
	        tts = gTTS(text)
	        # Reserve a unique path and close the handle right away: gTTS writes
	        # to the path itself, and an open handle would leak a descriptor.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
	            audio_path = temp_audio.name
	        tts.save(audio_path)
	        return audio_path
	    except Exception:
	        # Stay best-effort (no audio rather than a crash), but record why and
	        # let KeyboardInterrupt/SystemExit propagate.
	        logging.getLogger(__name__).warning(
	            "Text-to-speech synthesis failed", exc_info=True
	        )
	        if audio_path:
	            # Do not leave an empty or partial file behind on failure.
	            try:
	                os.remove(audio_path)
	            except OSError:
	                pass
	        return ""