# Summarization / appImage.py
# Source: Hugging Face Space (uploader: ikraamkb, commit cc49774 verified, 3.03 kB)
# NOTE: the lines above were scraped page chrome ("raw / history / blame"), not code;
# they have been converted to comments so this module parses.
# NOTE(review): dead code — this triple-quoted string holds an earlier FastAPI
# version of this module (with its own /imagecaption/ endpoint). It is never
# used at runtime; kept verbatim as reference. Consider deleting it.
"""from fastapi import FastAPI, UploadFile, File
from fastapi.responses import RedirectResponse, JSONResponse
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import tempfile
import torch
app = FastAPI()
# Load model
try:
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
USE_GIT = True
except Exception:
from transformers import pipeline
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
USE_GIT = False
def generate_caption(image_path):
try:
if USE_GIT:
image = Image.open(image_path)
inputs = processor(images=image, return_tensors="pt")
outputs = model.generate(**inputs, max_length=50)
return processor.batch_decode(outputs, skip_special_tokens=True)[0]
else:
result = captioner(image_path)
return result[0]['generated_text']
except Exception as e:
return f"Error generating caption: {str(e)}"
@app.post("/imagecaption/")
async def caption_from_frontend(file: UploadFile = File(...)):
contents = await file.read()
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
tmp.write(contents)
image_path = tmp.name
caption = generate_caption(image_path)
return JSONResponse({"caption": caption})
@app.get("/")
def home():
return RedirectResponse(url="/")"""
# appImage.py
from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
import tempfile, os
from PIL import Image
from gtts import gTTS
import torch
# Model loading at import time.
# Preferred backend: Microsoft GIT (COCO-finetuned) via processor + causal-LM model.
# If that download/initialization fails for any reason, fall back to the
# ViT-GPT2 "image-to-text" pipeline. USE_GIT records which backend is active;
# caption_image() below branches on it and reads processor/model/captioner.
# NOTE(review): the broad `except Exception` is a deliberate best-effort
# fallback, not error suppression — both paths leave a usable captioner.
try:
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
    USE_GIT = True
except Exception:
    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    USE_GIT = False
async def caption_image(file):
    """Generate a caption (and best-effort TTS audio) for an uploaded image.

    Args:
        file: an UploadFile-like object exposing an async ``read()`` method.

    Returns:
        dict: always contains "caption"; when text-to-speech succeeds it also
        contains "audio", a ``/files/``-prefixed URL to the generated MP3.
    """
    contents = await file.read()
    # Persist the upload to disk: the pipeline backend takes a file path, and
    # PIL can open the same path for the GIT backend.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
        tmp.write(contents)
        image_path = tmp.name
    try:
        if USE_GIT:
            image = Image.open(image_path).convert('RGB')
            pixel_values = processor(images=image, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values, max_length=500)
            caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        else:
            captions = captioner(image_path)
            caption = captions[0]['generated_text'] if captions else "No caption generated."
    finally:
        # Fix: the temp image was created with delete=False and previously
        # never removed, leaking one file per request.
        try:
            os.remove(image_path)
        except OSError:
            pass  # best-effort cleanup; never mask a captioning error
    result = {"caption": caption}
    # Audio is optional: text_to_speech returns "" on failure.
    audio_path = text_to_speech(caption)
    if audio_path:
        result["audio"] = f"/files/{os.path.basename(audio_path)}"
    return result
def text_to_speech(text: str) -> str:
    """Render *text* to an MP3 with gTTS.

    Returns:
        str: path to a temporary .mp3 file, or "" if synthesis failed
        (callers treat a falsy result as "no audio available").
    """
    try:
        tts = gTTS(text)
        # Fix: create the temp file and close its handle before gTTS writes to
        # the path — the old code saved into a still-open NamedTemporaryFile,
        # which leaks the handle and fails on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            audio_path = temp_audio.name
        tts.save(audio_path)
        return audio_path
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Audio is best-effort, so failures yield "".
        return ""