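"""Gradio Space: generate a detailed caption for an input image using
Microsoft's Florence-2-base model and its <MORE_DETAILED_CAPTION> task prompt."""
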
import os
import subprocess

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM
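
# Florence-2's remote modeling code tries to import flash_attn, so install it at
# startup. FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE skips compiling the CUDA
# kernels, which keeps the install quick on CPU-only Spaces hardware.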
subprocess.run('pip install flash-attn --no-build-isolation', env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': 'TRUE'}, shell=True)
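
# Pick the GPU if one is available, then load the model and processor once at
# startup; trust_remote_code is required because Florence-2 ships custom code.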
device = "cuda" if torch.cuda.is_available() else "cpu"
florence_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device).eval()
florence_processor = AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)

def generate_caption(image):
    # Gradio passes images as numpy arrays; convert to PIL for the processor.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    # Keep special tokens: post_process_generation needs them to parse the task output.
    generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = florence_processor.post_process_generation(
        generated_text,
        task="<MORE_DETAILED_CAPTION>",
        image_size=(image.width, image.height)
    )
    prompt = parsed_answer["<MORE_DETAILED_CAPTION>"]
    print("\n\nGeneration completed!: " + prompt)
    return prompt
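
# Build the Gradio UI: a single image in, a copyable text prompt out.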
io = gr.Interface(
    generate_caption,
    inputs=[gr.Image(label="Input Image")],
    outputs=[gr.Textbox(label="Output Prompt", lines=3, show_copy_button=True)],
    theme="Yntec/HaleyCH_Theme_Orange",
    description="⚠ Sorry for the inconvenience. This Space is currently running on CPU, which might affect performance. We appreciate your understanding.",
)
io.launch(debug=True)