Image-Captioning-Metadata-Generator

Sleeping

App Files Files Community

Image-Captioning-Metadata-Generator / app.py

abiabidali

Update app.py

d327984 verified 9 months ago

raw

history blame

3.28 kB

	from transformers import BlipProcessor, BlipForConditionalGeneration
	from PIL import Image
	import pandas as pd
	import numpy as np
	import gradio as gr
	import tempfile
	import os
	import csv

	# Load the BLIP captioning processor and model once at import time; both
	# come from the same pretrained checkpoint, so name it in a single place.
	BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
	processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
	model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

	def generate_image_caption(image):
	    """
	    Produce a text caption for an image using the module-level BLIP model.

	    Args:
	        image (PIL.Image): The image to describe.

	    Returns:
	        str: The decoded caption with special tokens removed.
	    """
	    # The image is converted to RGB before it is handed to the processor.
	    rgb_image = image.convert("RGB")

	    # Encode the image as PyTorch tensors, then generate caption token ids.
	    encoded = processor(images=rgb_image, return_tensors="pt")
	    token_ids = model.generate(**encoded)

	    # Only the first generated sequence is decoded.
	    return processor.decode(token_ids[0], skip_special_tokens=True)

	def generate_keywords(caption, max_keywords=50):
	    """
	    Generate a list of unique single-word keywords from a caption.

	    The caption is split on whitespace and duplicates are dropped while
	    keeping the order of first occurrence, so the result is deterministic.
	    (Going through ``set`` would yield an arbitrary order that can differ
	    between interpreter runs, which also made the truncation arbitrary.)

	    Args:
	        caption (str): The image caption.
	        max_keywords (int): Maximum number of keywords to return; expected
	            to be non-negative. Defaults to 50.

	    Returns:
	        list: Up to ``max_keywords`` unique words, in caption order.
	    """
	    # dict keys are unique and remember insertion order, which gives an
	    # order-preserving de-duplication without any extra imports.
	    unique_words = dict.fromkeys(caption.split())
	    return list(unique_words)[:max_keywords]

	def _write_metadata_csv(metadata, directory):
	    """Write the metadata rows to ``images_metadata.csv`` in *directory* and return its path."""
	    csv_file_path = os.path.join(directory, 'images_metadata.csv')
	    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
	        writer = csv.writer(file)
	        writer.writerow(['Filename', 'Title', 'Keywords'])
	        writer.writerows(
	            [row['filename'], row['title'], ','.join(row['keywords'])]
	            for row in metadata
	        )
	    return csv_file_path


	def process_images(image_files):
	    """
	    Process uploaded images to generate metadata and create a CSV file.

	    Each image is captioned; when the caption is non-empty a row holding the
	    file name, a title (the caption cut to at most 100 characters) and the
	    comma-joined keywords is written to the CSV. Every uploaded image is
	    returned for display, whether or not a caption was produced.

	    Args:
	        image_files (list | None): Uploaded files. Each item may be an object
	            exposing a ``name`` attribute holding its path, or a plain path
	            string. ``None`` is treated as an empty upload.

	    Returns:
	        tuple: A list of PIL images, path to the CSV file.
	    """
	    metadata = []
	    gallery_images = []
	    temp_dir = tempfile.mkdtemp()

	    # Nothing uploaded arrives as None; treat it as an empty batch so the
	    # function still returns a header-only CSV instead of raising TypeError.
	    for image_file in image_files or []:
	        # Accept both file wrappers (with .name) and bare path strings.
	        path = getattr(image_file, 'name', image_file)

	        # Open each file once and release its handle promptly. copy() forces
	        # the pixel data to load, so the returned image outlives the file.
	        with Image.open(path) as image:
	            caption = generate_image_caption(image)
	            gallery_images.append(image.copy())

	        if caption:
	            metadata.append({
	                'filename': os.path.basename(path),
	                # A caption cannot be lengthened, so only the 100-character
	                # upper bound of the intended 70-100 range is enforceable.
	                'title': caption[:100],
	                'keywords': generate_keywords(caption),
	            })

	    csv_file_path = _write_metadata_csv(metadata, temp_dir)
	    return gallery_images, csv_file_path

	# Build the Gradio components first, then wire them into the interface.
	# NOTE(review): `type="file"` and `multiple=True` look specific to an older
	# Gradio release - confirm against the version this Space pins.
	upload_input = gr.Files(label="Upload Image Files", type="file", multiple=True)
	gallery_output = gr.Gallery(label="Processed Images")
	csv_output = gr.File(label="Download Metadata CSV")

	iface = gr.Interface(
	    fn=process_images,
	    inputs=[upload_input],
	    outputs=[gallery_output, csv_output],
	    title="Image Captioning and Metadata Generator",
	    description="Upload multiple images to generate captions and metadata. Download the metadata as a CSV file.",
	)

	# Start the app with debug output enabled.
	iface.launch(debug=True)