abiabidali's picture
Update app.py
d327984 verified
raw
history blame
3.28 kB
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import pandas as pd
import numpy as np
import gradio as gr
import tempfile
import os
import csv
# Load the BLIP captioning processor and model once, at module import, so
# every captioning call reuses the same pair.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
def generate_image_caption(image):
    """
    Produce a text caption for an image with the module-level BLIP model.

    Args:
        image (PIL.Image): The image to describe.

    Returns:
        str: The decoded caption, with special tokens skipped.
    """
    # The image is converted to RGB before it is handed to the processor.
    rgb_image = image.convert("RGB")
    model_inputs = processor(images=rgb_image, return_tensors="pt")
    generated_ids = model.generate(**model_inputs)
    # Only the first generated sequence is decoded and returned.
    return processor.decode(generated_ids[0], skip_special_tokens=True)
def generate_keywords(caption, max_keywords=50):
    """
    Generate a list of keywords from the caption.

    The caption is split on whitespace and duplicate words are dropped while
    keeping the order in which they first appear, so the same caption always
    yields the same keyword list.

    Args:
        caption (str): The image caption.
        max_keywords (int): Maximum number of keywords to return.

    Returns:
        list: Up to ``max_keywords`` unique words, in first-appearance order.
    """
    # dict.fromkeys de-duplicates while preserving insertion order. A plain
    # set() has arbitrary ordering for strings, which made both the keyword
    # order and the choice of which words survive the cut vary between runs.
    unique_words = list(dict.fromkeys(caption.split()))
    return unique_words[:max_keywords]
def process_images(image_files):
    """
    Process uploaded images to generate metadata and create a CSV file.

    For every uploaded image a caption is generated; when the caption is
    non-empty, a row with the file name, a title (the caption capped at 100
    characters) and comma-joined keywords is written to the CSV.

    Args:
        image_files (list | None): Uploaded image files. Each entry is either
            a file-like object exposing its path as ``name`` or a plain path
            string. ``None`` is treated the same as an empty list.

    Returns:
        tuple: A list of PIL images (one per uploaded file) and the path to
        the CSV file. With no uploads the list is empty and the CSV contains
        only the header row.
    """
    metadata = []
    gallery_images = []
    # The directory is deliberately not removed here: the CSV path is
    # returned to the caller, so the file has to outlive this call.
    temp_dir = tempfile.mkdtemp()

    # `or []` guards against None being passed when nothing was uploaded.
    for image_file in image_files or []:
        # Accept both file-like objects (path in `.name`) and path strings.
        path = getattr(image_file, "name", image_file)
        filename = os.path.basename(path)

        # Image.open is lazy and keeps the file handle open; copy the pixel
        # data into memory and let the context manager close the handle.
        with Image.open(path) as source_image:
            image = source_image.copy()
        gallery_images.append(image)

        caption = generate_image_caption(image)
        if not caption:
            continue

        metadata.append({
            'filename': filename,
            # Cap the title at 100 characters. Shorter captions are kept
            # unchanged; nothing pads them up to a minimum length.
            'title': caption[:100],
            'keywords': generate_keywords(caption),
        })

    # Create CSV file
    csv_file_path = os.path.join(temp_dir, 'images_metadata.csv')
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'Title', 'Keywords'])
        writer.writerows(
            [row['filename'], row['title'], ','.join(row['keywords'])]
            for row in metadata
        )

    return gallery_images, csv_file_path
# Build the Gradio UI: a multi-file upload feeds process_images, whose two
# return values populate an image gallery and a downloadable CSV file.
# NOTE(review): whether `type="file"` and `multiple=True` are accepted depends
# on the installed Gradio version -- confirm against the pinned release.
upload_input = gr.Files(label="Upload Image Files", type="file", multiple=True)
gallery_output = gr.Gallery(label="Processed Images")
csv_output = gr.File(label="Download Metadata CSV")

iface = gr.Interface(
    fn=process_images,
    inputs=[upload_input],
    outputs=[gallery_output, csv_output],
    title="Image Captioning and Metadata Generator",
    description="Upload multiple images to generate captions and metadata. Download the metadata as a CSV file.",
)

# Start the app as soon as this module is executed.
iface.launch(debug=True)