# ------------------------------------------
# TextDiffuser: Diffusion Models as Text Painters
# Paper Link: https://arxiv.org/abs/2305.10855
# Code Link: https://github.com/microsoft/unilm/tree/master/textdiffuser
# Copyright (c) Microsoft Corporation.
# This file provides the evaluation script (CLIPScore and FID) for the MARIOEval benchmark.
# ------------------------------------------
import argparse
import json
import os

import numpy as np

from clipscore import cal_clipscore
from fid_score import calculate_fid_given_paths

def eval_clipscore(root_eval, root_res, dataset, device="cuda:0", num_images_per_prompt=4):
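    """Compute CLIPScore for one MARIOEval subset.

    For each seed, the prompts in <dataset>.txt are paired with the images
    generated under images_<seed>/. Returns the mean CLIPScore across all
    seeds together with the per-image score dictionaries.
    """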
    with open(os.path.join(root_eval, dataset, dataset + '.txt'), 'r') as fr:
        text_list = fr.readlines()
    text_list = [_.strip() for _ in text_list]

    # Stable Diffusion outputs are saved as .png; the other methods use .jpg.
    # ('ext' replaces the original variable name 'format', which shadows a builtin.)
    ext = '.png' if 'stablediffusion' in root_res else '.jpg'

    clip_scores = []
    scores = []
    for seed in range(num_images_per_prompt):
        image_list = [os.path.join(root_res, dataset, 'images_' + str(seed),
                                   str(idx) + '_' + str(seed) + ext) for idx in range(len(text_list))]
        image_ids = [str(idx) + '_' + str(seed) + ext for idx in range(len(text_list))]
        score = cal_clipscore(image_ids=image_ids, image_paths=image_list, text_list=text_list, device=device)
        clip_score = np.mean([s['CLIPScore'] for s in score.values()])
        clip_scores.append(clip_score)
        scores.append(score)
    print("clip_score:", np.mean(clip_scores), clip_scores)
    return np.mean(clip_scores), scores
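
# Expected directory layout, inferred from the path construction above:
#   <root>/MARIOEval/<dataset>/<dataset>.txt             one prompt per line
#   <root>/MARIOEval/<dataset>/images/                   ground-truth images (FID subsets only)
#   <root>/generation/<method>/<dataset>/images_<seed>/<idx>_<seed>.jpg|.png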

def MARIOEval_evaluate_results(root, datasets_with_images, datasets, methods, gpu,
                               eval_clipscore_flag=True, eval_fid_flag=True, num_images_per_prompt=4):
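    """Evaluate generation methods on the MARIOEval subsets.

    CLIPScore is computed for every subset; FID only for the subsets that ship
    ground-truth images. Per-method results are written to
    <root>/generation/<method>/eval.json and aggregated into
    <root>/generation/eval.json.
    """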
    root_eval = os.path.join(root, "MARIOEval")
    method_res = {}
    # CUDA_VISIBLE_DEVICES is restricted to a single GPU in __main__, so the
    # selected GPU is always exposed as cuda:0 (the original "cuda:" + str(gpu)
    # would fail for gpu > 0).
    device = "cuda:0"
    for method_idx, method in enumerate(methods):
        if method_idx != gpu:  # each GPU evaluates one method, so several methods can run simultaneously
            continue
        print("\nmethod:", method)
        dataset_res = {}
        root_res = os.path.join(root, 'generation', method)
        for dataset in datasets:
            print("dataset:", dataset)
            dataset_res[dataset] = {}
            if eval_clipscore_flag:
                dataset_res[dataset]['clipscore'], dataset_res[dataset]['scores'] = \
                    eval_clipscore(root_eval, root_res, dataset, device, num_images_per_prompt)
            if eval_fid_flag and dataset in datasets_with_images:
                gt_path = os.path.join(root_eval, dataset, 'images')
                fids = []
                for idx in range(num_images_per_prompt):
                    gen_path = os.path.join(root_res, dataset, 'images_' + str(idx))
                    fids.append(calculate_fid_given_paths(paths=[gt_path, gen_path]))
                print("fid:", np.mean(fids), fids)
                dataset_res[dataset]['fid'] = np.mean(fids)

        if eval_clipscore_flag:
            # Method-level CLIPScore: average over every image of every subset, per seed.
            method_clipscores = []
            for seed in range(num_images_per_prompt):
                clipscore_list = []
                for dataset in datasets:
                    clipscore_list += [_['CLIPScore'] for _ in dataset_res[dataset]['scores'][seed].values()]
                method_clipscores.append(np.mean(clipscore_list))
            dataset_res['clipscore'] = np.mean(method_clipscores)

        if eval_fid_flag:
            # Method-level FID: the bundled fid_score accepts a list of directories on each side.
            method_fids = []
            for idx in range(num_images_per_prompt):
                gt_paths = []
                gen_paths = []
                for dataset in datasets_with_images:
                    gt_paths.append(os.path.join(root_eval, dataset, 'images'))
                    gen_paths.append(os.path.join(root_res, dataset, 'images_' + str(idx)))
                if len(gt_paths):
                    method_fids.append(calculate_fid_given_paths(paths=[gt_paths, gen_paths]))
            print("fid:", np.mean(method_fids), method_fids)
            dataset_res['fid'] = np.mean(method_fids)

        method_res[method] = dataset_res
        with open(os.path.join(root_res, 'eval.json'), 'w') as fw:
            json.dump(dataset_res, fw)

    print(method_res)
    with open(os.path.join(root, 'generation', 'eval.json'), 'w') as fw:
        json.dump(method_res, fw)

def merge_eval_results(root, methods):
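    """Merge the per-method eval.json files (possibly produced on different
    GPUs) into a single <root>/generation/eval.json, dropping the verbose
    per-image score dictionaries."""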
    method_res = {}
    for method in methods:
        root_res = os.path.join(root, 'generation', method)
        with open(os.path.join(root_res, 'eval.json'), 'r') as fr:
            dataset_res = json.load(fr)
        for k, v in dataset_res.items():
            if isinstance(v, dict):
                v.pop('scores', None)  # the per-image scores are too long to keep
        method_res[method] = dataset_res
    with open(os.path.join(root, 'generation', 'eval.json'), 'w') as fw:
        json.dump(method_res, fw)

def parse_args():
    parser = argparse.ArgumentParser(description="Evaluation script for the MARIOEval benchmark.")
    # Note: --dataset, --method, --split, and --total_split are parsed but not
    # used by the evaluation entry point below.
    parser.add_argument(
        "--dataset",
        type=str,
        default='TMDBEval500',
        required=False,
        choices=['TMDBEval500', 'OpenLibraryEval500', 'LAIONEval4000',
                 'ChineseDrawText', 'DrawBenchText', 'DrawTextCreative']
    )
    parser.add_argument(
        "--root",
        type=str,
        default="/path/to/data/TextDiffuser/evaluation/",
        required=True,
    )
    parser.add_argument(
        "--method",
        type=str,
        default='controlnet',
        required=False,
        choices=['controlnet', 'deepfloyd', 'stablediffusion', 'textdiffuser']
    )
    parser.add_argument(
        "--gpu",
        type=int,
        default=0,
        required=False,
    )
    parser.add_argument(
        "--split",
        type=int,
        default=0,
        required=False,
    )
    parser.add_argument(
        "--total_split",
        type=int,
        default=1,
        required=False,
    )
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = parse_args()
    # Pin this process to the requested GPU; everything downstream sees it as cuda:0.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    datasets_with_images = ['TMDBEval500', 'OpenLibraryEval500', 'LAIONEval4000']
    datasets = datasets_with_images + ['ChineseDrawText', 'DrawBenchText', 'DrawTextCreative']
    methods = ['textdiffuser', 'controlnet', 'deepfloyd', 'stablediffusion']
    MARIOEval_evaluate_results(args.root, datasets_with_images, datasets, methods, args.gpu,
                               eval_clipscore_flag=True, eval_fid_flag=True, num_images_per_prompt=4)
    merge_eval_results(args.root, methods)
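
# Minimal usage sketch, assuming this file is saved as eval.py (the root path
# is a placeholder). Note that merge_eval_results reads every method's
# eval.json, so it only succeeds once all four per-GPU runs have finished:
#   python eval.py --root /path/to/data/TextDiffuser/evaluation/ --gpu 0
#   python eval.py --root /path/to/data/TextDiffuser/evaluation/ --gpu 1
#   python eval.py --root /path/to/data/TextDiffuser/evaluation/ --gpu 2
#   python eval.py --root /path/to/data/TextDiffuser/evaluation/ --gpu 3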