File size: 5,225 Bytes
e8aa256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Computes the Frechet Video Distance (FVD) between the videos in two directories.
The videos must be in GIF format. They are processed in batches of 16, so if the number of
videos in a directory is not a multiple of 16, the remainder is discarded.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os

from tqdm import tqdm
import numpy as np

import tensorflow.compat.v1 as tf
import frechet_video_distance as fvd

# Command-line interface. Help strings are given for every option because
# ArgumentDefaultsHelpFormatter only prints a default next to options that
# have help text.
argparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
argparser.add_argument('-d0','--dir0', type=str, default='video_dir0',
                       help='directory containing the first set of GIF videos')
# The default used to be 'video_dir0' as well (copy-paste slip), which made a
# run without arguments compare a directory with itself.
argparser.add_argument('-d1','--dir1', type=str, default='video_dir1',
                       help='directory containing the second set of GIF videos')


# Videos are embedded in batches of this size; videos beyond the last full
# batch of a directory are discarded.
VIDEO_BATCH_SIZE = 16


def _list_gif_batches(video_dir, batch_size):
  """Returns the '.gif' paths in `video_dir`, trimmed to whole batches.

  Args:
    video_dir: directory to scan (non-recursively).
    batch_size: the returned list's length is a multiple of this value.

  Returns:
    Sorted list of paths; trailing files that do not fill a batch are dropped.
  """
  # Sorted so that the files dropped as remainder are the same on every run
  # (os.listdir returns entries in arbitrary order).
  gif_paths = sorted(
      os.path.join(video_dir, f) for f in os.listdir(video_dir) if f.endswith('.gif'))
  # Slice with an explicit end index: the previous `paths[:-remainder]` form
  # evaluated to `paths[:0]` (an empty list) whenever the remainder was 0,
  # i.e. exactly when the directory already held a multiple of `batch_size`.
  usable = len(gif_paths) - len(gif_paths) % batch_size
  return gif_paths[:usable]


def _load_or_compute_embeddings(video_dir, gif_paths, batch_size):
  """Loads cached ID3 embeddings for `video_dir`, or computes and caches them.

  The cache file is `<video_dir>/id3_embeddings.npy`. If it exists it is loaded
  and `gif_paths` is not used; otherwise the videos are embedded batch by batch
  and the concatenated result is written to that file.

  Args:
    video_dir: directory the videos come from; also holds the cache file.
    gif_paths: GIF paths to embed, expected to be a multiple of `batch_size`.
    batch_size: number of videos embedded per batch.

  Returns:
    The embeddings as a numpy array (batches concatenated along axis 0).

  Raises:
    ValueError: if there is no cache file and `gif_paths` is empty.
  """
  embeddings_file = os.path.join(video_dir, 'id3_embeddings.npy')

  if os.path.exists(embeddings_file):
    with open(embeddings_file, 'rb') as fp:
      embeddings = np.load(fp)
    print(f">>> Found stored ID3 activations for videos in {video_dir} in {embeddings_file}.")
    return embeddings

  if not gif_paths:
    # np.concatenate would raise an opaque ValueError on an empty list below;
    # fail early with a message that names the actual problem.
    raise ValueError(
        f"No usable GIF videos in {video_dir}: "
        f"at least {batch_size} '.gif' files are required.")

  print(f">>> Computing ID3 activations for videos in {video_dir}...")
  batch_embeddings = []
  for batch_start_idx in tqdm(range(0, len(gif_paths), batch_size)):
    # A fresh graph is built for every batch, as in the original implementation.
    with tf.Graph().as_default():
      # load batch of videos from GIFs and represent as tensor
      # (tf.stack requires every GIF in the batch to decode to the same shape)
      videos = tf.stack(
          [tf.io.decode_gif(tf.io.read_file(f))
           for f in gif_paths[batch_start_idx:batch_start_idx + batch_size]])
      with tf.Session() as sess:
        videos_array = sess.run(videos)
      # define placeholder for subsequent feeding
      ph_videos = tf.placeholder(shape=list(videos_array.shape), dtype=tf.uint8)
      # calculate embeddings
      id3_embeddings = fvd.create_id3_embedding(fvd.preprocess(ph_videos, (224, 224)))
      with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())
        batch_embeddings.append(
            sess.run(id3_embeddings, feed_dict={ph_videos: videos_array}))

  embeddings = np.concatenate(batch_embeddings, axis=0)
  with open(embeddings_file, 'wb') as fp:
    np.save(fp, embeddings)
  print(f">>> Saved ID3 embeddings for lookup in {embeddings_file}")
  return embeddings


def main(argv):
  """Computes and prints the FVD between the GIF videos of two directories.

  Args:
    argv: sequence whose first element is the parsed command-line namespace,
      providing the `dir0` and `dir1` directory paths.

  Raises:
    ValueError: if a directory has no cached embeddings and fewer than
      VIDEO_BATCH_SIZE GIF files.
  """
  args = argv[0]

  # List both directories up front so that a missing directory is reported
  # before any (expensive) embedding computation starts.
  dir0_gif_paths = _list_gif_batches(args.dir0, VIDEO_BATCH_SIZE)
  dir1_gif_paths = _list_gif_batches(args.dir1, VIDEO_BATCH_SIZE)

  # --- dir0 ID3 embeddings
  dir0_embeddings = _load_or_compute_embeddings(args.dir0, dir0_gif_paths, VIDEO_BATCH_SIZE)
  print(f">>> Embedding matrix: {dir0_embeddings.shape}")

  # --- dir1 ID3 embeddings
  dir1_embeddings = _load_or_compute_embeddings(args.dir1, dir1_gif_paths, VIDEO_BATCH_SIZE)
  print(f">>> Embedding matrix: {dir1_embeddings.shape}")

  # --- final FVD
  with tf.Graph().as_default():
    print(">>> Computing FVD...")
    result = fvd.calculate_fvd(dir0_embeddings, dir1_embeddings)
    with tf.Session() as sess:
      print(">>> FVD is: %.2f." % sess.run(result))


if __name__ == "__main__":
  # main() reads the parsed arguments from argv[0], so the namespace is handed
  # to tf.app.run wrapped in a single-element list.
  tf.app.run(main=main, argv=[argparser.parse_args()])