marlenezw committed on
Commit b6c3fb5 · 1 Parent(s): 71e8f90

Delete MakeItTalk/animated.py

Files changed (1)
  1. MakeItTalk/animated.py +0 -277
MakeItTalk/animated.py DELETED
@@ -1,277 +0,0 @@
-
- # To add a new cell, type '# %%'
- # To add a new markdown cell, type '# %% [markdown]'
- # %%
- import torch
-
- # this checks that the current macOS version is at least 12.3+
- print(torch.backends.mps.is_available())
- # this checks that the current PyTorch installation was built with MPS support
- print(torch.backends.mps.is_built())
-
-
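- # NOTE: MPS is PyTorch's Apple-silicon GPU backend; both checks above should
- # print True before the landmark predictor below is created with device='mps'.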
- # %%
- import ipywidgets as widgets
- import glob
- import matplotlib.pyplot as plt
- print("Choose the image name to animate: (saved in folder 'MakeItTalk/examples/')")
- img_list = glob.glob1('examples', '*.jpg')
- img_list.sort()
- img_list = [item.split('.')[0] for item in img_list]
- default_head_name = widgets.Dropdown(options=img_list, value='marlene_v2')
- def on_change(change):
-     if change['type'] == 'change' and change['name'] == 'value':
-         plt.imshow(plt.imread('MakeItTalk/examples/{}.jpg'.format(default_head_name.value)))
-         plt.axis('off')
-         plt.show()
- default_head_name.observe(on_change)
- display(default_head_name)
- plt.imshow(plt.imread('MakeItTalk/examples/{}.jpg'.format(default_head_name.value)))
- plt.axis('off')
- plt.show()
-
-
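- # NOTE: display() is provided by IPython when these '# %%' cells are run
- # interactively (Jupyter / VS Code); it is not defined in a plain `python` run.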
- # %%
- #@markdown # Animation Controllers
- #@markdown Amplify the lip motion in horizontal direction
- AMP_LIP_SHAPE_X = 2 #@param {type:"slider", min:0.5, max:5.0, step:0.1}
-
- #@markdown Amplify the lip motion in vertical direction
- AMP_LIP_SHAPE_Y = 2 #@param {type:"slider", min:0.5, max:5.0, step:0.1}
-
- #@markdown Amplify the head pose motion (usually smaller than 1.0, set it to 0. for a static head pose)
- AMP_HEAD_POSE_MOTION = 0.35 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
-
- #@markdown Add naive eye blink
- ADD_NAIVE_EYE = True #@param ["False", "True"] {type:"raw"}
-
- #@markdown If your image has an opened mouth, set this to True, else False
- CLOSE_INPUT_FACE_MOUTH = True #@param ["False", "True"] {type:"raw"}
-
-
- #@markdown # Landmark Adjustment
-
- #@markdown Adjust upper lip thickness (positive value means thicker)
- UPPER_LIP_ADJUST = -1 #@param {type:"slider", min:-3.0, max:3.0, step:1.0}
-
- #@markdown Adjust lower lip thickness (positive value means thicker)
- LOWER_LIP_ADJUST = -1 #@param {type:"slider", min:-3.0, max:3.0, step:1.0}
-
- #@markdown Adjust static lip width (as a multiplier)
- LIP_WIDTH_ADJUST = 1.0 #@param {type:"slider", min:0.8, max:1.2, step:0.01}
-
-
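- # NOTE: the '#@markdown' / '#@param' annotations above are Google Colab form
- # widgets; outside Colab they are plain comments and the assignments simply act
- # as ordinary default values.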
- # %%
- import sys
- sys.path.append("thirdparty/AdaptiveWingLoss")
- import os, glob
- import numpy as np
- import cv2
- import argparse
- from src.approaches.train_image_translation import Image_translation_block
- import torch
- import pickle
- import face_alignment
- from face_alignment import face_alignment
- from src.autovc.AutoVC_mel_Convertor_retrain_version import AutoVC_mel_Convertor
- import shutil
- import time
- import util.utils as util
- from scipy.signal import savgol_filter
- from src.approaches.train_audio2landmark import Audio2landmark_model
-
-
- # %%
- sys.stdout = open(os.devnull, 'a')
-
- parser = argparse.ArgumentParser()
- parser.add_argument('--jpg', type=str, default='{}.jpg'.format(default_head_name.value))
- parser.add_argument('--close_input_face_mouth', default=CLOSE_INPUT_FACE_MOUTH, action='store_true')
- parser.add_argument('--load_AUTOVC_name', type=str, default='MakeItTalk/examples/ckpt/ckpt_autovc.pth')
- parser.add_argument('--load_a2l_G_name', type=str, default='MakeItTalk/examples/ckpt/ckpt_speaker_branch.pth')
- parser.add_argument('--load_a2l_C_name', type=str, default='MakeItTalk/examples/ckpt/ckpt_content_branch.pth') #ckpt_audio2landmark_c.pth')
- parser.add_argument('--load_G_name', type=str, default='MakeItTalk/examples/ckpt/ckpt_116_i2i_comb.pth') #ckpt_image2image.pth') #ckpt_i2i_finetune_150.pth') #c
- parser.add_argument('--amp_lip_x', type=float, default=AMP_LIP_SHAPE_X)
- parser.add_argument('--amp_lip_y', type=float, default=AMP_LIP_SHAPE_Y)
- parser.add_argument('--amp_pos', type=float, default=AMP_HEAD_POSE_MOTION)
- parser.add_argument('--reuse_train_emb_list', type=str, nargs='+', default=[]) # ['iWeklsXc0H8']) #['45hn7-LXDX8']) #['E_kmpT-EfOg']) #'iWeklsXc0H8', '29k8RtSUjE0', '45hn7-LXDX8',
- parser.add_argument('--add_audio_in', default=False, action='store_true')
- parser.add_argument('--comb_fan_awing', default=False, action='store_true')
- parser.add_argument('--output_folder', type=str, default='examples')
- parser.add_argument('--test_end2end', default=True, action='store_true')
- parser.add_argument('--dump_dir', type=str, default='', help='')
- parser.add_argument('--pos_dim', default=7, type=int)
- parser.add_argument('--use_prior_net', default=True, action='store_true')
- parser.add_argument('--transformer_d_model', default=32, type=int)
- parser.add_argument('--transformer_N', default=2, type=int)
- parser.add_argument('--transformer_heads', default=2, type=int)
- parser.add_argument('--spk_emb_enc_size', default=16, type=int)
- parser.add_argument('--init_content_encoder', type=str, default='')
- parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
- parser.add_argument('--reg_lr', type=float, default=1e-6, help='weight decay')
- parser.add_argument('--write', default=False, action='store_true')
- parser.add_argument('--segment_batch_size', type=int, default=1, help='batch size')
- parser.add_argument('--emb_coef', default=3.0, type=float)
- parser.add_argument('--lambda_laplacian_smooth_loss', default=1.0, type=float)
- parser.add_argument('--use_11spk_only', default=False, action='store_true')
- parser.add_argument('-f')
- opt_parser = parser.parse_args()
-
-
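- # NOTE: sys.stdout is redirected to os.devnull above, which is why every status
- # message later in the script is written to sys.stderr instead. The bare '-f'
- # argument is most likely there to swallow the '-f <connection-file>' flag that
- # Jupyter passes to kernels, so parse_args() does not fail inside a notebook.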
- # %%
- img = cv2.imread('MakeItTalk/examples/' + opt_parser.jpg)
- plt.imshow(img)
-
-
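- # NOTE: cv2.imread returns BGR channel order, so plt.imshow renders the colors
- # swapped; plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) would show the true
- # colors. This only affects the preview; the unmodified img is what is passed on.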
- # %%
- predictor = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device='mps', flip_input=True)
- shapes = predictor.get_landmarks(img)
- if (not shapes or len(shapes) != 1):
-     print('Cannot detect face landmarks. Exit.')
-     exit(-1)
- shape_3d = shapes[0]
-
-
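- # NOTE: get_landmarks returns one (68, 3) array of 3D facial landmarks per
- # detected face; the script requires exactly one face in the input image.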
- # %%
- if(opt_parser.close_input_face_mouth):
-     util.close_input_face_mouth(shape_3d)
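- # NOTE: the index ranges below follow the standard 68-point landmark layout
- # (48-67 mouth, 36-47 eyes); image y grows downward, so subtracting from
- # column 1 moves a point up.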
- shape_3d[48:, 0] = (shape_3d[48:, 0] - np.mean(shape_3d[48:, 0])) * LIP_WIDTH_ADJUST + np.mean(shape_3d[48:, 0]) # wider lips
- shape_3d[49:54, 1] -= UPPER_LIP_ADJUST # thinner upper lip
- shape_3d[55:60, 1] += LOWER_LIP_ADJUST # thinner lower lip
- shape_3d[[37,38,43,44], 1] -= 2. # larger eyes
- shape_3d[[40,41,46,47], 1] += 2. # larger eyes
- shape_3d, scale, shift = util.norm_input_face(shape_3d)
-
- print("Loaded Image...", file=sys.stderr)
-
-
- # %%
- au_data = []
- au_emb = []
- ains = glob.glob1('examples', '*.wav')
- ains = [item for item in ains if item != 'tmp.wav']
- ains.sort()
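- # NOTE: each input WAV is resampled in place to 16 kHz via ffmpeg ('-ar 16000'),
- # a speaker embedding is extracted from it (get_spk_emb), and the waveform is
- # converted to the AutoVC mel features consumed by the audio-to-landmark model.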
- for ain in ains:
-     os.system('ffmpeg -y -loglevel error -i MakeItTalk/examples/{} -ar 16000 MakeItTalk/examples/tmp.wav'.format(ain))
-     shutil.copyfile('MakeItTalk/examples/tmp.wav', 'MakeItTalk/examples/{}'.format(ain))
-
-     # au embedding
-     from thirdparty.resemblyer_util.speaker_emb import get_spk_emb
-     me, ae = get_spk_emb('MakeItTalk/examples/{}'.format(ain))
-     au_emb.append(me.reshape(-1))
-
-     print('Processing audio file', ain)
-     c = AutoVC_mel_Convertor('examples')
-
-     au_data_i = c.convert_single_wav_to_autovc_input(audio_filename=os.path.join('examples', ain),
-                                                      autovc_model_path=opt_parser.load_AUTOVC_name)
-     au_data += au_data_i
- if(os.path.isfile('MakeItTalk/examples/tmp.wav')):
-     os.remove('MakeItTalk/examples/tmp.wav')
-
- print("Loaded audio...", file=sys.stderr)
-
-
-
- # %%
- # landmark fake placeholder
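- # NOTE: 68 landmarks x 3 coordinates = 204 zeros per audio frame; these
- # zero-filled landmark / pose arrays are pickled to examples/dump/ as the
- # placeholder test inputs for Audio2landmark_model (the real landmarks are
- # predicted in the next cell).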
- fl_data = []
- rot_tran, rot_quat, anchor_t_shape = [], [], []
- for au, info in au_data:
-     au_length = au.shape[0]
-     fl = np.zeros(shape=(au_length, 68 * 3))
-     fl_data.append((fl, info))
-     rot_tran.append(np.zeros(shape=(au_length, 3, 4)))
-     rot_quat.append(np.zeros(shape=(au_length, 4)))
-     anchor_t_shape.append(np.zeros(shape=(au_length, 68 * 3)))
-
- if(os.path.exists(os.path.join('examples', 'dump', 'random_val_fl.pickle'))):
-     os.remove(os.path.join('examples', 'dump', 'random_val_fl.pickle'))
- if(os.path.exists(os.path.join('examples', 'dump', 'random_val_fl_interp.pickle'))):
-     os.remove(os.path.join('examples', 'dump', 'random_val_fl_interp.pickle'))
- if(os.path.exists(os.path.join('examples', 'dump', 'random_val_au.pickle'))):
-     os.remove(os.path.join('examples', 'dump', 'random_val_au.pickle'))
- if (os.path.exists(os.path.join('examples', 'dump', 'random_val_gaze.pickle'))):
-     os.remove(os.path.join('examples', 'dump', 'random_val_gaze.pickle'))
-
- with open(os.path.join('examples', 'dump', 'random_val_fl.pickle'), 'wb') as fp:
-     pickle.dump(fl_data, fp)
- with open(os.path.join('examples', 'dump', 'random_val_au.pickle'), 'wb') as fp:
-     pickle.dump(au_data, fp)
- with open(os.path.join('examples', 'dump', 'random_val_gaze.pickle'), 'wb') as fp:
-     gaze = {'rot_trans':rot_tran, 'rot_quat':rot_quat, 'anchor_t_shape':anchor_t_shape}
-     pickle.dump(gaze, fp)
-
-
- # %%
- model = Audio2landmark_model(opt_parser, jpg_shape=shape_3d)
- if(len(opt_parser.reuse_train_emb_list) == 0):
-     model.test(au_emb=au_emb)
- else:
-     model.test(au_emb=None)
-
- print("Audio->Landmark...", file=sys.stderr)
-
-
- # %%
- fls = glob.glob1('examples', 'pred_fls_*.txt')
- fls.sort()
-
- for i in range(0, len(fls)):
-     fl = np.loadtxt(os.path.join('examples', fls[i])).reshape((-1, 68, 3))
-     print(fls[i])
-     fl[:, :, 0:2] = -fl[:, :, 0:2]
-     fl[:, :, 0:2] = fl[:, :, 0:2] / scale - shift
-
-     if (ADD_NAIVE_EYE):
-         fl = util.add_naive_eye(fl)
-
-     # additional smooth
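-     # NOTE: savgol_filter below applies Savitzky-Golay temporal smoothing to the
-     # flattened (68*3 = 204-dim) landmark sequence: a wider window (15 frames)
-     # for points 0-47 (jaw, brows, nose, eyes) and a narrower one (5 frames) for
-     # points 48-67 (mouth), presumably so lip motion stays crisp.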
-     fl = fl.reshape((-1, 204))
-     fl[:, :48 * 3] = savgol_filter(fl[:, :48 * 3], 15, 3, axis=0)
-     fl[:, 48*3:] = savgol_filter(fl[:, 48*3:], 5, 3, axis=0)
-     fl = fl.reshape((-1, 68, 3))
-
-     ''' STEP 6: Image2image translation '''
-     model = Image_translation_block(opt_parser, single_test=True)
-     with torch.no_grad():
-         model.single_test(jpg=img, fls=fl, filename=fls[i], prefix=opt_parser.jpg.split('.')[0])
-         print('finish image2image gen')
-     os.remove(os.path.join('examples', fls[i]))
-
-     print("{} / {}: Landmark->Face...".format(i+1, len(fls)), file=sys.stderr)
- print("Done!", file=sys.stderr)
-
- # %% [markdown]
- # # Generated video from image and sound clip
-
- # %%
- from IPython.display import Video
-
- Video("MakeItTalk/examples/marlenes_v1.mp4")
-
-
- # %%
-
-
-
- # %%
- from IPython.display import HTML
- from base64 import b64encode
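- # NOTE: the loop below embeds each rendered MP4 directly in the notebook output
- # as a base64 'data:video/mp4' URL inside an HTML <video> tag; long clips will
- # inflate the notebook size accordingly.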
-
- for ain in ains:
-     OUTPUT_MP4_NAME = '{}_pred_fls_{}_audio_embed.mp4'.format(
-         opt_parser.jpg.split('.')[0],
-         ain.split('.')[0]
-     )
-     mp4 = open('MakeItTalk/examples/{}'.format(OUTPUT_MP4_NAME), 'rb').read()
-     data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
-
-     print('Display animation: MakeItTalk/examples/{}'.format(OUTPUT_MP4_NAME), file=sys.stderr)
-     display(HTML("""
-     <video width=600 controls>
-       <source src="%s" type="video/mp4">
-     </video>
-     """ % data_url))
-
-