Spaces:
Upload marlenezw/audio-driven-animations/MakeItTalk with huggingface_hub
This view is limited to 50 files because it contains too many changes.
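For context, a commit with this title is typically produced by a `huggingface_hub` call along the following lines (a minimal sketch; the local folder path and token handling are assumptions, not recorded in this diff):

```python
# Minimal sketch of the kind of call that produces an
# "Upload ... with huggingface_hub" commit. The local folder path is an
# assumption; repo_id and path_in_repo mirror the prefix seen in this diff.
from huggingface_hub import HfApi

api = HfApi()  # assumes a token from `huggingface-cli login`
api.upload_folder(
    folder_path="./MakeItTalk",  # local checkout (assumed)
    path_in_repo="marlenezw/audio-driven-animations/MakeItTalk",
    repo_id="marlenezw/audio-driven-animations",
    repo_type="space",
)
```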
- .gitattributes +3 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/.gitignore +8 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/LICENSE +201 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/README.md +82 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/__init__.py +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/__pycache__/__init__.cpython-37.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/__pycache__/__init__.cpython-39.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/ckpt/.gitkeep +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__init__.py +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/__init__.cpython-37.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/__init__.cpython-39.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/coord_conv.cpython-37.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/coord_conv.cpython-39.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/models.cpython-37.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/models.cpython-39.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/coord_conv.py +157 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/dataloader.py +368 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/evaler.py +151 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/models.py +228 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/eval.py +77 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/images/wflw.png +3 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/images/wflw_table.png +3 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/requirements.txt +12 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/scripts/eval_wflw.sh +10 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__init__.py +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__pycache__/__init__.cpython-37.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__pycache__/utils.cpython-37.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__pycache__/utils.cpython-39.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/utils.py +354 -0
- marlenezw/audio-driven-animations/MakeItTalk/__init__.py +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/__pycache__/__init__.cpython-37.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/__pycache__/__init__.cpython-39.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/CODEOWNERS +1 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/LICENCE.txt +21 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/README.md +98 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__init__.py +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__init__.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/__init__.cpython-36.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/data_loading_functions.cpython-36.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/deep_heatmaps_model_fusion_net.cpython-36.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/deformation_functions.cpython-36.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/logging_functions.cpython-36.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/menpo_functions.cpython-36.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/ops.cpython-36.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/pdm_clm_functions.cpython-36.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/crop_training_set.py +38 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/data_loading_functions.py +161 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/data_loading_functions.pyc +0 -0
- marlenezw/audio-driven-animations/MakeItTalk/face_of_art/deep_heatmaps_model_fusion_net.py +872 -0
.gitattributes
CHANGED
@@ -34,3 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 marlenezw/audio-driven-animations/MakeItTalk/examples/ckpt filter=lfs diff=lfs merge=lfs -text
 MakeItTalk/examples/ckpt filter=lfs diff=lfs merge=lfs -text
+marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/images/wflw.png filter=lfs diff=lfs merge=lfs -text
+marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/images/wflw_table.png filter=lfs diff=lfs merge=lfs -text
+marlenezw/audio-driven-animations/MakeItTalk/face_of_art/old/teaser.png filter=lfs diff=lfs merge=lfs -text
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/.gitignore
ADDED
@@ -0,0 +1,8 @@
+# Python generated files
+*.pyc
+
+# Project related files
+ckpt/*.pth
+dataset/*
+!dataset/*.py
+experiments/*
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/LICENSE
ADDED
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/README.md
ADDED
@@ -0,0 +1,82 @@
+# AdaptiveWingLoss
+## [arXiv](https://arxiv.org/abs/1904.07399)
+PyTorch implementation of "Adaptive Wing Loss for Robust Face Alignment via Heatmap Regression".
+
+<img src='images/wflw.png' width="1000px">
+
+## Update Logs:
+### October 28, 2019
+* Pretrained model and evaluation code on the WFLW dataset are released.
+
+## Installation
+#### Note: The code was originally developed under Python 2.x and PyTorch 0.4. This released version was revised from the original code and tested on Python 3.5.7 and PyTorch 1.3.0.
+
+Install system requirements:
+```
+sudo apt-get install python3-dev python3-pip python3-tk libglib2.0-0
+```
+
+Install Python dependencies:
+```
+pip3 install -r requirements.txt
+```
+
+## Run Evaluation on the WFLW dataset
+1. Download and process the WFLW dataset
+* Download the WFLW dataset and annotations from [here](https://wywu.github.io/projects/LAB/WFLW.html).
+* Unzip the WFLW dataset and annotations and move the files into the ```./dataset``` directory. Your directory should look like this:
+```
+AdaptiveWingLoss
+└───dataset
+   │
+   └───WFLW_annotations
+   │   └───list_98pt_rect_attr_train_test
+   │   │
+   │   └───list_98pt_test
+   │
+   └───WFLW_images
+      └───0--Parade
+      │
+      └───...
+```
+* Inside the ```./dataset``` directory, run:
+```
+python convert_WFLW.py
+```
+A new directory ```./dataset/WFLW_test``` should be generated with 2500 processed testing images and corresponding landmarks.
+
+2. Download the pretrained model from [Google Drive](https://drive.google.com/file/d/1HZaSjLoorQ4QCEx7PRTxOmg0bBPYSqhH/view?usp=sharing) and put it in the ```./ckpt``` directory.
+
+3. Within the ```./scripts``` directory, run the following command:
+```
+sh eval_wflw.sh
+```
+
+<img src='images/wflw_table.png' width="800px">
+*GTBbox indicates that the ground-truth landmarks are used as the bounding box to crop faces.
+
+## Future Plans
+- [x] Release evaluation code and pretrained model on the WFLW dataset.
+
+- [ ] Release training code on the WFLW dataset.
+
+- [ ] Release pretrained models and code on the 300W, AFLW and COFW datasets.
+
+- [ ] Release facial landmark detection API
+
+
+## Citation
+If you find this work useful for your research, please cite the following paper:
+
+```
+@InProceedings{Wang_2019_ICCV,
+author = {Wang, Xinyao and Bo, Liefeng and Fuxin, Li},
+title = {Adaptive Wing Loss for Robust Face Alignment via Heatmap Regression},
+booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
+month = {October},
+year = {2019}
+}
+```
+
+## Acknowledgments
+This repository borrows or partially modifies the hourglass model and data-processing code from [face alignment](https://github.com/1adrianb/face-alignment) and [pose-hg-train](https://github.com/princeton-vl/pose-hg-train).
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/__init__.py
ADDED
File without changes

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (164 Bytes)

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (179 Bytes)

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/ckpt/.gitkeep
ADDED
File without changes

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__init__.py
ADDED
File without changes

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (169 Bytes)

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (184 Bytes)

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/coord_conv.cpython-37.pyc
ADDED
Binary file (4.33 kB)

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/coord_conv.cpython-39.pyc
ADDED
Binary file (4.38 kB)

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/models.cpython-37.pyc
ADDED
Binary file (5.77 kB)

marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/__pycache__/models.cpython-39.pyc
ADDED
Binary file (5.83 kB)
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/coord_conv.py
ADDED
@@ -0,0 +1,157 @@
+import torch
+import torch.nn as nn
+
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+class AddCoordsTh(nn.Module):
+    def __init__(self, x_dim=64, y_dim=64, with_r=False, with_boundary=False):
+        super(AddCoordsTh, self).__init__()
+        self.x_dim = x_dim
+        self.y_dim = y_dim
+        self.with_r = with_r
+        self.with_boundary = with_boundary
+
+    def forward(self, input_tensor, heatmap=None):
+        """
+        input_tensor: (batch, c, x_dim, y_dim)
+        """
+        batch_size_tensor = input_tensor.shape[0]
+
+        xx_ones = torch.ones([1, self.y_dim], dtype=torch.int32).to(device)
+        xx_ones = xx_ones.unsqueeze(-1)
+
+        xx_range = torch.arange(self.x_dim, dtype=torch.int32).unsqueeze(0).to(device)
+        xx_range = xx_range.unsqueeze(1)
+
+        xx_channel = torch.matmul(xx_ones.float(), xx_range.float())
+        xx_channel = xx_channel.unsqueeze(-1)
+
+        yy_ones = torch.ones([1, self.x_dim], dtype=torch.int32).to(device)
+        yy_ones = yy_ones.unsqueeze(1)
+
+        yy_range = torch.arange(self.y_dim, dtype=torch.int32).unsqueeze(0).to(device)
+        yy_range = yy_range.unsqueeze(-1)
+
+        yy_channel = torch.matmul(yy_range.float(), yy_ones.float())
+        yy_channel = yy_channel.unsqueeze(-1)
+
+        xx_channel = xx_channel.permute(0, 3, 2, 1)
+        yy_channel = yy_channel.permute(0, 3, 2, 1)
+
+        xx_channel = xx_channel / (self.x_dim - 1)
+        yy_channel = yy_channel / (self.y_dim - 1)
+
+        xx_channel = xx_channel * 2 - 1
+        yy_channel = yy_channel * 2 - 1
+
+        xx_channel = xx_channel.repeat(batch_size_tensor, 1, 1, 1)
+        yy_channel = yy_channel.repeat(batch_size_tensor, 1, 1, 1)
+
+        if self.with_boundary and type(heatmap) != type(None):
+            boundary_channel = torch.clamp(heatmap[:, -1:, :, :],
+                                           0.0, 1.0)
+
+            zero_tensor = torch.zeros_like(xx_channel)
+            xx_boundary_channel = torch.where(boundary_channel > 0.05,
+                                              xx_channel, zero_tensor)
+            yy_boundary_channel = torch.where(boundary_channel > 0.05,
+                                              yy_channel, zero_tensor)
+        if self.with_boundary and type(heatmap) != type(None):
+            xx_boundary_channel = xx_boundary_channel.to(device)
+            yy_boundary_channel = yy_boundary_channel.to(device)
+
+        ret = torch.cat([input_tensor, xx_channel, yy_channel], dim=1)
+
+        if self.with_r:
+            rr = torch.sqrt(torch.pow(xx_channel, 2) + torch.pow(yy_channel, 2))
+            rr = rr / torch.max(rr)
+            ret = torch.cat([ret, rr], dim=1)
+
+        if self.with_boundary and type(heatmap) != type(None):
+            ret = torch.cat([ret, xx_boundary_channel,
+                             yy_boundary_channel], dim=1)
+        return ret
+
+
+class CoordConvTh(nn.Module):
+    """CoordConv layer as in the paper."""
+    def __init__(self, x_dim, y_dim, with_r, with_boundary,
+                 in_channels, first_one=False, *args, **kwargs):
+        super(CoordConvTh, self).__init__()
+        self.addcoords = AddCoordsTh(x_dim=x_dim, y_dim=y_dim, with_r=with_r,
+                                     with_boundary=with_boundary)
+        in_channels += 2
+        if with_r:
+            in_channels += 1
+        if with_boundary and not first_one:
+            in_channels += 2
+        self.conv = nn.Conv2d(in_channels=in_channels, *args, **kwargs)
+
+    def forward(self, input_tensor, heatmap=None):
+        ret = self.addcoords(input_tensor, heatmap)
+        last_channel = ret[:, -2:, :, :]
+        ret = self.conv(ret)
+        return ret, last_channel
+
+
+'''
+An alternative implementation for PyTorch with auto-infering the x-y dimensions.
+'''
+class AddCoords(nn.Module):
+
+    def __init__(self, with_r=False):
+        super().__init__()
+        self.with_r = with_r
+
+    def forward(self, input_tensor):
+        """
+        Args:
+            input_tensor: shape(batch, channel, x_dim, y_dim)
+        """
+        batch_size, _, x_dim, y_dim = input_tensor.size()
+
+        xx_channel = torch.arange(x_dim).repeat(1, y_dim, 1)
+        yy_channel = torch.arange(y_dim).repeat(1, x_dim, 1).transpose(1, 2)
+
+        xx_channel = xx_channel / (x_dim - 1)
+        yy_channel = yy_channel / (y_dim - 1)
+
+        xx_channel = xx_channel * 2 - 1
+        yy_channel = yy_channel * 2 - 1
+
+        xx_channel = xx_channel.repeat(batch_size, 1, 1, 1).transpose(2, 3)
+        yy_channel = yy_channel.repeat(batch_size, 1, 1, 1).transpose(2, 3)
+
+        if input_tensor.is_cuda:
+            xx_channel = xx_channel.to(device)
+            yy_channel = yy_channel.to(device)
+
+        ret = torch.cat([
+            input_tensor,
+            xx_channel.type_as(input_tensor),
+            yy_channel.type_as(input_tensor)], dim=1)
+
+        if self.with_r:
+            rr = torch.sqrt(torch.pow(xx_channel - 0.5, 2) + torch.pow(yy_channel - 0.5, 2))
+            if input_tensor.is_cuda:
+                rr = rr.to(device)
+            ret = torch.cat([ret, rr], dim=1)
+
+        return ret
+
+
+class CoordConv(nn.Module):
+
+    def __init__(self, in_channels, out_channels, with_r=False, **kwargs):
+        super().__init__()
+        self.addcoords = AddCoords(with_r=with_r)
+        self.conv = nn.Conv2d(in_channels + 2, out_channels, **kwargs)
+
+    def forward(self, x):
+        ret = self.addcoords(x)
+        ret = self.conv(ret)
+        return ret
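A minimal usage sketch for the `CoordConv` class above; the input shape and layer hyperparameters are arbitrary illustrations, not values from this upload:

```python
# Hedged usage sketch for CoordConv above; shapes and hyperparameters are
# arbitrary, not taken from this repository.
import torch
from core.coord_conv import CoordConv  # import path as used within this repo

layer = CoordConv(in_channels=3, out_channels=16, with_r=False,
                  kernel_size=3, padding=1)
x = torch.randn(2, 3, 64, 64)  # (batch, channels, H, W)
y = layer(x)                   # two coordinate channels are appended before the conv
print(y.shape)                 # torch.Size([2, 16, 64, 64])
```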
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/dataloader.py
ADDED
@@ -0,0 +1,368 @@
+import sys
+import os
+import random
+import glob
+import torch
+from skimage import io
+from skimage import transform as ski_transform
+from skimage.color import rgb2gray
+import scipy.io as sio
+from scipy import interpolate
+import numpy as np
+import matplotlib.pyplot as plt
+from torch.utils.data import Dataset, DataLoader
+from torchvision import transforms, utils
+from torchvision.transforms import Lambda, Compose
+from torchvision.transforms.functional import adjust_brightness, adjust_contrast, adjust_saturation, adjust_hue
+from utils.utils import cv_crop, cv_rotate, draw_gaussian, transform, power_transform, shuffle_lr, fig2data, generate_weight_map
+from PIL import Image
+import cv2
+import copy
+import math
+from imgaug import augmenters as iaa
+
+
+class AddBoundary(object):
+    def __init__(self, num_landmarks=68):
+        self.num_landmarks = num_landmarks
+
+    def __call__(self, sample):
+        landmarks_64 = np.floor(sample['landmarks'] / 4.0)
+        if self.num_landmarks == 68:
+            boundaries = {}
+            boundaries['cheek'] = landmarks_64[0:17]
+            boundaries['left_eyebrow'] = landmarks_64[17:22]
+            boundaries['right_eyebrow'] = landmarks_64[22:27]
+            boundaries['uper_left_eyelid'] = landmarks_64[36:40]
+            boundaries['lower_left_eyelid'] = np.array([landmarks_64[i] for i in [36, 41, 40, 39]])
+            boundaries['upper_right_eyelid'] = landmarks_64[42:46]
+            boundaries['lower_right_eyelid'] = np.array([landmarks_64[i] for i in [42, 47, 46, 45]])
+            boundaries['noise'] = landmarks_64[27:31]
+            boundaries['noise_bot'] = landmarks_64[31:36]
+            boundaries['upper_outer_lip'] = landmarks_64[48:55]
+            boundaries['upper_inner_lip'] = np.array([landmarks_64[i] for i in [60, 61, 62, 63, 64]])
+            boundaries['lower_outer_lip'] = np.array([landmarks_64[i] for i in [48, 59, 58, 57, 56, 55, 54]])
+            boundaries['lower_inner_lip'] = np.array([landmarks_64[i] for i in [60, 67, 66, 65, 64]])
+        elif self.num_landmarks == 98:
+            boundaries = {}
+            boundaries['cheek'] = landmarks_64[0:33]
+            boundaries['left_eyebrow'] = landmarks_64[33:38]
+            boundaries['right_eyebrow'] = landmarks_64[42:47]
+            boundaries['uper_left_eyelid'] = landmarks_64[60:65]
+            boundaries['lower_left_eyelid'] = np.array([landmarks_64[i] for i in [60, 67, 66, 65, 64]])
+            boundaries['upper_right_eyelid'] = landmarks_64[68:73]
+            boundaries['lower_right_eyelid'] = np.array([landmarks_64[i] for i in [68, 75, 74, 73, 72]])
+            boundaries['noise'] = landmarks_64[51:55]
+            boundaries['noise_bot'] = landmarks_64[55:60]
+            boundaries['upper_outer_lip'] = landmarks_64[76:83]
+            boundaries['upper_inner_lip'] = np.array([landmarks_64[i] for i in [88, 89, 90, 91, 92]])
+            boundaries['lower_outer_lip'] = np.array([landmarks_64[i] for i in [76, 87, 86, 85, 84, 83, 82]])
+            boundaries['lower_inner_lip'] = np.array([landmarks_64[i] for i in [88, 95, 94, 93, 92]])
+        elif self.num_landmarks == 19:
+            boundaries = {}
+            boundaries['left_eyebrow'] = landmarks_64[0:3]
+            boundaries['right_eyebrow'] = landmarks_64[3:5]
+            boundaries['left_eye'] = landmarks_64[6:9]
+            boundaries['right_eye'] = landmarks_64[9:12]
+            boundaries['noise'] = landmarks_64[12:15]
+
+        elif self.num_landmarks == 29:
+            boundaries = {}
+            boundaries['upper_left_eyebrow'] = np.stack([
+                landmarks_64[0],
+                landmarks_64[4],
+                landmarks_64[2]
+            ], axis=0)
+            boundaries['lower_left_eyebrow'] = np.stack([
+                landmarks_64[0],
+                landmarks_64[5],
+                landmarks_64[2]
+            ], axis=0)
+            boundaries['upper_right_eyebrow'] = np.stack([
+                landmarks_64[1],
+                landmarks_64[6],
+                landmarks_64[3]
+            ], axis=0)
+            boundaries['lower_right_eyebrow'] = np.stack([
+                landmarks_64[1],
+                landmarks_64[7],
+                landmarks_64[3]
+            ], axis=0)
+            boundaries['upper_left_eye'] = np.stack([
+                landmarks_64[8],
+                landmarks_64[12],
+                landmarks_64[10]
+            ], axis=0)
+            boundaries['lower_left_eye'] = np.stack([
+                landmarks_64[8],
+                landmarks_64[13],
+                landmarks_64[10]
+            ], axis=0)
+            boundaries['upper_right_eye'] = np.stack([
+                landmarks_64[9],
+                landmarks_64[14],
+                landmarks_64[11]
+            ], axis=0)
+            boundaries['lower_right_eye'] = np.stack([
+                landmarks_64[9],
+                landmarks_64[15],
+                landmarks_64[11]
+            ], axis=0)
+            boundaries['noise'] = np.stack([
+                landmarks_64[18],
+                landmarks_64[21],
+                landmarks_64[19]
+            ], axis=0)
+            boundaries['outer_upper_lip'] = np.stack([
+                landmarks_64[22],
+                landmarks_64[24],
+                landmarks_64[23]
+            ], axis=0)
+            boundaries['inner_upper_lip'] = np.stack([
+                landmarks_64[22],
+                landmarks_64[25],
+                landmarks_64[23]
+            ], axis=0)
+            boundaries['outer_lower_lip'] = np.stack([
+                landmarks_64[22],
+                landmarks_64[26],
+                landmarks_64[23]
+            ], axis=0)
+            boundaries['inner_lower_lip'] = np.stack([
+                landmarks_64[22],
+                landmarks_64[27],
+                landmarks_64[23]
+            ], axis=0)
+        functions = {}
+
+        for key, points in boundaries.items():
+            temp = points[0]
+            new_points = points[0:1, :]
+            for point in points[1:]:
+                if point[0] == temp[0] and point[1] == temp[1]:
+                    continue
+                else:
+                    new_points = np.concatenate((new_points, np.expand_dims(point, 0)), axis=0)
+                    temp = point
+            points = new_points
+            if points.shape[0] == 1:
+                points = np.concatenate((points, points+0.001), axis=0)
+            k = min(4, points.shape[0])
+            functions[key] = interpolate.splprep([points[:, 0], points[:, 1]], k=k-1, s=0)
+
+        boundary_map = np.zeros((64, 64))
+
+        fig = plt.figure(figsize=[64/96.0, 64/96.0], dpi=96)
+
+        ax = fig.add_axes([0, 0, 1, 1])
+
+        ax.axis('off')
+
+        ax.imshow(boundary_map, interpolation='nearest', cmap='gray')
+        #ax.scatter(landmarks[:, 0], landmarks[:, 1], s=1, marker=',', c='w')
+
+        for key in functions.keys():
+            xnew = np.arange(0, 1, 0.01)
+            out = interpolate.splev(xnew, functions[key][0], der=0)
+            plt.plot(out[0], out[1], ',', linewidth=1, color='w')
+
+        img = fig2data(fig)
+
+        plt.close()
+
+        sigma = 1
+        temp = 255-img[:,:,1]
+        temp = cv2.distanceTransform(temp, cv2.DIST_L2, cv2.DIST_MASK_PRECISE)
+        temp = temp.astype(np.float32)
+        temp = np.where(temp < 3*sigma, np.exp(-(temp*temp)/(2*sigma*sigma)), 0)
+
+        fig = plt.figure(figsize=[64/96.0, 64/96.0], dpi=96)
+
+        ax = fig.add_axes([0, 0, 1, 1])
+
+        ax.axis('off')
+        ax.imshow(temp, cmap='gray')
+        plt.close()
+
+        boundary_map = fig2data(fig)
+
+        sample['boundary'] = boundary_map[:, :, 0]
+
+        return sample
+
+class AddWeightMap(object):
+    def __call__(self, sample):
+        heatmap = sample['heatmap']
+        boundary = sample['boundary']
+        heatmap = np.concatenate((heatmap, np.expand_dims(boundary, axis=0)), 0)
+        weight_map = np.zeros_like(heatmap)
+        for i in range(heatmap.shape[0]):
+            weight_map[i] = generate_weight_map(weight_map[i],
+                                                heatmap[i])
+        sample['weight_map'] = weight_map
+        return sample
+
+class ToTensor(object):
+    """Convert ndarrays in sample to Tensors."""
+
+    def __call__(self, sample):
+        image, heatmap, landmarks, boundary, weight_map = sample['image'], sample['heatmap'], sample['landmarks'], sample['boundary'], sample['weight_map']
+
+        # swap color axis because
+        # numpy image: H x W x C
+        # torch image: C X H X W
+        if len(image.shape) == 2:
+            image = np.expand_dims(image, axis=2)
+        image = image.transpose((2, 0, 1))
+        boundary = np.expand_dims(boundary, axis=2)
+        boundary = boundary.transpose((2, 0, 1))
+        return {'image': torch.from_numpy(image).float().div(255.0),
+                'heatmap': torch.from_numpy(heatmap).float(),
+                'landmarks': torch.from_numpy(landmarks).float(),
+                'boundary': torch.from_numpy(boundary).float().div(255.0),
+                'weight_map': torch.from_numpy(weight_map).float()}
+
+class FaceLandmarksDataset(Dataset):
+    """Face Landmarks dataset."""
+
+    def __init__(self, img_dir, landmarks_dir, num_landmarks=68, gray_scale=False,
+                 detect_face=False, enhance=False, center_shift=0,
+                 transform=None):
+        """
+        Args:
+            landmark_dir (string): Path to the mat file with landmarks saved.
+            img_dir (string): Directory with all the images.
+            transform (callable, optional): Optional transform to be applied
+                on a sample.
+        """
+        self.img_dir = img_dir
+        self.landmarks_dir = landmarks_dir
+        self.num_lanmdkars = num_landmarks
+        self.transform = transform
+        self.img_names = glob.glob(self.img_dir+'*.jpg') + \
+                         glob.glob(self.img_dir+'*.png')
+        self.gray_scale = gray_scale
+        self.detect_face = detect_face
+        self.enhance = enhance
+        self.center_shift = center_shift
+        if self.detect_face:
+            self.face_detector = MTCNN(thresh=[0.5, 0.6, 0.7])
+
+    def __len__(self):
+        return len(self.img_names)
+
+    def __getitem__(self, idx):
+        img_name = self.img_names[idx]
+        pil_image = Image.open(img_name)
+        if pil_image.mode != "RGB":
+            # if input is grayscale image, convert it to 3 channel image
+            if self.enhance:
+                pil_image = power_transform(pil_image, 0.5)
+            temp_image = Image.new('RGB', pil_image.size)
+            temp_image.paste(pil_image)
+            pil_image = temp_image
+        image = np.array(pil_image)
+        if self.gray_scale:
+            image = rgb2gray(image)
+            image = np.expand_dims(image, axis=2)
+            image = np.concatenate((image, image, image), axis=2)
+            image = image * 255.0
+            image = image.astype(np.uint8)
+        if not self.detect_face:
+            center = [450//2, 450//2+0]
+            if self.center_shift != 0:
+                center[0] += int(np.random.uniform(-self.center_shift,
+                                                   self.center_shift))
+                center[1] += int(np.random.uniform(-self.center_shift,
+                                                   self.center_shift))
+            scale = 1.8
+        else:
+            detected_faces = self.face_detector.detect_image(image)
+            if len(detected_faces) > 0:
+                box = detected_faces[0]
+                left, top, right, bottom, _ = box
+                center = [right - (right - left) / 2.0,
+                          bottom - (bottom - top) / 2.0]
+                center[1] = center[1] - (bottom - top) * 0.12
+                scale = (right - left + bottom - top) / 195.0
+            else:
+                center = [450//2, 450//2+0]
+                scale = 1.8
+            if self.center_shift != 0:
+                shift = self.center * self.center_shift / 450
+                center[0] += int(np.random.uniform(-shift, shift))
+                center[1] += int(np.random.uniform(-shift, shift))
+        base_name = os.path.basename(img_name)
+        landmarks_base_name = base_name[:-4] + '_pts.mat'
+        landmarks_name = os.path.join(self.landmarks_dir, landmarks_base_name)
+        if os.path.isfile(landmarks_name):
+            mat_data = sio.loadmat(landmarks_name)
+            landmarks = mat_data['pts_2d']
+        elif os.path.isfile(landmarks_name[:-8] + '.pts.npy'):
+            landmarks = np.load(landmarks_name[:-8] + '.pts.npy')
+        else:
+            landmarks = []
+            heatmap = []
+
+        if landmarks != []:
+            new_image, new_landmarks = cv_crop(image, landmarks, center,
+                                               scale, 256, self.center_shift)
+            tries = 0
+            while self.center_shift != 0 and tries < 5 and (np.max(new_landmarks) > 240 or np.min(new_landmarks) < 15):
+                center = [450//2, 450//2+0]
+                scale += 0.05
+                center[0] += int(np.random.uniform(-self.center_shift,
+                                                   self.center_shift))
+                center[1] += int(np.random.uniform(-self.center_shift,
+                                                   self.center_shift))
+
+                new_image, new_landmarks = cv_crop(image, landmarks,
+                                                   center, scale, 256,
+                                                   self.center_shift)
+                tries += 1
+            if np.max(new_landmarks) > 250 or np.min(new_landmarks) < 5:
+                center = [450//2, 450//2+0]
+                scale = 2.25
+                new_image, new_landmarks = cv_crop(image, landmarks,
+                                                   center, scale, 256,
+                                                   100)
+            assert (np.min(new_landmarks) > 0 and np.max(new_landmarks) < 256), \
+                "Landmarks out of boundary!"
+            image = new_image
+            landmarks = new_landmarks
+            heatmap = np.zeros((self.num_lanmdkars, 64, 64))
+            for i in range(self.num_lanmdkars):
+                if landmarks[i][0] > 0:
+                    heatmap[i] = draw_gaussian(heatmap[i], landmarks[i]/4.0+1, 1)
+        sample = {'image': image, 'heatmap': heatmap, 'landmarks': landmarks}
+        if self.transform:
+            sample = self.transform(sample)
+
+        return sample
+
+def get_dataset(val_img_dir, val_landmarks_dir, batch_size,
+                num_landmarks=68, rotation=0, scale=0,
+                center_shift=0, random_flip=False,
+                brightness=0, contrast=0, saturation=0,
+                blur=False, noise=False, jpeg_effect=False,
+                random_occlusion=False, gray_scale=False,
+                detect_face=False, enhance=False):
+    val_transforms = transforms.Compose([AddBoundary(num_landmarks),
+                                         AddWeightMap(),
+                                         ToTensor()])
+
+    val_dataset = FaceLandmarksDataset(val_img_dir, val_landmarks_dir,
+                                       num_landmarks=num_landmarks,
+                                       gray_scale=gray_scale,
+                                       detect_face=detect_face,
+                                       enhance=enhance,
+                                       transform=val_transforms)
+
+    val_dataloader = torch.utils.data.DataLoader(val_dataset,
+                                                 batch_size=batch_size,
+                                                 shuffle=False,
+                                                 num_workers=6)
+    data_loaders = {'val': val_dataloader}
+    dataset_sizes = {}
+    dataset_sizes['val'] = len(val_dataset)
+    return data_loaders, dataset_sizes
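A hedged sketch of driving `get_dataset` above; the directory layout under `dataset/WFLW_test` is an assumption based on the README's `convert_WFLW.py` step:

```python
# Hedged sketch: build the WFLW validation loader with get_dataset above.
# The dataset/WFLW_test layout is an assumption; trailing slashes matter
# because the paths are used as glob prefixes.
from core.dataloader import get_dataset

data_loaders, dataset_sizes = get_dataset(
    val_img_dir='dataset/WFLW_test/images/',
    val_landmarks_dir='dataset/WFLW_test/landmarks/',
    batch_size=8,
    num_landmarks=98,
)
batch = next(iter(data_loaders['val']))
print(batch['image'].shape)    # (8, 3, 256, 256)
print(batch['heatmap'].shape)  # (8, 98, 64, 64)
```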
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/evaler.py
ADDED
@@ -0,0 +1,151 @@
+import matplotlib
+matplotlib.use('Agg')
+import math
+import torch
+import copy
+import time
+from torch.autograd import Variable
+import shutil
+from skimage import io
+import numpy as np
+from utils.utils import fan_NME, show_landmarks, get_preds_fromhm
+from PIL import Image, ImageDraw
+import os
+import sys
+import cv2
+import matplotlib.pyplot as plt
+
+
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+def eval_model(model, dataloaders, dataset_sizes,
+               writer, use_gpu=True, epoches=5, dataset='val',
+               save_path='./', num_landmarks=68):
+    global_nme = 0
+    model.eval()
+    for epoch in range(epoches):
+        running_loss = 0
+        step = 0
+        total_nme = 0
+        total_count = 0
+        fail_count = 0
+        nmes = []
+        # running_corrects = 0
+
+        # Iterate over data.
+        with torch.no_grad():
+            for data in dataloaders[dataset]:
+                total_runtime = 0
+                run_count = 0
+                step_start = time.time()
+                step += 1
+                # get the inputs
+                inputs = data['image'].type(torch.FloatTensor)
+                labels_heatmap = data['heatmap'].type(torch.FloatTensor)
+                labels_boundary = data['boundary'].type(torch.FloatTensor)
+                landmarks = data['landmarks'].type(torch.FloatTensor)
+                loss_weight_map = data['weight_map'].type(torch.FloatTensor)
+                # wrap them in Variable
+                if use_gpu:
+                    inputs = inputs.to(device)
+                    labels_heatmap = labels_heatmap.to(device)
+                    labels_boundary = labels_boundary.to(device)
+                    loss_weight_map = loss_weight_map.to(device)
+                else:
+                    inputs, labels_heatmap = Variable(inputs), Variable(labels_heatmap)
+                    labels_boundary = Variable(labels_boundary)
+                labels = torch.cat((labels_heatmap, labels_boundary), 1)
+                single_start = time.time()
+                outputs, boundary_channels = model(inputs)
+                single_end = time.time()
+                total_runtime += time.time() - single_start
+                run_count += 1
+                step_end = time.time()
+                for i in range(inputs.shape[0]):
+                    print(inputs.shape)
+                    img = inputs[i]
+                    img = img.cpu().numpy()
+                    img = img.transpose((1, 2, 0))  # *255.0
+                    # img = img.astype(np.uint8)
+                    # img = Image.fromarray(img)
+                    # pred_heatmap = outputs[-1][i].detach().cpu()[:-1, :, :]
+                    pred_heatmap = outputs[-1][:, :-1, :, :][i].detach().cpu()
+                    pred_landmarks, _ = get_preds_fromhm(pred_heatmap.unsqueeze(0))
+                    pred_landmarks = pred_landmarks.squeeze().numpy()
+
+                    gt_landmarks = data['landmarks'][i].numpy()
+                    print(pred_landmarks, gt_landmarks)
+                    import cv2
+                    while(True):
+                        imgshow = vis_landmark_on_img(cv2.UMat(img), pred_landmarks*4)
+                        cv2.imshow('img', imgshow)
+
+                        if(cv2.waitKey(10) == ord('q')):
+                            break
+
+                    if num_landmarks == 68:
+                        left_eye = np.average(gt_landmarks[36:42], axis=0)
+                        right_eye = np.average(gt_landmarks[42:48], axis=0)
+                        norm_factor = np.linalg.norm(left_eye - right_eye)
+                        # norm_factor = np.linalg.norm(gt_landmarks[36] - gt_landmarks[45])
+
+                    elif num_landmarks == 98:
+                        norm_factor = np.linalg.norm(gt_landmarks[60] - gt_landmarks[72])
+                    elif num_landmarks == 19:
+                        left, top = gt_landmarks[-2, :]
+                        right, bottom = gt_landmarks[-1, :]
+                        norm_factor = math.sqrt(abs(right - left)*abs(top-bottom))
+                        gt_landmarks = gt_landmarks[:-2, :]
+                    elif num_landmarks == 29:
+                        # norm_factor = np.linalg.norm(gt_landmarks[8] - gt_landmarks[9])
+                        norm_factor = np.linalg.norm(gt_landmarks[16] - gt_landmarks[17])
+                    single_nme = (np.sum(np.linalg.norm(pred_landmarks*4 - gt_landmarks, axis=1)) / pred_landmarks.shape[0]) / norm_factor
+
+                    nmes.append(single_nme)
+                    total_count += 1
+                    if single_nme > 0.1:
+                        fail_count += 1
+                if step % 10 == 0:
+                    print('Step {} Time: {:.6f} Input Mean: {:.6f} Output Mean: {:.6f}'.format(
+                        step, step_end - step_start,
+                        torch.mean(labels),
+                        torch.mean(outputs[0])))
+                # gt_landmarks = landmarks.numpy()
+                # pred_heatmap = outputs[-1].to('cpu').numpy()
+                gt_landmarks = landmarks
+                batch_nme = fan_NME(outputs[-1][:, :-1, :, :].detach().cpu(), gt_landmarks, num_landmarks)
+                # batch_nme = 0
+                total_nme += batch_nme
+        epoch_nme = total_nme / dataset_sizes['val']
+        global_nme += epoch_nme
+        nme_save_path = os.path.join(save_path, 'nme_log.npy')
+        np.save(nme_save_path, np.array(nmes))
+        print('NME: {:.6f} Failure Rate: {:.6f} Total Count: {:.6f} Fail Count: {:.6f}'.format(epoch_nme, fail_count/total_count, total_count, fail_count))
+    print('Evaluation done! Average NME: {:.6f}'.format(global_nme/epoches))
+    print('Average runtime for a single batch: {:.6f}'.format(total_runtime/run_count))
+    return model
+
+
+def vis_landmark_on_img(img, shape, linewidth=2):
+    '''
+    Visualize landmark on images.
+    '''
+
+    def draw_curve(idx_list, color=(0, 255, 0), loop=False, lineWidth=linewidth):
+        for i in idx_list:
+            cv2.line(img, (shape[i, 0], shape[i, 1]), (shape[i + 1, 0], shape[i + 1, 1]), color, lineWidth)
+        if (loop):
+            cv2.line(img, (shape[idx_list[0], 0], shape[idx_list[0], 1]),
+                     (shape[idx_list[-1] + 1, 0], shape[idx_list[-1] + 1, 1]), color, lineWidth)
+
+    draw_curve(list(range(0, 32)))  # jaw
+    draw_curve(list(range(33, 41)), color=(0, 0, 255), loop=True)  # eye brow
+    draw_curve(list(range(42, 50)), color=(0, 0, 255), loop=True)
+    draw_curve(list(range(51, 59)))  # nose
+    draw_curve(list(range(60, 67)), loop=True)  # eyes
+    draw_curve(list(range(68, 75)), loop=True)
+    draw_curve(list(range(76, 87)), loop=True, color=(0, 255, 255))  # mouth
+    draw_curve(list(range(88, 95)), loop=True, color=(255, 255, 0))
+
+    return img
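A hedged sketch of wiring `eval_model` above to the loaders built by `get_dataset`; passing `writer=None` is an assumption that holds for the evaluation path shown, which never touches `writer`:

```python
# Hedged sketch: run a single evaluation pass with eval_model above.
# `model` must already be constructed and loaded with a checkpoint
# (see core/models.py and scripts/eval_wflw.sh for the real wiring).
import torch
from core.evaler import eval_model

def run_wflw_eval(model, data_loaders, dataset_sizes):
    return eval_model(model, data_loaders, dataset_sizes,
                      writer=None,  # unused on this eval path (assumption)
                      use_gpu=torch.cuda.is_available(), epoches=1,
                      dataset='val', save_path='./', num_landmarks=98)
```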
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/core/models.py
ADDED
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from core.coord_conv import CoordConvTh


def conv3x3(in_planes, out_planes, strd=1, padding=1,
            bias=False, dilation=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3,
                     stride=strd, padding=padding, bias=bias,
                     dilation=dilation)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        # self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        # self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        # out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        # out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ConvBlock(nn.Module):
    def __init__(self, in_planes, out_planes):
        super(ConvBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = conv3x3(in_planes, int(out_planes / 2))
        self.bn2 = nn.BatchNorm2d(int(out_planes / 2))
        self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4),
                             padding=1, dilation=1)
        self.bn3 = nn.BatchNorm2d(int(out_planes / 4))
        self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 4),
                             padding=1, dilation=1)

        if in_planes != out_planes:
            self.downsample = nn.Sequential(
                nn.BatchNorm2d(in_planes),
                nn.ReLU(True),
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=1, bias=False),
            )
        else:
            self.downsample = None

    def forward(self, x):
        residual = x

        out1 = self.bn1(x)
        out1 = F.relu(out1, True)
        out1 = self.conv1(out1)

        out2 = self.bn2(out1)
        out2 = F.relu(out2, True)
        out2 = self.conv2(out2)

        out3 = self.bn3(out2)
        out3 = F.relu(out3, True)
        out3 = self.conv3(out3)

        out3 = torch.cat((out1, out2, out3), 1)

        if self.downsample is not None:
            residual = self.downsample(residual)

        out3 += residual

        return out3

class HourGlass(nn.Module):
    def __init__(self, num_modules, depth, num_features, first_one=False):
        super(HourGlass, self).__init__()
        self.num_modules = num_modules
        self.depth = depth
        self.features = num_features
        self.coordconv = CoordConvTh(x_dim=64, y_dim=64,
                                     with_r=True, with_boundary=True,
                                     in_channels=256, first_one=first_one,
                                     out_channels=256,
                                     kernel_size=1,
                                     stride=1, padding=0)
        self._generate_network(self.depth)

    def _generate_network(self, level):
        self.add_module('b1_' + str(level), ConvBlock(256, 256))

        self.add_module('b2_' + str(level), ConvBlock(256, 256))

        if level > 1:
            self._generate_network(level - 1)
        else:
            self.add_module('b2_plus_' + str(level), ConvBlock(256, 256))

        self.add_module('b3_' + str(level), ConvBlock(256, 256))

    def _forward(self, level, inp):
        # Upper branch
        up1 = inp
        up1 = self._modules['b1_' + str(level)](up1)

        # Lower branch
        low1 = F.avg_pool2d(inp, 2, stride=2)
        low1 = self._modules['b2_' + str(level)](low1)

        if level > 1:
            low2 = self._forward(level - 1, low1)
        else:
            low2 = low1
            low2 = self._modules['b2_plus_' + str(level)](low2)

        low3 = low2
        low3 = self._modules['b3_' + str(level)](low3)

        up2 = F.upsample(low3, scale_factor=2, mode='nearest')

        return up1 + up2

    def forward(self, x, heatmap):
        x, last_channel = self.coordconv(x, heatmap)
        return self._forward(self.depth, x), last_channel

class FAN(nn.Module):

    def __init__(self, num_modules=1, end_relu=False, gray_scale=False,
                 num_landmarks=68):
        super(FAN, self).__init__()
        self.num_modules = num_modules
        self.gray_scale = gray_scale
        self.end_relu = end_relu
        self.num_landmarks = num_landmarks

        # Base part (both branches are currently identical: in_channels=3)
        if self.gray_scale:
            self.conv1 = CoordConvTh(x_dim=256, y_dim=256,
                                     with_r=True, with_boundary=False,
                                     in_channels=3, out_channels=64,
                                     kernel_size=7,
                                     stride=2, padding=3)
        else:
            self.conv1 = CoordConvTh(x_dim=256, y_dim=256,
                                     with_r=True, with_boundary=False,
                                     in_channels=3, out_channels=64,
                                     kernel_size=7,
                                     stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = ConvBlock(64, 128)
        self.conv3 = ConvBlock(128, 128)
        self.conv4 = ConvBlock(128, 256)

        # Stacking part
        for hg_module in range(self.num_modules):
            if hg_module == 0:
                first_one = True
            else:
                first_one = False
            self.add_module('m' + str(hg_module), HourGlass(1, 4, 256,
                                                            first_one))
            self.add_module('top_m_' + str(hg_module), ConvBlock(256, 256))
            self.add_module('conv_last' + str(hg_module),
                            nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0))
            self.add_module('bn_end' + str(hg_module), nn.BatchNorm2d(256))
            self.add_module('l' + str(hg_module), nn.Conv2d(256,
                            num_landmarks+1, kernel_size=1, stride=1, padding=0))

            if hg_module < self.num_modules - 1:
                self.add_module(
                    'bl' + str(hg_module), nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0))
                self.add_module('al' + str(hg_module), nn.Conv2d(num_landmarks+1,
                                256, kernel_size=1, stride=1, padding=0))

    def forward(self, x):
        x, _ = self.conv1(x)
        x = F.relu(self.bn1(x), True)
        # x = F.relu(self.bn1(self.conv1(x)), True)
        x = F.avg_pool2d(self.conv2(x), 2, stride=2)
        x = self.conv3(x)
        x = self.conv4(x)

        previous = x

        outputs = []
        boundary_channels = []
        tmp_out = None
        for i in range(self.num_modules):
            hg, boundary_channel = self._modules['m' + str(i)](previous,
                                                               tmp_out)

            ll = hg
            ll = self._modules['top_m_' + str(i)](ll)

            ll = F.relu(self._modules['bn_end' + str(i)]
                        (self._modules['conv_last' + str(i)](ll)), True)

            # Predict heatmaps
            tmp_out = self._modules['l' + str(i)](ll)
            if self.end_relu:
                tmp_out = F.relu(tmp_out)  # HACK: Added relu
            outputs.append(tmp_out)
            boundary_channels.append(boundary_channel)

            if i < self.num_modules - 1:
                ll = self._modules['bl' + str(i)](ll)
                tmp_out_ = self._modules['al' + str(i)](tmp_out)
                previous = previous + ll + tmp_out_

        return outputs, boundary_channels
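A quick shape sanity check for the stacked-hourglass `FAN` above (a sketch, not part of the upload; assumes the `core` package is on `PYTHONPATH` so `CoordConvTh` resolves, and uses a random input tensor):

```python
import torch
from core.models import FAN

net = FAN(num_modules=4, end_relu=False, gray_scale=False, num_landmarks=98)
x = torch.randn(2, 3, 256, 256)          # batch of two 256x256 RGB crops
outputs, boundary_channels = net(x)
print(len(outputs))                      # 4 -- one heatmap stack per hourglass module
print(outputs[-1].shape)                 # torch.Size([2, 99, 64, 64]) -- 98 landmarks + 1 boundary map
```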
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/eval.py
ADDED
@@ -0,0 +1,77 @@
from __future__ import print_function, division
import torch
import argparse
import numpy as np
import torch.nn as nn
import time
import os
from core.evaler import eval_model
from core.dataloader import get_dataset
from core import models
from tensorboardX import SummaryWriter

# Parse arguments
parser = argparse.ArgumentParser()
# Dataset paths
parser.add_argument('--val_img_dir', type=str,
                    help='Validation image directory')
parser.add_argument('--val_landmarks_dir', type=str,
                    help='Validation landmarks directory')
parser.add_argument('--num_landmarks', type=int, default=68,
                    help='Number of landmarks')

# Checkpoint and pretrained weights
parser.add_argument('--ckpt_save_path', type=str,
                    help='a directory to save checkpoint file')
parser.add_argument('--pretrained_weights', type=str,
                    help='a directory to save pretrained_weights')

# Eval options
parser.add_argument('--batch_size', type=int, default=25,
                    help='batch size for evaluation')

# Network parameters
parser.add_argument('--hg_blocks', type=int, default=4,
                    help='Number of HG blocks to stack')
parser.add_argument('--gray_scale', type=str, default="False",
                    help='Whether to convert RGB image into gray scale during training')
parser.add_argument('--end_relu', type=str, default="False",
                    help='Whether to add relu at the end of each HG module')

args = parser.parse_args()

VAL_IMG_DIR = args.val_img_dir
VAL_LANDMARKS_DIR = args.val_landmarks_dir
CKPT_SAVE_PATH = args.ckpt_save_path
BATCH_SIZE = args.batch_size
PRETRAINED_WEIGHTS = args.pretrained_weights
GRAY_SCALE = False if args.gray_scale == 'False' else True
HG_BLOCKS = args.hg_blocks
END_RELU = False if args.end_relu == 'False' else True
NUM_LANDMARKS = args.num_landmarks

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

writer = SummaryWriter(CKPT_SAVE_PATH)

dataloaders, dataset_sizes = get_dataset(VAL_IMG_DIR, VAL_LANDMARKS_DIR,
                                         BATCH_SIZE, NUM_LANDMARKS)
use_gpu = torch.cuda.is_available()
model_ft = models.FAN(HG_BLOCKS, END_RELU, GRAY_SCALE, NUM_LANDMARKS)

if PRETRAINED_WEIGHTS != "None":
    checkpoint = torch.load(PRETRAINED_WEIGHTS)
    if 'state_dict' not in checkpoint:
        model_ft.load_state_dict(checkpoint)
    else:
        pretrained_weights = checkpoint['state_dict']
        model_weights = model_ft.state_dict()
        pretrained_weights = {k: v for k, v in pretrained_weights.items()
                              if k in model_weights}
        model_weights.update(pretrained_weights)
        model_ft.load_state_dict(model_weights)

model_ft = model_ft.to(device)

model_ft = eval_model(model_ft, dataloaders, dataset_sizes, writer, use_gpu, 1, 'val', CKPT_SAVE_PATH, NUM_LANDMARKS)
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/images/wflw.png
ADDED
Image file (Git LFS).
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/images/wflw_table.png
ADDED
Image file (Git LFS).
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/requirements.txt
ADDED
@@ -0,0 +1,12 @@
opencv-python
scipy>=0.17.0
scikit-image
numpy
matplotlib
Pillow>=4.3.0
imgaug
tensorflow
git+https://github.com/lanpa/tensorboardX
joblib
torch==1.3.0
torchvision==0.4.1
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/scripts/eval_wflw.sh
ADDED
@@ -0,0 +1,10 @@
CUDA_VISIBLE_DEVICES=1 python ../eval.py \
--val_img_dir='../dataset/WFLW_test/images/' \
--val_landmarks_dir='../dataset/WFLW_test/landmarks/' \
--ckpt_save_path='../experiments/eval_iccv_0620' \
--hg_blocks=4 \
--pretrained_weights='../ckpt/WFLW_4HG.pth' \
--num_landmarks=98 \
--end_relu='False' \
--batch_size=20
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__init__.py
ADDED
File without changes
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (170 Bytes).
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (185 Bytes).
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__pycache__/utils.cpython-37.pyc
ADDED
Binary file (11.8 kB).
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (11.6 kB).
marlenezw/audio-driven-animations/MakeItTalk/AdaptiveWingLoss/utils/utils.py
ADDED
@@ -0,0 +1,354 @@
from __future__ import print_function, division
import os
import sys
import math
import torch
import cv2
from functools import reduce  # needed by transform() below; missing in the original
from PIL import Image
from skimage import io
from skimage import transform as ski_transform
from scipy import ndimage
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

def _gaussian(
        size=3, sigma=0.25, amplitude=1, normalize=False, width=None,
        height=None, sigma_horz=None, sigma_vert=None, mean_horz=0.5,
        mean_vert=0.5):
    # handle some defaults
    if width is None:
        width = size
    if height is None:
        height = size
    if sigma_horz is None:
        sigma_horz = sigma
    if sigma_vert is None:
        sigma_vert = sigma
    center_x = mean_horz * width + 0.5
    center_y = mean_vert * height + 0.5
    gauss = np.empty((height, width), dtype=np.float32)
    # generate kernel
    for i in range(height):
        for j in range(width):
            gauss[i][j] = amplitude * math.exp(-(math.pow((j + 1 - center_x) / (
                sigma_horz * width), 2) / 2.0 + math.pow((i + 1 - center_y) / (sigma_vert * height), 2) / 2.0))
    if normalize:
        gauss = gauss / np.sum(gauss)
    return gauss

def draw_gaussian(image, point, sigma):
    # Check if the gaussian is inside
    ul = [np.floor(np.floor(point[0]) - 3 * sigma),
          np.floor(np.floor(point[1]) - 3 * sigma)]
    br = [np.floor(np.floor(point[0]) + 3 * sigma),
          np.floor(np.floor(point[1]) + 3 * sigma)]
    if (ul[0] > image.shape[1] or ul[1] >
            image.shape[0] or br[0] < 1 or br[1] < 1):
        return image
    size = 6 * sigma + 1
    g = _gaussian(size)
    g_x = [int(max(1, -ul[0])), int(min(br[0], image.shape[1])) -
           int(max(1, ul[0])) + int(max(1, -ul[0]))]
    g_y = [int(max(1, -ul[1])), int(min(br[1], image.shape[0])) -
           int(max(1, ul[1])) + int(max(1, -ul[1]))]
    img_x = [int(max(1, ul[0])), int(min(br[0], image.shape[1]))]
    img_y = [int(max(1, ul[1])), int(min(br[1], image.shape[0]))]
    assert (g_x[0] > 0 and g_y[1] > 0)
    correct = False
    while not correct:
        try:
            image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1]
                  ] = image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1]] + g[g_y[0] - 1:g_y[1], g_x[0] - 1:g_x[1]]
            correct = True
        except:
            print('img_x: {}, img_y: {}, g_x:{}, g_y:{}, point:{}, g_shape:{}, ul:{}, br:{}'.format(img_x, img_y, g_x, g_y, point, g.shape, ul, br))
            ul = [np.floor(np.floor(point[0]) - 3 * sigma),
                  np.floor(np.floor(point[1]) - 3 * sigma)]
            br = [np.floor(np.floor(point[0]) + 3 * sigma),
                  np.floor(np.floor(point[1]) + 3 * sigma)]
            g_x = [int(max(1, -ul[0])), int(min(br[0], image.shape[1])) -
                   int(max(1, ul[0])) + int(max(1, -ul[0]))]
            g_y = [int(max(1, -ul[1])), int(min(br[1], image.shape[0])) -
                   int(max(1, ul[1])) + int(max(1, -ul[1]))]
            img_x = [int(max(1, ul[0])), int(min(br[0], image.shape[1]))]
            img_y = [int(max(1, ul[1])), int(min(br[1], image.shape[0]))]
            pass
    image[image > 1] = 1
    return image

def transform(point, center, scale, resolution, rotation=0, invert=False):
    _pt = np.ones(3)
    _pt[0] = point[0]
    _pt[1] = point[1]

    h = 200.0 * scale
    t = np.eye(3)
    t[0, 0] = resolution / h
    t[1, 1] = resolution / h
    t[0, 2] = resolution * (-center[0] / h + 0.5)
    t[1, 2] = resolution * (-center[1] / h + 0.5)

    if rotation != 0:
        rotation = -rotation
        r = np.eye(3)
        ang = rotation * math.pi / 180.0
        s = math.sin(ang)
        c = math.cos(ang)
        r[0][0] = c
        r[0][1] = -s
        r[1][0] = s
        r[1][1] = c

        t_ = np.eye(3)
        t_[0][2] = -resolution / 2.0
        t_[1][2] = -resolution / 2.0
        t_inv = np.eye(3)  # was torch.eye(3); np.eye keeps the np.matmul chain consistent
        t_inv[0][2] = resolution / 2.0
        t_inv[1][2] = resolution / 2.0
        t = reduce(np.matmul, [t_inv, r, t_, t])

    if invert:
        t = np.linalg.inv(t)
    new_point = (np.matmul(t, _pt))[0:2]

    return new_point.astype(int)

def cv_crop(image, landmarks, center, scale, resolution=256, center_shift=0):
    new_image = cv2.copyMakeBorder(image, center_shift,
                                   center_shift,
                                   center_shift,
                                   center_shift,
                                   cv2.BORDER_CONSTANT, value=[0, 0, 0])
    new_landmarks = landmarks.copy()
    if center_shift != 0:
        center[0] += center_shift
        center[1] += center_shift
        new_landmarks = new_landmarks + center_shift
    length = 200 * scale
    top = int(center[1] - length // 2)
    bottom = int(center[1] + length // 2)
    left = int(center[0] - length // 2)
    right = int(center[0] + length // 2)
    y_pad = abs(min(top, new_image.shape[0] - bottom, 0))
    x_pad = abs(min(left, new_image.shape[1] - right, 0))
    top, bottom, left, right = top + y_pad, bottom + y_pad, left + x_pad, right + x_pad
    new_image = cv2.copyMakeBorder(new_image, y_pad,
                                   y_pad,
                                   x_pad,
                                   x_pad,
                                   cv2.BORDER_CONSTANT, value=[0, 0, 0])
    new_image = new_image[top:bottom, left:right]
    new_image = cv2.resize(new_image, dsize=(int(resolution), int(resolution)),
                           interpolation=cv2.INTER_LINEAR)
    new_landmarks[:, 0] = (new_landmarks[:, 0] + x_pad - left) * resolution / length
    new_landmarks[:, 1] = (new_landmarks[:, 1] + y_pad - top) * resolution / length
    return new_image, new_landmarks

def cv_rotate(image, landmarks, heatmap, rot, scale, resolution=256):
    img_mat = cv2.getRotationMatrix2D((resolution//2, resolution//2), rot, scale)
    ones = np.ones(shape=(landmarks.shape[0], 1))
    stacked_landmarks = np.hstack([landmarks, ones])
    new_landmarks = img_mat.dot(stacked_landmarks.T).T
    if np.max(new_landmarks) > 255 or np.min(new_landmarks) < 0:
        return image, landmarks, heatmap
    else:
        new_image = cv2.warpAffine(image, img_mat, (resolution, resolution))
        new_heatmap = heatmap  # pass heatmap through unchanged when None
        if heatmap is not None:
            new_heatmap = np.zeros((heatmap.shape[0], 64, 64))
            for i in range(heatmap.shape[0]):
                if new_landmarks[i][0] > 0:
                    new_heatmap[i] = draw_gaussian(new_heatmap[i],
                                                   new_landmarks[i]/4.0+1, 1)
        return new_image, new_landmarks, new_heatmap

def show_landmarks(image, heatmap, gt_landmarks, gt_heatmap):
    """Show image with pred_landmarks"""
    pred_landmarks = []
    pred_landmarks, _ = get_preds_fromhm(torch.from_numpy(heatmap).unsqueeze(0))
    pred_landmarks = pred_landmarks.squeeze()*4

    # pred_landmarks2 = get_preds_fromhm2(heatmap)
    heatmap = np.max(gt_heatmap, axis=0)
    heatmap = heatmap / np.max(heatmap)
    # image = ski_transform.resize(image, (64, 64))*255
    image = image.astype(np.uint8)
    heatmap = np.max(gt_heatmap, axis=0)
    heatmap = ski_transform.resize(heatmap, (image.shape[0], image.shape[1]))
    heatmap *= 255
    heatmap = heatmap.astype(np.uint8)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    plt.imshow(image)
    plt.scatter(gt_landmarks[:, 0], gt_landmarks[:, 1], s=0.5, marker='.', c='g')
    plt.scatter(pred_landmarks[:, 0], pred_landmarks[:, 1], s=0.5, marker='.', c='r')
    plt.pause(0.001)  # pause a bit so that plots are updated

def fan_NME(pred_heatmaps, gt_landmarks, num_landmarks=68):
    '''
    Calculate total NME for a batch of data

    Args:
        pred_heatmaps: torch tensor of size [batch, points, height, width]
        gt_landmarks: torch tensor of size [batch, points, x, y]

    Returns:
        nme: sum of nme for this batch
    '''
    nme = 0
    pred_landmarks, _ = get_preds_fromhm(pred_heatmaps)
    pred_landmarks = pred_landmarks.numpy()
    gt_landmarks = gt_landmarks.numpy()
    for i in range(pred_landmarks.shape[0]):
        pred_landmark = pred_landmarks[i] * 4.0
        gt_landmark = gt_landmarks[i]

        if num_landmarks == 68:
            left_eye = np.average(gt_landmark[36:42], axis=0)
            right_eye = np.average(gt_landmark[42:48], axis=0)
            norm_factor = np.linalg.norm(left_eye - right_eye)
            # norm_factor = np.linalg.norm(gt_landmark[36] - gt_landmark[45])
        elif num_landmarks == 98:
            norm_factor = np.linalg.norm(gt_landmark[60] - gt_landmark[72])
        elif num_landmarks == 19:
            left, top = gt_landmark[-2, :]
            right, bottom = gt_landmark[-1, :]
            norm_factor = math.sqrt(abs(right - left)*abs(top-bottom))
            gt_landmark = gt_landmark[:-2, :]
        elif num_landmarks == 29:
            # norm_factor = np.linalg.norm(gt_landmark[8] - gt_landmark[9])
            norm_factor = np.linalg.norm(gt_landmark[16] - gt_landmark[17])
        nme += (np.sum(np.linalg.norm(pred_landmark - gt_landmark, axis=1)) / pred_landmark.shape[0]) / norm_factor
    return nme

def fan_NME_hm(pred_heatmaps, gt_landmarks, num_landmarks=68):
    '''
    Calculate total NME for a batch of data, with predicted landmarks taken
    directly from the heatmap argmax.

    Args:
        pred_heatmaps: torch tensor of size [batch, points, height, width]
        gt_landmarks: torch tensor of size [batch, points, x, y]

    Returns:
        nme: sum of nme for this batch
    '''
    nme = 0
    # NOTE: the original signature named the second argument gt_heatmaps, but the
    # body consumes ground-truth landmarks; it is renamed here accordingly.
    # get_index_fromhm returns a single tensor (the original unpacked two values).
    pred_landmarks = get_index_fromhm(pred_heatmaps)
    pred_landmarks = pred_landmarks.numpy()
    gt_landmarks = gt_landmarks.numpy()
    for i in range(pred_landmarks.shape[0]):
        pred_landmark = pred_landmarks[i] * 4.0
        gt_landmark = gt_landmarks[i]
        if num_landmarks == 68:
            left_eye = np.average(gt_landmark[36:42], axis=0)
            right_eye = np.average(gt_landmark[42:48], axis=0)
            norm_factor = np.linalg.norm(left_eye - right_eye)
        else:
            norm_factor = np.linalg.norm(gt_landmark[60] - gt_landmark[72])
        nme += (np.sum(np.linalg.norm(pred_landmark - gt_landmark, axis=1)) / pred_landmark.shape[0]) / norm_factor
    return nme

def power_transform(img, power):
    img = np.array(img)
    img_new = np.power((img/255.0), power) * 255.0
    img_new = img_new.astype(np.uint8)
    img_new = Image.fromarray(img_new)
    return img_new

def get_preds_fromhm(hm, center=None, scale=None, rot=None):
    max, idx = torch.max(
        hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2)
    idx += 1
    preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float()
    preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1)
    preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1)

    for i in range(preds.size(0)):
        for j in range(preds.size(1)):
            hm_ = hm[i, j, :]
            pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1
            if pX > 0 and pX < 63 and pY > 0 and pY < 63:
                diff = torch.FloatTensor(
                    [hm_[pY, pX + 1] - hm_[pY, pX - 1],
                     hm_[pY + 1, pX] - hm_[pY - 1, pX]])
                preds[i, j].add_(diff.sign_().mul_(.25))

    preds.add_(-0.5)

    preds_orig = torch.zeros(preds.size())
    if center is not None and scale is not None:
        for i in range(hm.size(0)):
            for j in range(hm.size(1)):
                preds_orig[i, j] = transform(
                    preds[i, j], center, scale, hm.size(2), rot, True)

    return preds, preds_orig

def get_index_fromhm(hm):
    max, idx = torch.max(
        hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2)
    preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float()
    preds[..., 0].remainder_(hm.size(3))
    preds[..., 1].div_(hm.size(2)).floor_()

    for i in range(preds.size(0)):
        for j in range(preds.size(1)):
            hm_ = hm[i, j, :]
            pX, pY = int(preds[i, j, 0]), int(preds[i, j, 1])
            if pX > 0 and pX < 63 and pY > 0 and pY < 63:
                diff = torch.FloatTensor(
                    [hm_[pY, pX + 1] - hm_[pY, pX - 1],
                     hm_[pY + 1, pX] - hm_[pY - 1, pX]])
                preds[i, j].add_(diff.sign_().mul_(.25))

    return preds

def shuffle_lr(parts, num_landmarks=68, pairs=None):
    if num_landmarks == 68:
        if pairs is None:
            pairs = [[0, 16], [1, 15], [2, 14], [3, 13], [4, 12], [5, 11], [6, 10],
                     [7, 9], [17, 26], [18, 25], [19, 24], [20, 23], [21, 22], [36, 45],
                     [37, 44], [38, 43], [39, 42], [41, 46], [40, 47], [31, 35], [32, 34],
                     [50, 52], [49, 53], [48, 54], [61, 63], [60, 64], [67, 65], [59, 55], [58, 56]]
    elif num_landmarks == 98:
        if pairs is None:
            pairs = [[0, 32], [1, 31], [2, 30], [3, 29], [4, 28], [5, 27], [6, 26], [7, 25], [8, 24], [9, 23], [10, 22], [11, 21], [12, 20], [13, 19], [14, 18], [15, 17], [33, 46], [34, 45], [35, 44], [36, 43], [37, 42], [38, 50], [39, 49], [40, 48], [41, 47], [60, 72], [61, 71], [62, 70], [63, 69], [64, 68], [65, 75], [66, 74], [67, 73], [96, 97], [55, 59], [56, 58], [76, 82], [77, 81], [78, 80], [88, 92], [89, 91], [95, 93], [87, 83], [86, 84]]
    elif num_landmarks == 19:
        if pairs is None:
            pairs = [[0, 5], [1, 4], [2, 3], [6, 11], [7, 10], [8, 9], [12, 14], [15, 17]]
    elif num_landmarks == 29:
        if pairs is None:
            pairs = [[0, 1], [4, 6], [5, 7], [2, 3], [8, 9], [12, 14], [16, 17], [13, 15], [10, 11], [18, 19], [22, 23]]
    for matched_p in pairs:
        idx1, idx2 = matched_p[0], matched_p[1]
        tmp = np.copy(parts[idx1])
        np.copyto(parts[idx1], parts[idx2])
        np.copyto(parts[idx2], tmp)
    return parts


def generate_weight_map(weight_map, heatmap):

    k_size = 3
    dilate = ndimage.grey_dilation(heatmap, size=(k_size, k_size))
    weight_map[np.where(dilate > 0.2)] = 1
    return weight_map

def fig2data(fig):
    """
    @brief Convert a Matplotlib figure to a 3D numpy array of RGB values and return it
    @param fig a matplotlib figure
    @return a numpy 3D array (width, height, 3) of RGB values
    """
    # draw the renderer
    fig.canvas.draw()

    # Get the RGB buffer from the figure
    w, h = fig.canvas.get_width_height()
    buf = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8)
    buf.shape = (w, h, 3)

    # rolling by 3 on a 3-channel axis is a no-op; kept for parity with the ARGB variant of this helper
    buf = np.roll(buf, 3, axis=2)
    return buf
marlenezw/audio-driven-animations/MakeItTalk/__init__.py
ADDED
File without changes
marlenezw/audio-driven-animations/MakeItTalk/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (147 Bytes).
marlenezw/audio-driven-animations/MakeItTalk/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (162 Bytes).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/CODEOWNERS
ADDED
@@ -0,0 +1 @@
* @papulke
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/LICENCE.txt
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Jordan Yaniv

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/README.md
ADDED
@@ -0,0 +1,98 @@
# The Face of Art: Landmark Detection and Geometric Style in Portraits

Code for the landmark detection framework described in [The Face of Art: Landmark Detection and Geometric Style in Portraits](http://www.faculty.idc.ac.il/arik/site/foa/face-of-art.asp) (SIGGRAPH 2019)


<sub><sup>Top: landmark detection results on artistic portraits with different styles allow us to define the geometric style of an artist. Bottom: results of the style transfer of portraits using various artists' geometric style, including Amedeo Modigliani, Pablo Picasso, Margaret Keane, Fernand Léger, and Tsuguharu Foujita. Top right portrait is from 'Woman with Peanuts,' ©1962, Estate of Roy Lichtenstein.</sup></sub>

## Getting Started

### Requirements

* python
* anaconda

### Download

#### Model
Download the model weights from [here](https://www.dropbox.com/sh/hrxcyug1bmbj6cs/AAAxq_zI5eawcLjM8zvUwaXha?dl=0).

#### Datasets
* The datasets used for training and evaluating our model can be found [here](https://ibug.doc.ic.ac.uk/resources/facial-point-annotations/).

* The Artistic-Faces dataset can be found [here](http://www.faculty.idc.ac.il/arik/site/foa/artistic-faces-dataset.asp).

* Training images with texture augmentation can be found [here](https://www.dropbox.com/sh/av2k1i1082z0nie/AAC5qV1E2UkqpDLVsv7TazMta?dl=0).
Before applying texture style transfer, the training images were cropped to the ground-truth face bounding-box with a 25% margin. To crop the training images, run the script `crop_training_set.py`.

* Our model expects the following directory structure of landmark detection datasets:
```
landmark_detection_datasets
├── training
├── test
├── challenging
├── common
├── full
├── crop_gt_margin_0.25 (cropped images of training set)
└── crop_gt_margin_0.25_ns (cropped images of training set + texture style transfer)
```
### Install

Create a virtual environment and install the following:
* opencv
* menpo
* menpofit
* tensorflow-gpu

For python 2:
```
conda create -n foa_env python=2.7 anaconda
source activate foa_env
conda install -c menpo opencv
conda install -c menpo menpo
conda install -c menpo menpofit
pip install tensorflow-gpu
```

For python 3:
```
conda create -n foa_env python=3.5 anaconda
source activate foa_env
conda install -c menpo opencv
conda install -c menpo menpo
conda install -c menpo menpofit
pip3 install tensorflow-gpu
```

Clone the repository:

```
git clone https://github.com/papulke/deep_face_heatmaps
```

## Instructions

### Training

To train the network, run `train_heatmaps_network.py`.

Example for training a model with texture augmentation (100% of images) and geometric augmentation (~70% of images):
```
python train_heatmaps_network.py --output_dir='test_artistic_aug' --augment_geom=True \
--augment_texture=True --p_texture=1. --p_geom=0.7
```

### Testing

To use the detection framework to predict landmarks, run the script `predict_landmarks.py`.

## Acknowledgments

* [ect](https://github.com/HongwenZhang/ECT-FaceAlignment)
* [menpo](https://github.com/menpo/menpo)
* [menpofit](https://github.com/menpo/menpofit)
* [mdm](https://github.com/trigeorgis/mdm)
* [style transfer implementation](https://github.com/woodrush/neural-art-tf)
* [painter-by-numbers dataset](https://www.kaggle.com/c/painter-by-numbers/data)
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__init__.py
ADDED
File without changes
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__init__.pyc
ADDED
Binary file (161 Bytes).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/__init__.cpython-36.pyc
ADDED
Binary file (157 Bytes).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/data_loading_functions.cpython-36.pyc
ADDED
Binary file (4.56 kB).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/deep_heatmaps_model_fusion_net.cpython-36.pyc
ADDED
Binary file (21.6 kB).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/deformation_functions.cpython-36.pyc
ADDED
Binary file (9 kB).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/logging_functions.cpython-36.pyc
ADDED
Binary file (5.81 kB).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/menpo_functions.cpython-36.pyc
ADDED
Binary file (9.22 kB).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/ops.cpython-36.pyc
ADDED
Binary file (3.6 kB).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/__pycache__/pdm_clm_functions.cpython-36.pyc
ADDED
Binary file (6.34 kB).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/crop_training_set.py
ADDED
@@ -0,0 +1,38 @@
from scipy.misc import imsave
from menpo_functions import *
from data_loading_functions import *


# define paths & parameters for cropping dataset
img_dir = '~/landmark_detection_datasets/'
dataset = 'training'
bb_type = 'gt'
margin = 0.25
image_size = 256

# load bounding boxes
bb_dir = os.path.join(img_dir, 'Bounding_Boxes')
bb_dictionary = load_bb_dictionary(bb_dir, mode='TRAIN', test_data=dataset)

# directory for saving face crops
outdir = os.path.join(img_dir, 'crop_'+bb_type+'_margin_'+str(margin))
if not os.path.exists(outdir):
    os.mkdir(outdir)

# load images
imgs_to_crop = load_menpo_image_list(
    img_dir=img_dir, train_crop_dir=None, img_dir_ns=None, mode='TRAIN', bb_dictionary=bb_dictionary,
    image_size=image_size, margin=margin, bb_type=bb_type, augment_basic=False)

# save cropped images with matching landmarks
print("\ncropping dataset from: " + os.path.join(img_dir, dataset))
print("\nsaving cropped dataset to: " + outdir)
for im in imgs_to_crop:
    if im.pixels.shape[0] == 1:
        im_pixels = gray2rgb(np.squeeze(im.pixels))
    else:
        im_pixels = np.rollaxis(im.pixels, 0, 3)
    imsave(os.path.join(outdir, im.path.name.split('.')[0]+'.png'), im_pixels)
    mio.export_landmark_file(im.landmarks['PTS'], os.path.join(outdir, im.path.name.split('.')[0]+'.pts'))

print("\ncropping dataset completed!")
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/data_loading_functions.py
ADDED
@@ -0,0 +1,161 @@
import numpy as np
import os
from skimage.color import gray2rgb


def train_val_shuffle_inds_per_epoch(valid_inds, train_inds, train_iter, batch_size, log_path, save_log=True):
    """shuffle image indices for each training epoch and save to log"""

    np.random.seed(0)
    num_train_images = len(train_inds)
    num_epochs = int(np.ceil((1. * train_iter) / (1. * num_train_images / batch_size))) + 1
    epoch_inds_shuffle = np.zeros((num_epochs, num_train_images)).astype(int)
    img_inds = np.arange(num_train_images)
    for i in range(num_epochs):
        np.random.shuffle(img_inds)
        epoch_inds_shuffle[i, :] = img_inds

    if save_log:
        with open(os.path.join(log_path, "train_val_shuffle_inds.csv"), "wb") as f:
            if valid_inds is not None:
                f.write(b'valid inds\n')
                np.savetxt(f, valid_inds.reshape(1, -1), fmt='%i', delimiter=",")
            f.write(b'train inds\n')
            np.savetxt(f, train_inds.reshape(1, -1), fmt='%i', delimiter=",")
            f.write(b'shuffle inds\n')
            np.savetxt(f, epoch_inds_shuffle, fmt='%i', delimiter=",")

    return epoch_inds_shuffle


def gaussian(x, y, x0, y0, sigma=6):
    return 1./(np.sqrt(2*np.pi)*sigma) * np.exp(-0.5 * ((x-x0)**2 + (y-y0)**2) / sigma**2)


def create_gaussian_filter(sigma=6, win_mult=3.5):
    win_size = int(win_mult * sigma)
    x, y = np.mgrid[0:2*win_size+1, 0:2*win_size+1]
    gauss_filt = (8./3)*sigma*gaussian(x, y, win_size, win_size, sigma=sigma)  # same as in ECT
    return gauss_filt


def load_images(img_list, batch_inds, image_size=256, c_dim=3, scale=255):

    """ load images as a numpy array from menpo image list """

    num_inputs = len(batch_inds)
    batch_menpo_images = img_list[batch_inds]

    images = np.zeros([num_inputs, image_size, image_size, c_dim]).astype('float32')

    for ind, img in enumerate(batch_menpo_images):
        if img.n_channels < 3 and c_dim == 3:
            images[ind, :, :, :] = gray2rgb(img.pixels_with_channels_at_back())
        else:
            images[ind, :, :, :] = img.pixels_with_channels_at_back()

    if scale == 255:  # '==' instead of 'is': identity comparison with a literal is unreliable
        images *= 255
    elif scale == 0:
        images = 2 * images - 1

    return images


# loading functions with pre-allocation and approx heat-map generation


def create_approx_heat_maps_alloc_once(landmarks, maps, gauss_filt=None, win_mult=3.5, num_landmarks=68, image_size=256,
                                       sigma=6):
    """ create heatmaps from input landmarks"""
    maps.fill(0.)

    win_size = int(win_mult * sigma)
    filt_size = 2 * win_size + 1
    landmarks = landmarks.astype(int)

    if gauss_filt is None:
        x_small, y_small = np.mgrid[0:2 * win_size + 1, 0:2 * win_size + 1]
        gauss_filt = (8. / 3) * sigma * gaussian(x_small, y_small, win_size, win_size, sigma=sigma)  # same as in ECT

    for i in range(num_landmarks):

        min_row = landmarks[i, 0] - win_size
        max_row = landmarks[i, 0] + win_size + 1
        min_col = landmarks[i, 1] - win_size
        max_col = landmarks[i, 1] + win_size + 1

        if min_row < 0:
            min_row_gap = -1 * min_row
            min_row = 0
        else:
            min_row_gap = 0

        if min_col < 0:
            min_col_gap = -1 * min_col
            min_col = 0
        else:
            min_col_gap = 0

        if max_row > image_size:
            max_row_gap = max_row - image_size
            max_row = image_size
        else:
            max_row_gap = 0

        if max_col > image_size:
            max_col_gap = max_col - image_size
            max_col = image_size
        else:
            max_col_gap = 0

        maps[min_row:max_row, min_col:max_col, i] =\
            gauss_filt[min_row_gap:filt_size - 1 * max_row_gap, min_col_gap:filt_size - 1 * max_col_gap]


def load_images_landmarks_approx_maps_alloc_once(
        img_list, batch_inds, images, maps_small, maps, landmarks, image_size=256, num_landmarks=68,
        scale=255, gauss_filt_large=None, gauss_filt_small=None, win_mult=3.5, sigma=6, save_landmarks=False):

    """ load images and gt landmarks from menpo image list, and create matching heatmaps """

    batch_menpo_images = img_list[batch_inds]
    c_dim = images.shape[-1]
    grp_name = batch_menpo_images[0].landmarks.group_labels[0]

    win_size_large = int(win_mult * sigma)
    win_size_small = int(win_mult * (1.*sigma/4))

    if gauss_filt_small is None:
        x_small, y_small = np.mgrid[0:2 * win_size_small + 1, 0:2 * win_size_small + 1]
        gauss_filt_small = (8. / 3) * (1.*sigma/4) * gaussian(
            x_small, y_small, win_size_small, win_size_small, sigma=1.*sigma/4)  # same as in ECT
    if gauss_filt_large is None:
        x_large, y_large = np.mgrid[0:2 * win_size_large + 1, 0:2 * win_size_large + 1]
        gauss_filt_large = (8. / 3) * sigma * gaussian(x_large, y_large, win_size_large, win_size_large, sigma=sigma)  # same as in ECT

    for ind, img in enumerate(batch_menpo_images):
        if img.n_channels < 3 and c_dim == 3:
            images[ind, :, :, :] = gray2rgb(img.pixels_with_channels_at_back())
        else:
            images[ind, :, :, :] = img.pixels_with_channels_at_back()

        lms = img.landmarks[grp_name].points
        lms = np.minimum(lms, image_size - 1)
        create_approx_heat_maps_alloc_once(
            landmarks=lms, maps=maps[ind, :, :, :], gauss_filt=gauss_filt_large, win_mult=win_mult,
            num_landmarks=num_landmarks, image_size=image_size, sigma=sigma)

        lms_small = img.resize([image_size // 4, image_size // 4]).landmarks[grp_name].points  # '//' keeps sizes integral under python 3
        lms_small = np.minimum(lms_small, image_size // 4 - 1)
        create_approx_heat_maps_alloc_once(
            landmarks=lms_small, maps=maps_small[ind, :, :, :], gauss_filt=gauss_filt_small, win_mult=win_mult,
            num_landmarks=num_landmarks, image_size=image_size // 4, sigma=1. * sigma / 4)

        if save_landmarks:
            landmarks[ind, :, :] = lms

    if scale == 255:
        images *= 255
    elif scale == 0:
        images[:] = 2 * images - 1  # in-place so the caller's pre-allocated buffer is updated
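A small sketch of the pre-allocated heatmap generation above (illustrative only; the landmark array is random and `maps` is the caller-owned buffer that `create_approx_heat_maps_alloc_once` fills in place):

```python
import numpy as np

num_landmarks, image_size, sigma = 68, 256, 6
maps = np.zeros((image_size, image_size, num_landmarks), dtype='float32')  # reused across batches
lms = np.random.randint(0, image_size, size=(num_landmarks, 2))            # placeholder (row, col) landmarks

gauss_filt = create_gaussian_filter(sigma=sigma, win_mult=3.5)             # build the kernel once
create_approx_heat_maps_alloc_once(landmarks=lms, maps=maps,
                                   gauss_filt=gauss_filt, num_landmarks=num_landmarks,
                                   image_size=image_size, sigma=sigma)
print(maps.max() > 0)                                                      # True: gaussians pasted at each landmark
```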
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/data_loading_functions.pyc
ADDED
Binary file (5.95 kB).
marlenezw/audio-driven-animations/MakeItTalk/face_of_art/deep_heatmaps_model_fusion_net.py
ADDED
@@ -0,0 +1,872 @@
1 |
+
import scipy.io
|
2 |
+
import scipy.misc
|
3 |
+
from glob import glob
|
4 |
+
import os
|
5 |
+
import numpy as np
|
6 |
+
from thirdparty.face_of_art.ops import *
|
7 |
+
import tensorflow as tf
|
8 |
+
from tensorflow import contrib
|
9 |
+
from thirdparty.face_of_art.menpo_functions import *
|
10 |
+
from thirdparty.face_of_art.logging_functions import *
|
11 |
+
from thirdparty.face_of_art.data_loading_functions import *
|
12 |
+
|
13 |
+
|
14 |
+
class DeepHeatmapsModel(object):
|
15 |
+
|
16 |
+
"""facial landmark localization Network"""
|
17 |
+
|
18 |
+
def __init__(self, mode='TRAIN', train_iter=100000, batch_size=10, learning_rate=1e-3, l_weight_primary=1.,
|
19 |
+
l_weight_fusion=1.,l_weight_upsample=3.,adam_optimizer=True,momentum=0.95,step=100000, gamma=0.1,reg=0,
|
20 |
+
weight_initializer='xavier', weight_initializer_std=0.01, bias_initializer=0.0, image_size=256,c_dim=3,
|
21 |
+
num_landmarks=68, sigma=1.5, scale=1, margin=0.25, bb_type='gt', win_mult=3.33335,
|
22 |
+
augment_basic=True,augment_texture=False, p_texture=0., augment_geom=False, p_geom=0.,
|
23 |
+
output_dir='output', save_model_path='model',
|
24 |
+
save_sample_path='sample', save_log_path='logs', test_model_path='model/deep_heatmaps-50000',
|
25 |
+
pre_train_path='model/deep_heatmaps-50000', load_pretrain=False, load_primary_only=False,
|
26 |
+
img_path='data', test_data='full', valid_data='full', valid_size=0, log_valid_every=5,
|
27 |
+
train_crop_dir='crop_gt_margin_0.25', img_dir_ns='crop_gt_margin_0.25_ns',
|
28 |
+
print_every=100, save_every=5000, sample_every=5000, sample_grid=9, sample_to_log=True,
|
29 |
+
debug_data_size=20, debug=False, epoch_data_dir='epoch_data', use_epoch_data=False, menpo_verbose=True):
|
30 |
+
|
31 |
+
# define some extra parameters
|
32 |
+
|
33 |
+
self.log_histograms = False # save weight + gradient histogram to log
|
34 |
+
self.save_valid_images = True # sample heat maps of validation images
|
35 |
+
self.sample_per_channel = False # sample heatmaps separately for each landmark
|
36 |
+
|
37 |
+
# for fine-tuning, choose reset_training_op==True. when resuming training, reset_training_op==False
|
38 |
+
self.reset_training_op = False
|
39 |
+
|
40 |
+
self.fast_img_gen = True
|
41 |
+
|
42 |
+
self.compute_nme = True # compute normalized mean error
|
43 |
+
|
44 |
+
self.config = tf.ConfigProto()
|
45 |
+
self.config.gpu_options.allow_growth = True
|
46 |
+
|
47 |
+
# sampling and logging parameters
|
48 |
+
self.print_every = print_every # print losses to screen + log
|
49 |
+
self.save_every = save_every # save model
|
50 |
+
self.sample_every = sample_every # save images of gen heat maps compared to GT
|
51 |
+
self.sample_grid = sample_grid # number of training images in sample
|
52 |
+
self.sample_to_log = sample_to_log # sample images to log instead of disk
|
53 |
+
self.log_valid_every = log_valid_every # log validation loss (in epochs)
|
54 |
+
|
        self.debug = debug
        self.debug_data_size = debug_data_size
        self.use_epoch_data = use_epoch_data
        self.epoch_data_dir = epoch_data_dir

        self.load_pretrain = load_pretrain
        self.load_primary_only = load_primary_only
        self.pre_train_path = pre_train_path

        self.mode = mode
        self.train_iter = train_iter
        self.learning_rate = learning_rate

        self.image_size = image_size
        self.c_dim = c_dim
        self.batch_size = batch_size

        self.num_landmarks = num_landmarks

        self.save_log_path = save_log_path
        self.save_sample_path = save_sample_path
        self.save_model_path = save_model_path
        self.test_model_path = test_model_path
        self.img_path = img_path

        self.momentum = momentum
        self.step = step  # for lr decay
        self.gamma = gamma  # for lr decay
        self.reg = reg  # weight decay scale
        self.l_weight_primary = l_weight_primary  # primary loss weight
        self.l_weight_fusion = l_weight_fusion  # fusion loss weight
        self.l_weight_upsample = l_weight_upsample  # upsample loss weight

        self.weight_initializer = weight_initializer  # random_normal or xavier
        self.weight_initializer_std = weight_initializer_std
        self.bias_initializer = bias_initializer
        self.adam_optimizer = adam_optimizer

        self.sigma = sigma  # sigma for heatmap generation
        self.scale = scale  # scale for image normalization 255 / 1 / 0
        self.win_mult = win_mult  # gaussian filter size for cpu/gpu approximation: 2 * sigma * win_mult + 1

        self.test_data = test_data  # if mode is TEST, this chooses the set to use: full/common/challenging/test/art
        self.train_crop_dir = train_crop_dir
        self.img_dir_ns = os.path.join(img_path, img_dir_ns)
        self.augment_basic = augment_basic  # perform basic augmentation (rotation, flip, crop)
        self.augment_texture = augment_texture  # perform artistic texture augmentation (NS)
        self.p_texture = p_texture  # initial probability of artistic texture augmentation
        self.augment_geom = augment_geom  # perform artistic geometric augmentation
        self.p_geom = p_geom  # initial probability of artistic geometric augmentation

        self.valid_size = valid_size
        self.valid_data = valid_data

        # load image, bb and landmark data using menpo
        self.bb_dir = os.path.join(img_path, 'Bounding_Boxes')
        self.bb_dictionary = load_bb_dictionary(self.bb_dir, mode, test_data=self.test_data)

        # use pre-augmented data to save time during training
        if self.use_epoch_data:
            epoch_0 = os.path.join(self.epoch_data_dir, '0')
            self.img_menpo_list = load_menpo_image_list(
                img_path, train_crop_dir=epoch_0, img_dir_ns=None, mode=mode, bb_dictionary=self.bb_dictionary,
                image_size=self.image_size, test_data=self.test_data, augment_basic=False, augment_texture=False,
                augment_geom=False, verbose=menpo_verbose)
        else:
            self.img_menpo_list = load_menpo_image_list(
                img_path, train_crop_dir, self.img_dir_ns, mode, bb_dictionary=self.bb_dictionary,
                image_size=self.image_size, margin=margin, bb_type=bb_type, test_data=self.test_data,
                augment_basic=augment_basic, augment_texture=augment_texture, p_texture=p_texture,
                augment_geom=augment_geom, p_geom=p_geom, verbose=menpo_verbose)

        if mode == 'TRAIN':

            train_params = locals()
            print_training_params_to_file(train_params)  # save init parameters

            self.train_inds = np.arange(len(self.img_menpo_list))

            if self.debug:
                self.train_inds = self.train_inds[:self.debug_data_size]
                self.img_menpo_list = self.img_menpo_list[self.train_inds]

            if valid_size > 0:

                self.valid_bb_dictionary = load_bb_dictionary(self.bb_dir, 'TEST', test_data=self.valid_data)
                self.valid_img_menpo_list = load_menpo_image_list(
                    img_path, train_crop_dir, self.img_dir_ns, 'TEST', bb_dictionary=self.valid_bb_dictionary,
                    image_size=self.image_size, margin=margin, bb_type=bb_type, test_data=self.valid_data,
                    verbose=menpo_verbose)

                np.random.seed(0)
                self.val_inds = np.arange(len(self.valid_img_menpo_list))
                np.random.shuffle(self.val_inds)
                self.val_inds = self.val_inds[:self.valid_size]

                self.valid_img_menpo_list = self.valid_img_menpo_list[self.val_inds]

                self.valid_images_loaded = \
                    np.zeros([self.valid_size, self.image_size, self.image_size, self.c_dim]).astype('float32')
                self.valid_gt_maps_small_loaded = \
                    np.zeros([self.valid_size, int(self.image_size / 4), int(self.image_size / 4),
                              self.num_landmarks]).astype('float32')
                self.valid_gt_maps_loaded = \
                    np.zeros([self.valid_size, self.image_size, self.image_size,
                              self.num_landmarks]).astype('float32')
                self.valid_landmarks_loaded = np.zeros([self.valid_size, num_landmarks, 2]).astype('float32')
                self.valid_landmarks_pred = np.zeros([self.valid_size, self.num_landmarks, 2]).astype('float32')

                load_images_landmarks_approx_maps_alloc_once(
                    self.valid_img_menpo_list, np.arange(self.valid_size), images=self.valid_images_loaded,
                    maps_small=self.valid_gt_maps_small_loaded, maps=self.valid_gt_maps_loaded,
                    landmarks=self.valid_landmarks_loaded, image_size=self.image_size,
                    num_landmarks=self.num_landmarks, scale=self.scale, win_mult=self.win_mult, sigma=self.sigma,
                    save_landmarks=self.compute_nme)

                if self.valid_size > self.sample_grid:
                    self.valid_gt_maps_loaded = self.valid_gt_maps_loaded[:self.sample_grid]
                    self.valid_gt_maps_small_loaded = self.valid_gt_maps_small_loaded[:self.sample_grid]
            else:
                self.val_inds = None

            self.epoch_inds_shuffle = train_val_shuffle_inds_per_epoch(
                self.val_inds, self.train_inds, train_iter, batch_size, save_log_path)
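    # A minimal usage sketch (illustrative only): the class name and argument
    # names below are assumed from this file's constructor, and the dataset /
    # output paths are hypothetical placeholders.
    #
    #   model = DeepHeatmapsModel(mode='TRAIN', img_path='data/300W',
    #                             save_log_path='logs/', save_model_path='model/')
    #   model.train()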
    def add_placeholders(self):

        if self.mode == 'TEST':
            self.images = tf.placeholder(
                tf.float32, [None, self.image_size, self.image_size, self.c_dim], 'images')

            self.heatmaps = tf.placeholder(
                tf.float32, [None, self.image_size, self.image_size, self.num_landmarks], 'heatmaps')

            self.heatmaps_small = tf.placeholder(
                tf.float32, [None, int(self.image_size / 4), int(self.image_size / 4), self.num_landmarks],
                'heatmaps_small')
            self.lms = tf.placeholder(tf.float32, [None, self.num_landmarks, 2], 'lms')
            self.pred_lms = tf.placeholder(tf.float32, [None, self.num_landmarks, 2], 'pred_lms')

        elif self.mode == 'TRAIN':
            self.images = tf.placeholder(
                tf.float32, [None, self.image_size, self.image_size, self.c_dim], 'train_images')

            self.heatmaps = tf.placeholder(
                tf.float32, [None, self.image_size, self.image_size, self.num_landmarks], 'train_heatmaps')

            self.heatmaps_small = tf.placeholder(
                tf.float32, [None, int(self.image_size / 4), int(self.image_size / 4), self.num_landmarks],
                'train_heatmaps_small')

            self.train_lms = tf.placeholder(tf.float32, [None, self.num_landmarks, 2], 'train_lms')
            self.train_pred_lms = tf.placeholder(tf.float32, [None, self.num_landmarks, 2], 'train_pred_lms')

            self.valid_lms = tf.placeholder(tf.float32, [None, self.num_landmarks, 2], 'valid_lms')
            self.valid_pred_lms = tf.placeholder(tf.float32, [None, self.num_landmarks, 2], 'valid_pred_lms')

            # self.p_texture_log = tf.placeholder(tf.float32, [])
            # self.p_geom_log = tf.placeholder(tf.float32, [])

            # self.sparse_hm_small = tf.placeholder(tf.float32, [None, int(self.image_size/4), int(self.image_size/4), 1])
            # self.sparse_hm = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 1])

            if self.sample_to_log:
                row = int(np.sqrt(self.sample_grid))
                self.log_image_map_small = tf.placeholder(
                    tf.uint8, [None, row * int(self.image_size / 4), 3 * row * int(self.image_size / 4), self.c_dim],
                    'sample_img_map_small')
                self.log_image_map = tf.placeholder(
                    tf.uint8, [None, row * self.image_size, 3 * row * self.image_size, self.c_dim],
                    'sample_img_map')
                if self.sample_per_channel:
                    row = np.ceil(np.sqrt(self.num_landmarks)).astype(np.int64)
                    self.log_map_channels_small = tf.placeholder(
                        tf.uint8, [None, row * int(self.image_size / 4), 2 * row * int(self.image_size / 4),
                                   self.c_dim],
                        'sample_map_channels_small')
                    self.log_map_channels = tf.placeholder(
                        tf.uint8, [None, row * self.image_size, 2 * row * self.image_size, self.c_dim],
                        'sample_map_channels')
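    # Shape arithmetic for the sample-log placeholders above (a worked example,
    # not new behavior): with sample_grid == 9 we get row == 3, so for
    # image_size == 256 log_image_map is [None, 3 * 256, 3 * 3 * 256, c_dim] =
    # [None, 768, 2304, c_dim] - a 3x3 grid of samples, each shown as
    # image / predicted map / GT map side by side.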
    def heatmaps_network(self, input_images, reuse=None, name='pred_heatmaps'):

        with tf.name_scope(name):

            if self.weight_initializer == 'xavier':
                weight_initializer = contrib.layers.xavier_initializer()
            else:
                weight_initializer = tf.random_normal_initializer(stddev=self.weight_initializer_std)

            bias_init = tf.constant_initializer(self.bias_initializer)

            with tf.variable_scope('heatmaps_network'):
                with tf.name_scope('primary_net'):

                    l1 = conv_relu_pool(input_images, 5, 128, conv_ker_init=weight_initializer,
                                        conv_bias_init=bias_init, reuse=reuse, var_scope='conv_1')
                    l2 = conv_relu_pool(l1, 5, 128, conv_ker_init=weight_initializer, conv_bias_init=bias_init,
                                        reuse=reuse, var_scope='conv_2')
                    l3 = conv_relu(l2, 5, 128, conv_ker_init=weight_initializer, conv_bias_init=bias_init,
                                   reuse=reuse, var_scope='conv_3')

                    l4_1 = conv_relu(l3, 3, 128, conv_dilation=1, conv_ker_init=weight_initializer,
                                     conv_bias_init=bias_init, reuse=reuse, var_scope='conv_4_1')
                    l4_2 = conv_relu(l3, 3, 128, conv_dilation=2, conv_ker_init=weight_initializer,
                                     conv_bias_init=bias_init, reuse=reuse, var_scope='conv_4_2')
                    l4_3 = conv_relu(l3, 3, 128, conv_dilation=3, conv_ker_init=weight_initializer,
                                     conv_bias_init=bias_init, reuse=reuse, var_scope='conv_4_3')
                    l4_4 = conv_relu(l3, 3, 128, conv_dilation=4, conv_ker_init=weight_initializer,
                                     conv_bias_init=bias_init, reuse=reuse, var_scope='conv_4_4')

                    l4 = tf.concat([l4_1, l4_2, l4_3, l4_4], 3, name='conv_4')

                    l5_1 = conv_relu(l4, 3, 256, conv_dilation=1, conv_ker_init=weight_initializer,
                                     conv_bias_init=bias_init, reuse=reuse, var_scope='conv_5_1')
                    l5_2 = conv_relu(l4, 3, 256, conv_dilation=2, conv_ker_init=weight_initializer,
                                     conv_bias_init=bias_init, reuse=reuse, var_scope='conv_5_2')
                    l5_3 = conv_relu(l4, 3, 256, conv_dilation=3, conv_ker_init=weight_initializer,
                                     conv_bias_init=bias_init, reuse=reuse, var_scope='conv_5_3')
                    l5_4 = conv_relu(l4, 3, 256, conv_dilation=4, conv_ker_init=weight_initializer,
                                     conv_bias_init=bias_init, reuse=reuse, var_scope='conv_5_4')

                    l5 = tf.concat([l5_1, l5_2, l5_3, l5_4], 3, name='conv_5')

                    l6 = conv_relu(l5, 1, 512, conv_ker_init=weight_initializer,
                                   conv_bias_init=bias_init, reuse=reuse, var_scope='conv_6')
                    l7 = conv_relu(l6, 1, 256, conv_ker_init=weight_initializer,
                                   conv_bias_init=bias_init, reuse=reuse, var_scope='conv_7')
                    primary_out = conv(l7, 1, self.num_landmarks, conv_ker_init=weight_initializer,
                                       conv_bias_init=bias_init, reuse=reuse, var_scope='conv_8')

                with tf.name_scope('fusion_net'):

                    l_fsn_0 = tf.concat([l3, l7], 3, name='conv_3_7_fsn')

                    l_fsn_1_1 = conv_relu(l_fsn_0, 3, 64, conv_dilation=1, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_1_1')
                    l_fsn_1_2 = conv_relu(l_fsn_0, 3, 64, conv_dilation=2, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_1_2')
                    l_fsn_1_3 = conv_relu(l_fsn_0, 3, 64, conv_dilation=3, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_1_3')

                    l_fsn_1 = tf.concat([l_fsn_1_1, l_fsn_1_2, l_fsn_1_3], 3, name='conv_fsn_1')

                    l_fsn_2_1 = conv_relu(l_fsn_1, 3, 64, conv_dilation=1, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_2_1')
                    l_fsn_2_2 = conv_relu(l_fsn_1, 3, 64, conv_dilation=2, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_2_2')
                    l_fsn_2_3 = conv_relu(l_fsn_1, 3, 64, conv_dilation=4, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_2_3')
                    l_fsn_2_4 = conv_relu(l_fsn_1, 5, 64, conv_dilation=3, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_2_4')

                    l_fsn_2 = tf.concat([l_fsn_2_1, l_fsn_2_2, l_fsn_2_3, l_fsn_2_4], 3, name='conv_fsn_2')

                    l_fsn_3_1 = conv_relu(l_fsn_2, 3, 128, conv_dilation=1, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_3_1')
                    l_fsn_3_2 = conv_relu(l_fsn_2, 3, 128, conv_dilation=2, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_3_2')
                    l_fsn_3_3 = conv_relu(l_fsn_2, 3, 128, conv_dilation=4, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_3_3')
                    l_fsn_3_4 = conv_relu(l_fsn_2, 5, 128, conv_dilation=3, conv_ker_init=weight_initializer,
                                          conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_3_4')

                    l_fsn_3 = tf.concat([l_fsn_3_1, l_fsn_3_2, l_fsn_3_3, l_fsn_3_4], 3, name='conv_fsn_3')

                    l_fsn_4 = conv_relu(l_fsn_3, 1, 256, conv_ker_init=weight_initializer,
                                        conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_4')
                    fusion_out = conv(l_fsn_4, 1, self.num_landmarks, conv_ker_init=weight_initializer,
                                      conv_bias_init=bias_init, reuse=reuse, var_scope='conv_fsn_5')

                with tf.name_scope('upsample_net'):

                    out = deconv(fusion_out, 8, self.num_landmarks, conv_stride=4,
                                 conv_ker_init=deconv2d_bilinear_upsampling_initializer(
                                     [8, 8, self.num_landmarks, self.num_landmarks]), conv_bias_init=bias_init,
                                 reuse=reuse, var_scope='deconv_1')

                self.all_layers = [l1, l2, l3, l4, l5, l6, l7, primary_out, l_fsn_1, l_fsn_2, l_fsn_3, l_fsn_4,
                                   fusion_out, out]

                return primary_out, fusion_out, out
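    # Receptive-field arithmetic for the dilated branches above (illustrative):
    # a k x k convolution with dilation d covers an effective extent of
    # k + (k - 1) * (d - 1) pixels, so the 3x3 branches with d = 1, 2, 3, 4
    # span 3, 5, 7 and 9 pixels respectively; concatenating them mixes several
    # receptive-field sizes at the same resolution.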
    def build_model(self):
        self.pred_hm_p, self.pred_hm_f, self.pred_hm_u = self.heatmaps_network(
            self.images, name='heatmaps_prediction')

    def create_loss_ops(self):

        def nme_norm_eyes(pred_landmarks, real_landmarks, normalize=True, name='NME'):
            """calculate normalized mean error on landmarks - normalized by inter-pupil distance"""

            with tf.name_scope(name):
                with tf.name_scope('real_pred_landmarks_rmse'):
                    # mean point-to-point (Euclidean) error between GT and predicted lms
                    landmarks_rms_err = tf.reduce_mean(
                        tf.sqrt(tf.reduce_sum(tf.square(pred_landmarks - real_landmarks), axis=2)), axis=1)
                if normalize:
                    # normalize the error with the inter-pupil distance of the GT lms
                    with tf.name_scope('inter_pupil_dist'):
                        with tf.name_scope('left_eye_center'):
                            p1 = tf.reduce_mean(tf.slice(real_landmarks, [0, 42, 0], [-1, 6, 2]), axis=1)
                        with tf.name_scope('right_eye_center'):
                            p2 = tf.reduce_mean(tf.slice(real_landmarks, [0, 36, 0], [-1, 6, 2]), axis=1)

                        eye_dist = tf.sqrt(tf.reduce_sum(tf.square(p1 - p2), axis=1))

                    return landmarks_rms_err / eye_dist
                else:
                    return landmarks_rms_err

        if self.mode == 'TRAIN':

            # calculate L2 loss between ideal and predicted heatmaps
            primary_maps_diff = self.pred_hm_p - self.heatmaps_small
            fusion_maps_diff = self.pred_hm_f - self.heatmaps_small
            upsample_maps_diff = self.pred_hm_u - self.heatmaps

            self.l2_primary = tf.reduce_mean(tf.square(primary_maps_diff))
            self.l2_fusion = tf.reduce_mean(tf.square(fusion_maps_diff))
            self.l2_upsample = tf.reduce_mean(tf.square(upsample_maps_diff))

            self.total_loss = 1000. * (self.l_weight_primary * self.l2_primary +
                                       self.l_weight_fusion * self.l2_fusion +
                                       self.l_weight_upsample * self.l2_upsample)

            # add weight decay
            self.total_loss += self.reg * tf.add_n(
                [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name])

            # compute normalized mean error on gt vs. predicted landmarks (for validation)
            if self.compute_nme:
                self.nme_loss = tf.reduce_mean(nme_norm_eyes(self.train_pred_lms, self.train_lms))

            if self.valid_size > 0 and self.compute_nme:
                self.valid_nme_loss = tf.reduce_mean(nme_norm_eyes(self.valid_pred_lms, self.valid_lms))

        elif self.mode == 'TEST' and self.compute_nme:
            self.nme_per_image = nme_norm_eyes(self.pred_lms, self.lms)
            self.nme_loss = tf.reduce_mean(self.nme_per_image)
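    # Worked NME example (matching nme_norm_eyes above, which assumes the
    # 68-point annotation: indices 36-41 / 42-47 are the two eye contours):
    # a mean point-to-point error of 3.4 px with an inter-pupil distance of
    # 100 px gives a normalized mean error of 3.4 / 100 = 0.034.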
    def predict_valid_landmarks_in_batches(self, images, session):

        num_images = int(images.shape[0])
        num_batches = num_images // self.batch_size
        if num_batches == 0:
            batch_size = num_images
            num_batches = 1
        else:
            batch_size = self.batch_size

        for j in range(num_batches):

            batch_images = images[j * batch_size:(j + 1) * batch_size, :, :, :]
            batch_maps_pred = session.run(self.pred_hm_u, {self.images: batch_images})
            batch_heat_maps_to_landmarks_alloc_once(
                batch_maps=batch_maps_pred,
                batch_landmarks=self.valid_landmarks_pred[j * batch_size:(j + 1) * batch_size, :, :],
                batch_size=batch_size, image_size=self.image_size, num_landmarks=self.num_landmarks)

        remainder = num_images - num_batches * batch_size
        if remainder > 0:
            batch_images = images[-remainder:, :, :, :]
            batch_maps_pred = session.run(self.pred_hm_u, {self.images: batch_images})

            batch_heat_maps_to_landmarks_alloc_once(
                batch_maps=batch_maps_pred,
                batch_landmarks=self.valid_landmarks_pred[-remainder:, :, :],
                batch_size=remainder, image_size=self.image_size, num_landmarks=self.num_landmarks)
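    # Batching example for the method above: with 250 validation images and
    # batch_size == 60, num_batches == 4 covers images 0..239 and the
    # remainder pass handles the last 10, so every row of
    # self.valid_landmarks_pred is written exactly once.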
    def create_summary_ops(self):
        """create summary ops for logging"""

        # loss summary
        l2_primary = tf.summary.scalar('l2_primary', self.l2_primary)
        l2_fusion = tf.summary.scalar('l2_fusion', self.l2_fusion)
        l2_upsample = tf.summary.scalar('l2_upsample', self.l2_upsample)

        l_total = tf.summary.scalar('l_total', self.total_loss)
        self.batch_summary_op = tf.summary.merge([l2_primary, l2_fusion, l2_upsample, l_total])

        if self.compute_nme:
            nme = tf.summary.scalar('nme', self.nme_loss)
            self.batch_summary_op = tf.summary.merge([self.batch_summary_op, nme])

        if self.log_histograms:
            var_summary = [tf.summary.histogram(var.name, var) for var in tf.trainable_variables()]
            grads = tf.gradients(self.total_loss, tf.trainable_variables())
            grads = list(zip(grads, tf.trainable_variables()))
            grad_summary = [tf.summary.histogram(var.name + '/grads', grad) for grad, var in grads]
            activ_summary = [tf.summary.histogram(layer.name, layer) for layer in self.all_layers]
            self.batch_summary_op = tf.summary.merge([self.batch_summary_op, var_summary, grad_summary,
                                                      activ_summary])

        if self.valid_size > 0 and self.compute_nme:
            self.valid_summary = tf.summary.scalar('valid_nme', self.valid_nme_loss)

        if self.sample_to_log:
            img_map_summary_small = tf.summary.image('compare_map_to_gt_small', self.log_image_map_small)
            img_map_summary = tf.summary.image('compare_map_to_gt', self.log_image_map)

            if self.sample_per_channel:
                map_channels_summary = tf.summary.image('compare_map_channels_to_gt', self.log_map_channels)
                map_channels_summary_small = tf.summary.image('compare_map_channels_to_gt_small',
                                                              self.log_map_channels_small)
                self.img_summary = tf.summary.merge(
                    [img_map_summary, img_map_summary_small, map_channels_summary, map_channels_summary_small])
            else:
                self.img_summary = tf.summary.merge([img_map_summary, img_map_summary_small])

            if self.valid_size >= self.sample_grid:
                img_map_summary_valid_small = tf.summary.image('compare_map_to_gt_small_valid',
                                                               self.log_image_map_small)
                img_map_summary_valid = tf.summary.image('compare_map_to_gt_valid', self.log_image_map)

                if self.sample_per_channel:
                    map_channels_summary_valid_small = tf.summary.image('compare_map_channels_to_gt_small_valid',
                                                                        self.log_map_channels_small)
                    map_channels_summary_valid = tf.summary.image('compare_map_channels_to_gt_valid',
                                                                  self.log_map_channels)
                    self.img_summary_valid = tf.summary.merge(
                        [img_map_summary_valid, img_map_summary_valid_small, map_channels_summary_valid,
                         map_channels_summary_valid_small])
                else:
                    self.img_summary_valid = tf.summary.merge([img_map_summary_valid,
                                                               img_map_summary_valid_small])
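    # The training loop below uses staircase exponential decay:
    # lr(step) = learning_rate * gamma ** floor(step / self.step).
    # E.g. (illustrative values) learning_rate=1e-3, gamma=0.1, step=30000
    # gives 1e-3 for the first 30k iterations, then 1e-4, then 1e-5.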
    def train(self):
        # set random seed
        tf.set_random_seed(1234)
        np.random.seed(1234)
        # build a graph
        # add placeholders
        self.add_placeholders()
        # build model
        self.build_model()
        # create loss ops
        self.create_loss_ops()
        # create summary ops
        self.create_summary_ops()

        # create optimizer and training op
        global_step = tf.Variable(0, trainable=False)
        lr = tf.train.exponential_decay(self.learning_rate, global_step, self.step, self.gamma, staircase=True)
        if self.adam_optimizer:
            optimizer = tf.train.AdamOptimizer(lr)
        else:
            optimizer = tf.train.MomentumOptimizer(lr, self.momentum)

        train_op = optimizer.minimize(self.total_loss, global_step=global_step)

        with tf.Session(config=self.config) as sess:

            tf.global_variables_initializer().run()

            # load pre-trained weights if load_pretrain == True
            if self.load_pretrain:
                print()
                print('*** loading pre-trained weights from: ' + self.pre_train_path + ' ***')
                if self.load_primary_only:
                    print('*** loading primary-net only ***')
                    primary_var = [v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) if
                                   ('deconv_' not in v.name) and ('_fsn_' not in v.name)]
                    loader = tf.train.Saver(var_list=primary_var)
                else:
                    loader = tf.train.Saver()
                loader.restore(sess, self.pre_train_path)
                print("*** Model restore finished, current global step: %d" % global_step.eval())

            # for fine-tuning, choose reset_training_op == True; when resuming training, reset_training_op == False
            if self.reset_training_op:
                print("resetting optimizer and global step")
                opt_var_list = [optimizer.get_slot(var, name) for name in optimizer.get_slot_names()
                                for var in tf.global_variables() if optimizer.get_slot(var, name) is not None]
                opt_var_list_init = tf.variables_initializer(opt_var_list)
                opt_var_list_init.run()
                sess.run(global_step.initializer)

            # create model saver and file writer
            summary_writer = tf.summary.FileWriter(logdir=self.save_log_path, graph=tf.get_default_graph())
            saver = tf.train.Saver()

            print('\n*** Start Training ***')

            # initialize some variables before training loop
            resume_step = global_step.eval()
            num_train_images = len(self.img_menpo_list)
            batches_in_epoch = int(float(num_train_images) / float(self.batch_size))
            epoch = int(resume_step / batches_in_epoch)
            img_inds = self.epoch_inds_shuffle[epoch, :]
            log_valid = True
            log_valid_images = True

            # allocate space for batch images, maps and landmarks
            batch_images = np.zeros(
                [self.batch_size, self.image_size, self.image_size, self.c_dim]).astype('float32')
            batch_lms = np.zeros([self.batch_size, self.num_landmarks, 2]).astype('float32')
            batch_lms_pred = np.zeros([self.batch_size, self.num_landmarks, 2]).astype('float32')

            batch_maps_small = np.zeros((self.batch_size, int(self.image_size / 4),
                                         int(self.image_size / 4), self.num_landmarks)).astype('float32')
            batch_maps = np.zeros((self.batch_size, self.image_size, self.image_size,
                                   self.num_landmarks)).astype('float32')

            # create gaussians for heatmap generation
            gaussian_filt_large = create_gaussian_filter(sigma=self.sigma, win_mult=self.win_mult)
            gaussian_filt_small = create_gaussian_filter(sigma=1. * self.sigma / 4, win_mult=self.win_mult)
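            # Filter-size arithmetic (per the win_mult comment in __init__):
            # the truncated Gaussian window is roughly 2 * sigma * win_mult + 1
            # pixels wide, e.g. sigma == 6 and win_mult ~= 3.33 (illustrative
            # values) give a window of about 41 px; the small maps use
            # sigma / 4 to match the quarter-resolution heatmaps.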
            # training loop
            for step in range(resume_step, self.train_iter):

                j = step % batches_in_epoch  # j == 0 if we finished an epoch

                # if we finished an epoch and this isn't the first step
                if step > resume_step and j == 0:
                    epoch += 1
                    img_inds = self.epoch_inds_shuffle[epoch, :]  # get next shuffled image inds
                    log_valid = True
                    log_valid_images = True
                    if self.use_epoch_data:  # if using pre-augmented data, load epoch directory
                        epoch_dir = os.path.join(self.epoch_data_dir, str(epoch))
                        self.img_menpo_list = load_menpo_image_list(
                            self.img_path, train_crop_dir=epoch_dir, img_dir_ns=None, mode=self.mode,
                            bb_dictionary=self.bb_dictionary, image_size=self.image_size,
                            test_data=self.test_data,
                            augment_basic=False, augment_texture=False, augment_geom=False)

                # get batch indices
                batch_inds = img_inds[j * self.batch_size:(j + 1) * self.batch_size]

                # load batch images, gt maps and landmarks
                load_images_landmarks_approx_maps_alloc_once(
                    self.img_menpo_list, batch_inds, images=batch_images, maps_small=batch_maps_small,
                    maps=batch_maps, landmarks=batch_lms, image_size=self.image_size,
                    num_landmarks=self.num_landmarks, scale=self.scale, gauss_filt_large=gaussian_filt_large,
                    gauss_filt_small=gaussian_filt_small, win_mult=self.win_mult, sigma=self.sigma,
                    save_landmarks=self.compute_nme)

                feed_dict_train = {self.images: batch_images, self.heatmaps: batch_maps,
                                   self.heatmaps_small: batch_maps_small}

                # train on batch
                sess.run(train_op, feed_dict_train)

                # save to log and print status
                if step == resume_step or (step + 1) % self.print_every == 0:

                    # train data log
                    if self.compute_nme:
                        batch_maps_pred = sess.run(self.pred_hm_u, {self.images: batch_images})

                        batch_heat_maps_to_landmarks_alloc_once(
                            batch_maps=batch_maps_pred, batch_landmarks=batch_lms_pred,
                            batch_size=self.batch_size, image_size=self.image_size,
                            num_landmarks=self.num_landmarks)

                        train_feed_dict_log = {
                            self.images: batch_images, self.heatmaps: batch_maps,
                            self.heatmaps_small: batch_maps_small, self.train_lms: batch_lms,
                            self.train_pred_lms: batch_lms_pred}

                        summary, l_p, l_f, l_t, nme = sess.run(
                            [self.batch_summary_op, self.l2_primary, self.l2_fusion, self.total_loss,
                             self.nme_loss],
                            train_feed_dict_log)

                        print(
                            'epoch: [%d] step: [%d/%d] primary loss: [%.6f] fusion loss: [%.6f]'
                            ' total loss: [%.6f] NME: [%.6f]' % (
                                epoch, step + 1, self.train_iter, l_p, l_f, l_t, nme))
                    else:
                        train_feed_dict_log = {self.images: batch_images, self.heatmaps: batch_maps,
                                               self.heatmaps_small: batch_maps_small}

                        summary, l_p, l_f, l_t = sess.run(
                            [self.batch_summary_op, self.l2_primary, self.l2_fusion, self.total_loss],
                            train_feed_dict_log)
                        print(
                            'epoch: [%d] step: [%d/%d] primary loss: [%.6f] fusion loss: [%.6f]'
                            ' total loss: [%.6f]' % (epoch, step + 1, self.train_iter, l_p, l_f, l_t))

                    summary_writer.add_summary(summary, step)

                    # valid data log
                    if self.valid_size > 0 and (log_valid and epoch % self.log_valid_every == 0) \
                            and self.compute_nme:
                        log_valid = False

                        self.predict_valid_landmarks_in_batches(self.valid_images_loaded, sess)
                        valid_feed_dict_log = {
                            self.valid_lms: self.valid_landmarks_loaded,
                            self.valid_pred_lms: self.valid_landmarks_pred}

                        v_summary, v_nme = sess.run([self.valid_summary, self.valid_nme_loss],
                                                    valid_feed_dict_log)
                        summary_writer.add_summary(v_summary, step)
                        print(
                            'epoch: [%d] step: [%d/%d] valid NME: [%.6f]' % (
                                epoch, step + 1, self.train_iter, v_nme))

                # save model
                if (step + 1) % self.save_every == 0:
                    saver.save(sess, os.path.join(self.save_model_path, 'deep_heatmaps'), global_step=step + 1)
                    print('model/deep-heatmaps-%d saved' % (step + 1))

                # save images
                if step == resume_step or (step + 1) % self.sample_every == 0:

                    batch_maps_small_pred = sess.run(self.pred_hm_p, {self.images: batch_images})
                    if not self.compute_nme:
                        batch_maps_pred = sess.run(self.pred_hm_u, {self.images: batch_images})
                        batch_lms_pred = None

                    merged_img = merge_images_landmarks_maps_gt(
                        batch_images.copy(), batch_maps_pred, batch_maps, landmarks=batch_lms_pred,
                        image_size=self.image_size, num_landmarks=self.num_landmarks,
                        num_samples=self.sample_grid,
                        scale=self.scale, circle_size=2, fast=self.fast_img_gen)

                    merged_img_small = merge_images_landmarks_maps_gt(
                        batch_images.copy(), batch_maps_small_pred, batch_maps_small,
                        image_size=self.image_size,
                        num_landmarks=self.num_landmarks, num_samples=self.sample_grid, scale=self.scale,
                        circle_size=0, fast=self.fast_img_gen)

                    if self.sample_per_channel:
                        map_per_channel = map_comapre_channels(
                            batch_images.copy(), batch_maps_pred, batch_maps, image_size=self.image_size,
                            num_landmarks=self.num_landmarks, scale=self.scale)

                        map_per_channel_small = map_comapre_channels(
                            batch_images.copy(), batch_maps_small_pred, batch_maps_small,
                            image_size=int(self.image_size / 4),
                            num_landmarks=self.num_landmarks, scale=self.scale)

                    if self.sample_to_log:  # save heatmap images to log
                        if self.sample_per_channel:
                            summary_img = sess.run(
                                self.img_summary, {self.log_image_map: np.expand_dims(merged_img, 0),
                                                   self.log_map_channels: np.expand_dims(map_per_channel, 0),
                                                   self.log_image_map_small: np.expand_dims(merged_img_small, 0),
                                                   self.log_map_channels_small: np.expand_dims(
                                                       map_per_channel_small, 0)})
                        else:
                            summary_img = sess.run(
                                self.img_summary, {self.log_image_map: np.expand_dims(merged_img, 0),
                                                   self.log_image_map_small: np.expand_dims(merged_img_small, 0)})
                        summary_writer.add_summary(summary_img, step)

                        if (self.valid_size >= self.sample_grid) and self.save_valid_images and \
                                (log_valid_images and epoch % self.log_valid_every == 0):
                            log_valid_images = False

                            batch_maps_small_pred_val, batch_maps_pred_val = \
                                sess.run([self.pred_hm_p, self.pred_hm_u],
                                         {self.images: self.valid_images_loaded[:self.sample_grid]})

                            merged_img_small = merge_images_landmarks_maps_gt(
                                self.valid_images_loaded[:self.sample_grid].copy(), batch_maps_small_pred_val,
                                self.valid_gt_maps_small_loaded, image_size=self.image_size,
                                num_landmarks=self.num_landmarks, num_samples=self.sample_grid,
                                scale=self.scale, circle_size=0, fast=self.fast_img_gen)

                            merged_img = merge_images_landmarks_maps_gt(
                                self.valid_images_loaded[:self.sample_grid].copy(), batch_maps_pred_val,
                                self.valid_gt_maps_loaded, image_size=self.image_size,
                                num_landmarks=self.num_landmarks, num_samples=self.sample_grid,
                                scale=self.scale, circle_size=2, fast=self.fast_img_gen)

                            if self.sample_per_channel:
                                map_per_channel_small = map_comapre_channels(
                                    self.valid_images_loaded[:self.sample_grid].copy(),
                                    batch_maps_small_pred_val,
                                    self.valid_gt_maps_small_loaded, image_size=int(self.image_size / 4),
                                    num_landmarks=self.num_landmarks, scale=self.scale)

                                # use the validation predictions here (the original passed the train-batch
                                # maps, which have the wrong contents for the validation GT maps)
                                map_per_channel = map_comapre_channels(
                                    self.valid_images_loaded[:self.sample_grid].copy(), batch_maps_pred_val,
                                    self.valid_gt_maps_loaded, image_size=self.image_size,
                                    num_landmarks=self.num_landmarks, scale=self.scale)

                                summary_img = sess.run(
                                    self.img_summary_valid,
                                    {self.log_image_map: np.expand_dims(merged_img, 0),
                                     self.log_map_channels: np.expand_dims(map_per_channel, 0),
                                     self.log_image_map_small: np.expand_dims(merged_img_small, 0),
                                     self.log_map_channels_small: np.expand_dims(map_per_channel_small, 0)})
                            else:
                                summary_img = sess.run(
                                    self.img_summary_valid,
                                    {self.log_image_map: np.expand_dims(merged_img, 0),
                                     self.log_image_map_small: np.expand_dims(merged_img_small, 0)})

                            summary_writer.add_summary(summary_img, step)
                    else:  # save heatmap images to directory
                        sample_path_imgs = os.path.join(
                            self.save_sample_path, 'epoch-%d-train-iter-%d-1.png' % (epoch, step + 1))
                        sample_path_imgs_small = os.path.join(
                            self.save_sample_path, 'epoch-%d-train-iter-%d-1-s.png' % (epoch, step + 1))
                        scipy.misc.imsave(sample_path_imgs, merged_img)
                        scipy.misc.imsave(sample_path_imgs_small, merged_img_small)

                        if self.sample_per_channel:
                            sample_path_ch_maps = os.path.join(
                                self.save_sample_path, 'epoch-%d-train-iter-%d-3.png' % (epoch, step + 1))
                            sample_path_ch_maps_small = os.path.join(
                                self.save_sample_path, 'epoch-%d-train-iter-%d-3-s.png' % (epoch, step + 1))
                            scipy.misc.imsave(sample_path_ch_maps, map_per_channel)
                            scipy.misc.imsave(sample_path_ch_maps_small, map_per_channel_small)

            print('*** Finished Training ***')
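    # Inference usage sketch (illustrative; the checkpoint path is a
    # hypothetical placeholder and assumes a model trained with the loop
    # above):
    #
    #   model = DeepHeatmapsModel(mode='TEST', test_model_path='model/deep_heatmaps-100000')
    #   maps_p, maps_f, maps_u = model.get_image_maps(menpo_img, norm=True)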
    def get_image_maps(self, test_image, reuse=None, norm=False):
        """returns heatmaps of input image (menpo image object)"""

        self.add_placeholders()
        # build model
        pred_hm_p, pred_hm_f, pred_hm_u = self.heatmaps_network(self.images, reuse=reuse)

        with tf.Session(config=self.config) as sess:
            # load trained parameters
            saver = tf.train.Saver()
            saver.restore(sess, self.test_model_path)
            _, model_name = os.path.split(self.test_model_path)

            test_image = test_image.pixels_with_channels_at_back().astype('float32')
            if norm:
                if self.scale == '255':
                    test_image *= 255
                elif self.scale == '0':
                    test_image = 2 * test_image - 1

            map_primary, map_fusion, map_upsample = sess.run(
                [pred_hm_p, pred_hm_f, pred_hm_u], {self.images: np.expand_dims(test_image, 0)})

        return map_primary, map_fusion, map_upsample
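    # Normalization convention used in get_image_maps (menpo pixel values are
    # in [0, 1]): scale '255' rescales to [0, 255], scale '0' rescales to
    # [-1, 1] via 2 * x - 1, and scale '1' leaves the [0, 1] range unchanged.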
    def get_landmark_predictions(self, img_list, pdm_models_dir, clm_model_path, reuse=None,
                                 map_to_input_size=False):

        """returns a dictionary with landmark predictions for each step of the ECpTp algorithm and for ECT"""

        from thirdparty.face_of_art.pdm_clm_functions import feature_based_pdm_corr, clm_correct

        jaw_line_inds = np.arange(0, 17)
        left_brow_inds = np.arange(17, 22)
        right_brow_inds = np.arange(22, 27)

        self.add_placeholders()
        # build model
        _, _, pred_hm_u = self.heatmaps_network(self.images, reuse=reuse)

        with tf.Session(config=self.config) as sess:
            # load trained parameters
            saver = tf.train.Saver()
            saver.restore(sess, self.test_model_path)
            _, model_name = os.path.split(self.test_model_path)
            e_list = []
            ect_list = []
            ecp_list = []
            ecpt_list = []
            ecptp_jaw_list = []
            ecptp_out_list = []

            for test_image in img_list:

                if map_to_input_size:
                    test_image_transform = test_image[1]
                    test_image = test_image[0]

                # get landmarks for estimation stage
                if test_image.n_channels < 3:
                    test_image_map = sess.run(
                        pred_hm_u, {self.images: np.expand_dims(
                            gray2rgb(test_image.pixels_with_channels_at_back()).astype('float32'), 0)})
                else:
                    test_image_map = sess.run(
                        pred_hm_u, {self.images: np.expand_dims(
                            test_image.pixels_with_channels_at_back().astype('float32'), 0)})
                init_lms = heat_maps_to_landmarks(np.squeeze(test_image_map))

                # get landmarks for part-based correction stage
                p_pdm_lms = feature_based_pdm_corr(lms_init=init_lms, models_dir=pdm_models_dir,
                                                   train_type='basic')

                # get landmarks for part-based tuning stage
                try:  # clm may not converge
                    pdm_clm_lms = clm_correct(
                        clm_model_path=clm_model_path, image=test_image, map=test_image_map, lms_init=p_pdm_lms)
                except Exception:
                    pdm_clm_lms = p_pdm_lms.copy()

                # get ECT landmarks
                try:  # clm may not converge
                    ect_lms = clm_correct(
                        clm_model_path=clm_model_path, image=test_image, map=test_image_map, lms_init=init_lms)
                except Exception:
                    ect_lms = p_pdm_lms.copy()

                # get landmarks for ECpTp_out (tune jaw and eyebrows)
                ecptp_out = p_pdm_lms.copy()
                ecptp_out[left_brow_inds] = pdm_clm_lms[left_brow_inds]
                ecptp_out[right_brow_inds] = pdm_clm_lms[right_brow_inds]
                ecptp_out[jaw_line_inds] = pdm_clm_lms[jaw_line_inds]

                # get landmarks for ECpTp_jaw (tune jaw)
                ecptp_jaw = p_pdm_lms.copy()
                ecptp_jaw[jaw_line_inds] = pdm_clm_lms[jaw_line_inds]

                if map_to_input_size:
                    ecptp_jaw = test_image_transform.apply(ecptp_jaw)
                    ecptp_out = test_image_transform.apply(ecptp_out)
                    ect_lms = test_image_transform.apply(ect_lms)
                    init_lms = test_image_transform.apply(init_lms)
                    p_pdm_lms = test_image_transform.apply(p_pdm_lms)
                    pdm_clm_lms = test_image_transform.apply(pdm_clm_lms)

                ecptp_jaw_list.append(ecptp_jaw)  # E + p-correction + p-tuning (ECpTp_jaw)
                ecptp_out_list.append(ecptp_out)  # E + p-correction + p-tuning (ECpTp_out)
                ect_list.append(ect_lms)  # ECT prediction
                e_list.append(init_lms)  # init prediction from heatmap network (E)
                ecp_list.append(p_pdm_lms)  # init prediction + part pdm correction (ECp)
                ecpt_list.append(pdm_clm_lms)  # init prediction + part pdm correction + global tuning (ECpT)

            pred_dict = {
                'E': e_list,
                'ECp': ecp_list,
                'ECpT': ecpt_list,
                'ECT': ect_list,
                'ECpTp_jaw': ecptp_jaw_list,
                'ECpTp_out': ecptp_out_list
            }

        return pred_dict
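    # ECpTp usage sketch (illustrative; the model directories are hypothetical
    # placeholders):
    #
    #   preds = model.get_landmark_predictions(img_list, pdm_models_dir='pdm_models/',
    #                                          clm_model_path='clm_models/basic_all')
    #   ecptp_jaw = preds['ECpTp_jaw'][0]  # 68x2 landmarks for the first image
    #
    # Each key (E, ECp, ECpT, ECT, ECpTp_jaw, ECpTp_out) maps a stage of the
    # ECpTp pipeline to a list of landmark arrays, one per input image.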