from typing import Dict, List, Union

import numpy as np
import onnx.numpy_helper as onph
from google.protobuf.internal.containers import (
    RepeatedCompositeFieldContainer,
    RepeatedScalarFieldContainer,
)
from onnx.onnx_pb import AttributeProto, GraphProto, ModelProto, NodeProto, TensorProto

from nodes.log import logger

from ..ncnn.model import (
    DTYPE_FP16,
    DTYPE_FP32,
    BinaryOpTypes,
    EltwiseOpTypes,
    GruDirectionFlags,
    InterpResizeTypes,
    NcnnLayer,
    NcnnModel,
    NormalizeEpsModes,
    PaddingTypes,
    PadModes,
    PermuteOrderTypes,
    ReductionOpTypes,
    UnaryOpTypes,
)
from ..ncnn.optimizer import NcnnOptimizer
from .tensorproto_utils import *

UOT = UnaryOpTypes
BOT = BinaryOpTypes
EOT = EltwiseOpTypes
GRU = GruDirectionFlags
IRT = InterpResizeTypes
NEM = NormalizeEpsModes
PAM = PadModes
PAT = PaddingTypes
POT = PermuteOrderTypes
ROT = ReductionOpTypes

class Onnx2NcnnConverter:
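    """Converts an ONNX ModelProto into an ncnn model.

    The fuse_* passes below walk the mutable node list, match multi-node ONNX
    subgraphs, and collapse each match into a single ncnn-style op. Matched
    nodes are renamed to "noop_reducedncnn" and the reference/blob bookkeeping
    is updated so the final write-out can skip them. Each pass receives
    reduced_node_count as a one-element list used as an out-parameter.
    """
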
    def __init__(self, onnx_model: ModelProto):
        self.onnx_graph: GraphProto = onnx_model.graph
        self.mutable_graph_nodes: List[NodeProto] = list(self.onnx_graph.node)
        self.node_count: int = len(self.onnx_graph.node)
        self.weights: Dict[str, TensorProto] = {
            initializer.name: initializer for initializer in self.onnx_graph.initializer
        }

        self.producers: Dict[str, None] = {i.name: None for i in self.onnx_graph.input}
        self.node_reference: Dict[str, int] = {}
        self.blob_names: Dict[str, None] = {}
    @staticmethod
    def add_weight(
        layer: NcnnLayer,
        weight_name: str,
        data: Union[float, int, np.ndarray, TensorProto],
        quantize_tag: bytes = b"",
    ) -> int:
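        """Convert TensorProto data to a numpy array and append it to the given NcnnLayer."""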
        if isinstance(data, TensorProto):
            data = onph.to_array(data)
        return layer.add_weight(weight_name, data, quantize_tag)

    @staticmethod
    def clear_container(
        container: Union[RepeatedCompositeFieldContainer, RepeatedScalarFieldContainer],
    ) -> None:
        for _ in range(len(container)):
            container.pop()

    def swap_nodes(self, a: int, b: int) -> None:
        self.mutable_graph_nodes[a], self.mutable_graph_nodes[b] = (
            self.mutable_graph_nodes[b],
            self.mutable_graph_nodes[a],
        )
def fuse_rewrite_gather(self) -> None: | |
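        """Rewrite single-index Gather nodes as Crop with starts/ends/axis attributes."""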
for gather in self.mutable_graph_nodes: | |
if gather.op_type == "Gather": | |
indices = get_node_attr_from_input_ai(self.weights[gather.input[1]]) | |
if len(indices) == 1: | |
# Reconstruct node connections | |
self.node_reference[gather.input[1]] -= 1 | |
origin_inp = gather.input[0] | |
gather.ClearField("input") | |
gather.input.append(origin_inp) | |
# Update axis, starts and ends | |
axis = get_node_attr_i(gather, "axis", 1) | |
gather.op_type = "Crop" | |
gather.ClearField("attribute") | |
index = indices[0] | |
set_node_attr_ai(gather, "starts", np.array([index], np.int32)) | |
set_node_attr_ai(gather, "ends", np.array([index + 1], np.int32)) | |
set_node_attr_ai(gather, "axis", np.array([axis], np.int32)) | |
def fuse_weight_reshape(self, reduced_node_count: List[int]) -> None: | |
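        """Fold a Reshape applied to a constant weight into the weight's dims and drop the node."""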
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
if node.op_type == "Reshape": | |
if node.input[0] in self.weights: | |
self.weights[node.output[0]] = self.weights[node.input[0]] | |
if len(node.input) == 1: | |
shape = get_node_attr_ai(node, "shape") | |
elif len(node.input) == 2: | |
shape = get_node_attr_from_input_ai(self.weights[node.input[1]]) | |
else: | |
shape = np.empty(0, np.int64) | |
self.clear_container(self.weights[node.output[0]].dims) | |
for dim in shape: | |
self.weights[node.output[0]].dims.append(dim) | |
node.op_type = "noop_reducedncnn" | |
self.node_reference[node.input[0]] -= 1 | |
if len(node.input) == 2: | |
self.node_reference[node.input[1]] -= 1 | |
reduced_node_count[0] += 1 | |
i += 1 | |
def fuse_weight_transpose(self, reduced_node_count: List[int]) -> None: | |
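        """Pre-transpose 2D constant weights in place so the Transpose node can be dropped."""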
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
if node.op_type == "Transpose": | |
if ( | |
node.input[0] in self.weights | |
and len(self.weights[node.input[0]].dims) == 2 | |
): | |
perm = get_node_attr_ai(node, "perm") | |
if perm.size != 2 or perm[0] != 1 or perm[1] != 0: | |
continue | |
self.weights[node.output[0]] = self.weights[node.input[0]] | |
# Permute weight | |
B = self.weights[node.output[0]] | |
h, w = B.dims[:2] | |
permuted_data = onph.to_array(B).T | |
B.dims[:2] = (w, h) | |
if B.raw_data: | |
B.raw_data = permuted_data.tobytes() | |
else: | |
self.clear_container(B.float_data) | |
                        B.float_data.extend(permuted_data.flatten())
# Reduce | |
node.op_type = "noop_reducednccn" | |
self.node_reference[node.input[0]] -= 1 | |
reduced_node_count[0] += 1 | |
i += 1 | |
def fuse_shufflechannel(self, reduced_node_count: List[int]) -> None: | |
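        """Fuse Reshape - Transpose - (Constant) - Reshape into ShuffleChannel.

        The 5D shape form maps to the normal style, the 3D form to the reverse style.
        """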
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# ShuffleChannel <= Reshape - Transpose - Reshape | |
# ShuffleChannel <= Reshape - Transpose - Constant - Reshape | |
if node.op_type == "Reshape": | |
if self.node_reference[node.output[0]] != 1: | |
continue | |
if len(node.input) == 1: | |
shape = get_node_attr_ai(node, "shape") | |
else: | |
# Skip weight reshape | |
if node.input[1] not in self.weights: | |
continue | |
shape = get_node_attr_from_input_ai(self.weights[node.input[1]]) | |
# 1 groups channels_per_group, height, width | |
# reverse style = channels_per_group, groups, height * width | |
if (shape.size != 5 and shape.size != 3) or ( | |
shape.size == 5 and shape[0] != 1 | |
): | |
continue | |
if i + 2 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
if node3.op_type == "Constant": | |
if i + 3 >= self.node_count: | |
continue | |
node3 = self.mutable_graph_nodes[i + 3] | |
if (node2.op_type != "Transpose" or node3.op_type != "Reshape") or ( | |
self.node_reference[node2.output[0]] != 1 | |
): | |
continue | |
# 0 2 1 3 4 | |
# reverse style = 1 0 2 | |
perm = get_node_attr_ai(node2, "perm") | |
if perm.size != 5 and perm.size != 3: | |
continue | |
if perm.size == 5 and ( | |
perm[0] != 0 | |
or perm[1] != 2 | |
or perm[2] != 1 | |
or perm[3] != 3 | |
or perm[4] != 4 | |
): | |
continue | |
if perm.size == 3 and (perm[0] != 1 or perm[1] != 0 or perm[2] != 2): | |
continue | |
if len(node3.input) == 1: | |
shape3 = get_node_attr_ai(node3, "shape") | |
else: | |
if node3.input[1] not in self.weights: | |
continue | |
shape3 = get_node_attr_from_input_ai(self.weights[node3.input[1]]) | |
# 1, -1, height, width | |
# reverse style = group, -1, channels_per_group, height, width | |
if shape3.size != 4 and shape3.size != 5: | |
continue | |
if shape3.size == 4 and ( | |
shape3[0] != 1 | |
or (shape3[1] != -1 and shape3[1] != shape[1] * shape[2]) | |
): | |
continue | |
if shape3.size == 5 and ( | |
shape3[0] != shape[1] | |
or shape3[2] != shape[0] | |
or shape3[3] * shape3[4] != shape[2] | |
): | |
continue | |
# Reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
if len(node.input) == 2: | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.output[0]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
if len(node3.input) == 2: | |
self.node_reference[node3.input[1]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
node3.op_type = "ShuffleChannel" | |
node3.input[0] = node.input[0] | |
attr_group = AttributeProto(name="group", i=shape[1], type=APT.INT) | |
node3.attribute.append(attr_group) | |
attr_reverse = AttributeProto( | |
name="reverse", i=int(shape.size == 3), type=APT.INT | |
) | |
node3.attribute.append(attr_reverse) | |
reduced_node_count[0] += 2 | |
i += 2 | |
def fuse_shufflechannel_split(self, reduced_node_count: List[int]) -> None: | |
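        """Fuse a reverse ShuffleChannel followed by two Gathers (indices 0 and 1) into a Split on axis 1."""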
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# Split <= ShuffleChannel(reverse type) - Gather(0) - Gather(1) | |
if node.op_type == "ShuffleChannel": | |
# reverse = 1 | |
reverse = get_node_attr_i(node, "reverse") | |
if reverse != 1 or (i + 2 >= self.node_count): | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
if node2.op_type != "Gather" or node3.op_type != "Gather": | |
continue | |
if node2.input[0] != node.output[0] or node3.input[0] != node.output[0]: | |
continue | |
# axis = 0 or indices = 0 | |
gather2_axis = get_node_attr_i(node2, "axis") | |
if gather2_axis != 0 or node2.input[1] not in self.weights: | |
continue | |
gather2_indices = get_node_attr_from_input_ai( | |
self.weights[node2.input[1]] | |
) | |
if gather2_indices.size != 1 or gather2_indices[0] != 0: | |
continue | |
# axis = 0 or indices = 1 | |
gather3_axis = get_node_attr_i(node3, "axis") | |
if gather3_axis != 0 or node3.input[1] not in self.weights: | |
continue | |
gather3_indices = get_node_attr_from_input_ai( | |
self.weights[node3.input[1]] | |
) | |
                if gather3_indices.size != 1 or gather3_indices[0] != 1:
continue | |
# reduce | |
node2.op_type = "noop_reducedncnn" | |
self.node_reference[node.output[0]] -= 2 | |
self.node_reference[node2.input[1]] -= 1 | |
self.node_reference[node3.input[1]] -= 1 | |
node3.op_type = "Split" | |
node3.ClearField("input") | |
node3.input.append(node.output[0]) | |
node3.output.append(node3.output[0]) | |
node3.output[0] = node2.output[0] | |
node3.ClearField("attribute") | |
attr_axis = AttributeProto(name="axis", i=1, type=APT.INT) | |
node3.attribute.append(attr_axis) | |
reduced_node_count[0] += 1 | |
i += 1 | |
def fuse_hardswish(self, reduced_node_count: List[int]) -> None: | |
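        """Fuse x * relu6(x + 3) / 6 chains, and HardSigmoid - Mul pairs, into HardSwish."""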
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# HardSwish <= Add(+3) - Clip(0, 6) - Mul(X, ) - Div( / 6) | |
# HardSwish <= Add(+3) - Clip(0, 6) - Mul(X, ) - Mul(*(1 / 6)) | |
# HardSwish <= Add(+3) - Clip(0, 6) - Mul(X, ) - Constant - Div( / 6) | |
# HardSwish <= Add(+3) - Clip(0, 6) - Mul(X, ) - Constant - Mul(*(1 / 6)) | |
# out = x * F.relu6(x + 3, inplace=True) / 6 | |
if node.op_type == "Add": | |
if ( | |
self.node_reference[node.output[0]] != 1 | |
or i + 3 >= self.node_count | |
or node.input[1] not in self.weights | |
): | |
continue | |
add_three = self.weights[node.input[1]] | |
if ( | |
len(add_three.dims) != 0 | |
or get_tensor_proto_data_size(add_three, add_three.data_type) != 1 | |
): | |
continue | |
constant_add_three = get_node_attr_from_input_f(add_three) | |
if constant_add_three != 3: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
node4 = self.mutable_graph_nodes[i + 3] | |
if node4.op_type == "Constant": | |
if i + 4 >= self.node_count: | |
continue | |
node4 = self.mutable_graph_nodes[i + 4] | |
if ( | |
node2.op_type != "Clip" | |
or node3.op_type != "Mul" | |
or (node4.op_type != "Div" and node4.op_type != "Mul") | |
): | |
continue | |
if self.node_reference[node2.output[0]] != 1: | |
continue | |
if len(node2.input) == 1: | |
relu6_min = get_node_attr_f(node2, "min", -FLOAT32_MAX) | |
relu6_max = get_node_attr_f(node2, "max", FLOAT32_MAX) | |
else: | |
min_tp = self.weights[node2.input[1]] | |
max_tp = self.weights[node2.input[2]] | |
relu6_min = get_node_attr_from_input_f(min_tp) | |
relu6_max = get_node_attr_from_input_f(max_tp) | |
if relu6_min != 0 or relu6_max != 6: | |
continue | |
if self.node_reference[node3.output[0]] != 1: | |
continue | |
if node3.input[0] != node.input[0] or node3.input[1] != node2.output[0]: | |
continue | |
if node4.input[1] not in self.weights: | |
continue | |
div_six = self.weights[node4.input[1]] | |
if ( | |
len(div_six.dims) != 0 | |
or get_tensor_proto_data_size(div_six, div_six.data_type) != 1 | |
): | |
continue | |
constant_div_six = get_node_attr_from_input_f(div_six) | |
if (node4.op_type == "Div" and constant_div_six != 6) or ( | |
node4.op_type == "Mul" and constant_div_six != 1 / 6 | |
): | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
node3.op_type = "noop_reducedncnn" | |
self.node_reference[node.input[0]] -= 1 | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.output[0]] -= 1 | |
if len(node2.input) == 3: | |
self.node_reference[node2.input[1]] -= 1 | |
self.node_reference[node2.input[2]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
self.node_reference[node3.output[0]] -= 1 | |
self.node_reference[node4.input[1]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
self.blob_names.pop(node3.output[0], None) | |
node4.op_type = "HardSwish" | |
node4.ClearField("input") | |
node4.input.append(node.input[0]) | |
attr_alpha = AttributeProto(name="alpha", f=1 / 6, type=APT.FLOAT) | |
node4.attribute.append(attr_alpha) | |
attr_beta = AttributeProto(name="beta", f=0.5, type=APT.FLOAT) | |
node4.attribute.append(attr_beta) | |
reduced_node_count[0] += 3 | |
i += 3 | |
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# HardSwish <= HardSigmoid - Mul | |
# out = x * hsigmoid(x) | |
if node.op_type == "HardSigmoid": | |
if self.node_reference[node.output[0]] != 1: | |
continue | |
alpha = get_node_attr_f(node, "alpha", 0.2) | |
beta = get_node_attr_f(node, "beta", 0.5) | |
if i + 1 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
if node2.op_type != "Mul": | |
continue | |
if node2.input[0] != node.input[0] or node2.input[1] != node.output[0]: | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
self.node_reference[node.input[0]] -= 1 | |
self.node_reference[node.output[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
node2.op_type = "HardSwish" | |
node2.ClearField("input") | |
node2.input.append(node.input[0]) | |
attr_alpha = AttributeProto(name="alpha", f=alpha, type=APT.FLOAT) | |
node2.attribute.append(attr_alpha) | |
attr_beta = AttributeProto(name="beta", f=beta, type=APT.FLOAT) | |
node2.attribute.append(attr_beta) | |
reduced_node_count[0] += 1 | |
i += 1 | |
def fuse_hardsigmoid(self, reduced_node_count: List[int]) -> None: | |
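        """Fuse relu6(x + 3) / 6 chains into HardSigmoid with alpha=1/6, beta=0.5."""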
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# HardSigmoid <= Add(+3) - Clip(0, 6) - Div( / 6) | |
# HardSigmoid <= Add(+3) - Clip(0, 6) - Mul(*(1 / 6)) | |
# HardSigmoid <= Add(+3) - Clip(0, 6) - Constant - Div( / 6) | |
# HardSigmoid <= Add(+3) - Clip(0, 6) - Constant - Mul(*(1 / 6)) | |
# out = F.relu6(x + 3, inplace=True) / 6 | |
if node.op_type == "Add": | |
if ( | |
self.node_reference[node.output[0]] != 1 | |
or i + 2 >= self.node_count | |
or node.input[1] not in self.weights | |
): | |
continue | |
add_three = self.weights[node.input[1]] | |
if ( | |
len(add_three.dims) != 0 | |
or get_tensor_proto_data_size(add_three, add_three.data_type) != 1 | |
): | |
continue | |
                constant_add_three = get_node_attr_from_input_f(add_three)
if constant_add_three != 3: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
if node3.op_type == "Constant": | |
if i + 3 >= self.node_count: | |
continue | |
node3 = self.mutable_graph_nodes[i + 3] | |
if node2.op_type != "Clip" or ( | |
node3.op_type != "Div" and node3.op_type != "Mul" | |
): | |
continue | |
if self.node_reference[node2.output[0]] != 1: | |
continue | |
if len(node2.input) == 1: | |
relu6_min = get_node_attr_f(node2, "min", -FLOAT32_MAX) | |
relu6_max = get_node_attr_f(node2, "max", FLOAT32_MAX) | |
else: | |
min_tp = self.weights[node2.input[1]] | |
max_tp = self.weights[node2.input[2]] | |
relu6_min = get_node_attr_from_input_f(min_tp) | |
relu6_max = get_node_attr_from_input_f(max_tp) | |
if relu6_min != 0 or relu6_max != 6: | |
continue | |
if node3.input[1] not in self.weights: | |
continue | |
div_six = self.weights[node3.input[1]] | |
if ( | |
len(div_six.dims) != 0 | |
or get_tensor_proto_data_size(div_six, div_six.data_type) != 1 | |
): | |
continue | |
constant_div_six = get_node_attr_from_input_f(div_six) | |
if (node3.op_type == "Div" and constant_div_six != 6) or ( | |
node3.op_type == "Mul" and constant_div_six != 1 / 6 | |
): | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.output[0]] -= 1 | |
if len(node2.input) == 3: | |
self.node_reference[node2.input[1]] -= 1 | |
self.node_reference[node2.input[2]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
self.node_reference[node3.input[1]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
node3.op_type = "HardSigmoid" | |
node3.ClearField("input") | |
node3.input.append(node.input[0]) | |
attr_alpha = AttributeProto(name="alpha", f=1 / 6, type=APT.FLOAT) | |
node3.attribute.append(attr_alpha) | |
attr_beta = AttributeProto(name="beta", f=0.5, type=APT.FLOAT) | |
node3.attribute.append(attr_beta) | |
reduced_node_count[0] += 2 | |
i += 2 | |
def fuse_swish(self, reduced_node_count: List[int]) -> None: | |
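        """Fuse Sigmoid - Mul (x * sigmoid(x)) into a single Swish node."""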
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# Swish <= Sigmoid - Mul | |
# x * torch.sigmoid(x) | |
if node.op_type == "Sigmoid": | |
if self.node_reference[node.output[0]] != 1 or i + 1 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
if node2.op_type != "Mul": | |
continue | |
if node2.input[0] != node.input[0] or node2.input[1] != node.output[0]: | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
self.node_reference[node.input[0]] -= 1 | |
self.node_reference[node.output[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
node2.op_type = "Swish" | |
node2.ClearField("input") | |
node2.input.append(node.input[0]) | |
reduced_node_count[0] += 1 | |
i += 1 | |
def fuse_batchnorm1d_squeeze_unsqueeze(self, reduced_node_count: List[int]) -> None: | |
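        """Drop the Unsqueeze/Squeeze pair wrapped around a BatchNormalization."""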
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# BatchNormalization <= Unsqueeze - BatchNormalization - Squeeze | |
if node.op_type == "Unsqueeze": | |
if self.node_reference[node.output[0]] != 1 or i + 2 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
if node2.op_type != "BatchNormalization" or node3.op_type != "Squeeze": | |
continue | |
if self.node_reference[node2.output[0]] != 1: | |
continue | |
if ( | |
node2.input[0] != node.output[0] | |
or node3.input[0] != node2.output[0] | |
): | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node3.op_type = "noop_reducedncnn" | |
self.node_reference[node.output[0]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
node2.input[0] = node.input[0] | |
node2.output[0] = node3.output[0] | |
reduced_node_count[0] += 2 | |
i += 2 | |
def fuse_unsqueeze_prelu(self, reduced_node_count: List[int]) -> None: | |
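        """Drop the Unsqueeze (axes 1, 2) that reshapes a 1D PReLU slope weight."""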
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# PReLU <= Unsqueeze - PReLU | |
if node.op_type == "Unsqueeze": | |
# check weight | |
if node.input[0] not in self.weights: | |
continue | |
B = self.weights[node.input[0]] | |
if len(B.dims) != 1: | |
continue | |
if self.node_reference[node.output[0]] != 1: | |
continue | |
# axes = (1, 2) | |
axes = get_node_attr_ai(node, "axes") | |
if axes.size != 2 or axes[0] != 1 or axes[1] != 2: | |
continue | |
if i + 1 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
if node2.op_type != "PRelu" or node2.input[1] != node.output[0]: | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
self.node_reference[node.output[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
node2.input[1] = node.input[0] | |
reduced_node_count[0] += 1 | |
i += 1 | |
def fuse_normalize(self, reduced_node_count: List[int]) -> None: | |
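        """Fuse ReduceL2 - Clip - (Shape) - Expand - Div into Normalize, using the Clip minimum as eps."""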
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# Normalize <= X - ReduceL2 - Clip - Expand - Div | |
# Normalize <= X - ReduceL2 - Clip - Shape - Expand - Div | |
if node.op_type == "ReduceL2": | |
if self.node_reference[node.output[0]] != 1: | |
continue | |
# axes = (1) | |
axes = get_node_attr_ai(node, "axes") | |
if len(axes) != 1 or axes[0] != 1 or i + 3 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
node4 = self.mutable_graph_nodes[i + 3] | |
has_shape_node = node3.op_type == "Shape" | |
node_shape = NodeProto() | |
if has_shape_node: | |
if i + 4 >= self.node_count: | |
continue | |
node_shape = node3 | |
node3 = self.mutable_graph_nodes[i + 3] | |
node4 = self.mutable_graph_nodes[i + 4] | |
if ( | |
node2.op_type != "Clip" | |
or node3.op_type != "Expand" | |
or node4.op_type != "Div" | |
): | |
continue | |
if ( | |
self.node_reference[node2.output[0]] != 1 | |
or self.node_reference[node3.output[0]] != 1 | |
): | |
continue | |
if ( | |
node2.input[0] != node.output[0] | |
or node3.input[0] != node2.output[0] | |
or node4.input[0] != node.input[0] | |
or node4.input[1] != node3.output[0] | |
): | |
continue | |
if has_shape_node and ( | |
node_shape.input[0] != node.input[0] | |
or node3.input[1] != node_shape.output[0] | |
): | |
continue | |
# +eps | |
if len(node2.input) == 1: | |
clip_min = get_node_attr_f(node2, "min", -FLOAT32_MAX) | |
else: | |
min_tp = self.weights[node2.input[1]] | |
clip_min = get_node_attr_from_input_f(min_tp) | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
if has_shape_node: | |
node_shape.op_type = "noop_reducedncnn" | |
node3.op_type = "noop_reducedncnn" | |
self.node_reference[node.input[0]] -= 2 if has_shape_node else 1 | |
self.node_reference[node.output[0]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
if has_shape_node: | |
self.node_reference[node_shape.output[0]] -= 1 | |
self.node_reference[node3.output[0]] -= 1 | |
if len(node3.input) == 2: | |
self.node_reference[node3.input[1]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
if has_shape_node: | |
self.blob_names.pop(node_shape.output[0], None) | |
self.blob_names.pop(node3.output[0], None) | |
node4.op_type = "Normalize" | |
node4.ClearField("input") | |
node4.input.append(node.input[0]) | |
attr_alpha = AttributeProto(name="eps", f=clip_min, type=APT.FLOAT) | |
node4.attribute.append(attr_alpha) | |
reduced_node_count[0] += 4 if has_shape_node else 3 | |
i += 4 if has_shape_node else 3 | |
def fuse_groupnorm(self, reduced_node_count: List[int]) -> None: | |
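        """Fuse Reshape - InstanceNormalization - Reshape - Mul - Add into GroupNorm with per-channel affine."""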
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# GroupNorm <= X - Reshape - InstanceNormalization - Reshape - Mul - Add | |
if node.op_type == "Reshape": | |
if self.node_reference[node.output[0]] != 1: | |
continue | |
if len(node.input) == 1: | |
shape = get_node_attr_ai(node, "shape") | |
else: | |
# Skip weight reshape | |
if node.input[1] not in self.weights: | |
continue | |
shape = get_node_attr_from_input_ai(self.weights[node.input[1]]) | |
# 0, group, -1 | |
if ( | |
shape.size != 3 | |
or shape[0] != 0 | |
or shape[2] != -1 | |
or i + 4 >= self.node_count | |
): | |
continue | |
groups = shape[1] | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
node4 = self.mutable_graph_nodes[i + 3] | |
node5 = self.mutable_graph_nodes[i + 4] | |
if ( | |
node2.op_type != "InstanceNormalization" | |
or node3.op_type != "Reshape" | |
or node4.op_type != "Mul" | |
or node5.op_type != "Add" | |
): | |
continue | |
if ( | |
self.node_reference[node2.output[0]] != 1 | |
or self.node_reference[node3.output[0]] != 1 | |
or self.node_reference[node4.output[0]] != 1 | |
): | |
continue | |
if ( | |
node2.input[0] != node.output[0] | |
or node3.input[0] != node2.output[0] | |
or node4.input[0] != node3.output[0] | |
or node5.input[0] != node4.output[0] | |
): | |
continue | |
# InstanceNormalization S=1 B=0 | |
S = get_node_attr_from_input_af(self.weights[node2.input[1]]) | |
B = get_node_attr_from_input_af(self.weights[node2.input[2]]) | |
if S.size != groups or B.size != groups: | |
continue | |
if np.any(S != 1) or np.any(B != 0): | |
continue | |
if len(node3.input) == 1: | |
shape2 = get_node_attr_ai(node3, "shape") | |
else: | |
# Skip weight reshape | |
if node3.input[1] not in self.weights: | |
continue | |
shape2 = get_node_attr_from_input_ai(self.weights[node3.input[1]]) | |
# 1, channels, w, h | |
if shape2.size != 4 or shape2[0] != 1: | |
continue | |
channels = shape2[1] | |
# affine | |
affine_S = get_node_attr_from_input_af(self.weights[node4.input[1]]) | |
affine_B = get_node_attr_from_input_af(self.weights[node5.input[1]]) | |
if affine_S.size != channels and affine_B.size != channels: | |
continue # only per-channel affine allowed | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
node3.op_type = "noop_reducedncnn" | |
node4.op_type = "noop_reducedncnn" | |
if len(node.input) == 2: | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.output[0]] -= 1 | |
self.node_reference[node2.input[1]] -= 1 | |
self.node_reference[node2.input[2]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
if len(node3.input) == 2: | |
self.node_reference[node3.input[1]] -= 1 | |
self.node_reference[node3.output[0]] -= 1 | |
self.node_reference[node4.output[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
self.blob_names.pop(node3.output[0], None) | |
self.blob_names.pop(node4.output[0], None) | |
affine_scale = node4.input[1] | |
affine_bias = node5.input[1] | |
node5.op_type = "GroupNorm" | |
node5.ClearField("input") | |
node5.input.append(node.input[0]) | |
node5.input.append(affine_scale) | |
node5.input.append(affine_bias) | |
attr_groups = AttributeProto(name="groups", i=groups, type=APT.INT) | |
node5.attribute.append(attr_groups) | |
attr_channels = AttributeProto( | |
name="channels", i=channels, type=APT.INT | |
) | |
node5.attribute.append(attr_channels) | |
# +eps | |
eps = get_node_attr_f(node2, "epsilon", 0.00001) | |
attr_eps = AttributeProto(name="epsilon", f=eps, type=APT.FLOAT) | |
node5.attribute.append(attr_eps) | |
attr_affine = AttributeProto(name="affine", i=1, type=APT.INT) | |
node5.attribute.append(attr_affine) | |
reduced_node_count[0] += 4 | |
i += 4 | |
def fuse_layernorm(self, reduced_node_count: List[int]) -> None: | |
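        """Fuse the ReduceMean - Sub - Pow - ReduceMean - Add - Sqrt - Div chain
        (optionally followed by a Mul - Add affine pair) into LayerNorm.
        """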
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# LayerNorm <= X - ReduceMean - Sub - Pow - ReduceMean - Add - Sqrt - Div | |
# LayerNorm <= X - ReduceMean - Sub - Pow - ReduceMean - Add - Sqrt - Div - Mul - Add | |
if node.op_type == "ReduceMean": | |
if self.node_reference[node.output[0]] != 1: | |
continue | |
axes = get_node_attr_ai(node, "axes") | |
# -1 | |
# -2 -1 | |
if axes.size != 1 and axes.size != 2: | |
continue | |
if (axes.size == 1 and axes[0] != -1) or ( | |
axes.size == 2 and (axes[0] != -2 or axes[1] != -1) | |
): | |
continue | |
if i + 6 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
node4 = self.mutable_graph_nodes[i + 3] | |
node5 = self.mutable_graph_nodes[i + 4] | |
node6 = self.mutable_graph_nodes[i + 5] | |
node7 = self.mutable_graph_nodes[i + 6] | |
if node2.op_type != "Sub" or node3.op_type != "Pow": | |
continue | |
if ( | |
self.node_reference[node2.output[0]] != 2 | |
or self.node_reference[node3.output[0]] != 1 | |
or self.node_reference[node4.output[0]] != 1 | |
or self.node_reference[node5.output[0]] != 1 | |
or self.node_reference[node6.output[0]] != 1 | |
): | |
continue | |
if ( | |
                    node2.input[0] != node.input[0]
or node2.input[1] != node.output[0] | |
or node3.input[0] != node2.output[0] | |
or node4.input[0] != node3.output[0] | |
or node5.input[0] != node4.output[0] | |
or node6.input[0] != node5.output[0] | |
or node7.input[0] != node2.output[0] | |
or node7.input[1] != node6.output[0] | |
): | |
continue | |
if node3.input[1] not in self.weights: | |
continue | |
pow_two = self.weights[node3.input[1]] | |
if ( | |
len(pow_two.dims) != 0 | |
or get_tensor_proto_data_size(pow_two, pow_two.data_type) != 1 | |
): | |
continue | |
constant_pow_two = get_node_attr_from_input_f(pow_two) | |
if constant_pow_two != 2: | |
continue | |
axes4 = get_node_attr_ai(node4, "axes") | |
# -1 | |
# -2 -1 | |
if axes4.size != axes.size: | |
continue | |
                if (axes4.size == 1 and axes4[0] != -1) or (
axes.size == 2 and (axes4[0] != -2 or axes4[1] != -1) | |
): | |
continue | |
if node5.input[1] not in self.weights: | |
continue | |
add_eps = self.weights[node5.input[1]] | |
if ( | |
len(add_eps.dims) != 0 | |
or get_tensor_proto_data_size(add_eps, add_eps.data_type) != 1 | |
): | |
continue | |
eps = get_node_attr_from_input_f(add_eps) | |
affine = 0 | |
while i + 8 < self.node_count: | |
node8 = self.mutable_graph_nodes[i + 7] | |
node9 = self.mutable_graph_nodes[i + 8] | |
if node8.op_type != "Mul" or node9.op_type != "Add": | |
break | |
if ( | |
self.node_reference[node7.output[0]] != 1 | |
or self.node_reference[node8.output[0]] != 1 | |
): | |
break | |
if ( | |
node8.input[0] != node7.output[0] | |
or node9.input[0] != node8.output[0] | |
): | |
break | |
# affine | |
affine_S = get_node_attr_from_input_af(self.weights[node8.input[1]]) | |
affine_B = get_node_attr_from_input_af(self.weights[node9.input[1]]) | |
if affine_S.size != affine_B.size: | |
break | |
affine = 1 | |
break | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
node3.op_type = "noop_reducedncnn" | |
node4.op_type = "noop_reducedncnn" | |
node5.op_type = "noop_reducedncnn" | |
node6.op_type = "noop_reducedncnn" | |
self.node_reference[node2.input[0]] -= 1 | |
self.node_reference[node2.input[1]] -= 1 | |
self.node_reference[node3.input[0]] -= 1 | |
self.node_reference[node3.input[1]] -= 1 | |
self.node_reference[node4.input[0]] -= 1 | |
self.node_reference[node5.input[0]] -= 1 | |
self.node_reference[node5.input[1]] -= 1 | |
self.node_reference[node6.input[0]] -= 1 | |
self.node_reference[node7.input[0]] -= 1 | |
self.node_reference[node7.input[1]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
self.blob_names.pop(node3.output[0], None) | |
self.blob_names.pop(node4.output[0], None) | |
self.blob_names.pop(node5.output[0], None) | |
self.blob_names.pop(node6.output[0], None) | |
attr_eps = AttributeProto(name="epsilon", f=eps, type=APT.FLOAT) | |
attr_affine = AttributeProto(name="affine", i=affine, type=APT.INT) | |
if affine == 0: | |
node7.op_type = "LayerNorm" | |
node7.ClearField("input") | |
node7.input.append(node.input[0]) | |
node7.attribute.append(attr_eps) | |
node7.attribute.append(attr_affine) | |
reduced_node_count[0] += 6 | |
i += 6 | |
else: | |
# This is probably unnecessary on their part, but I'm paranoid | |
node8 = self.mutable_graph_nodes[i + 7] | |
node9 = self.mutable_graph_nodes[i + 8] | |
node7.op_type = "noop_reducedncnn" | |
node8.op_type = "noop_reducedncnn" | |
self.node_reference[node8.input[0]] -= 1 | |
self.node_reference[node9.input[0]] -= 1 | |
self.blob_names.pop(node7.output[0], None) | |
self.blob_names.pop(node8.output[0], None) | |
affine_scale = node8.input[1] | |
affine_bias = node9.input[1] | |
node9.op_type = "LayerNorm" | |
node9.ClearField("input") | |
node9.input.append(node.input[0]) | |
node9.input.append(affine_scale) | |
node9.input.append(affine_bias) | |
node9.attribute.append(attr_eps) | |
node9.attribute.append(attr_affine) | |
reduced_node_count[0] += 8 | |
i += 8 | |
def fuse_flatten(self, reduced_node_count: List[int]) -> None: | |
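        """Fuse the Shape - Gather - Unsqueeze - Concat - Reshape idiom for flattening to (N, -1) into Flatten."""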
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# Flatten <= X - Shape - Gather - Constant - Unsqueeze - Unsqueeze - Concat - Reshape | |
if node.op_type == "Shape": | |
if self.node_reference[node.output[0]] != 1: | |
continue | |
if i + 6 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
node4 = self.mutable_graph_nodes[i + 3] | |
node5 = self.mutable_graph_nodes[i + 4] | |
node6 = self.mutable_graph_nodes[i + 5] | |
node7 = self.mutable_graph_nodes[i + 6] | |
if ( | |
node2.op_type != "Gather" | |
or node3.op_type != "Constant" | |
or node4.op_type != "Unsqueeze" | |
or node5.op_type != "Unsqueeze" | |
or node6.op_type != "Concat" | |
or node7.op_type != "Reshape" | |
): | |
continue | |
if ( | |
self.node_reference[node2.output[0]] != 1 | |
or self.node_reference[node4.output[0]] != 1 | |
or self.node_reference[node5.output[0]] != 1 | |
or self.node_reference[node6.output[0]] != 1 | |
): | |
continue | |
if ( | |
node2.input[0] != node.output[0] | |
or node4.input[0] != node2.output[0] | |
or node5.input[0] != node3.output[0] | |
or node6.input[0] != node4.output[0] | |
or node6.input[1] != node5.output[0] | |
or node7.input[0] != node.input[0] | |
or node7.input[1] != node6.output[0] | |
): | |
continue | |
# axis = 0 | |
gather_axis = get_node_attr_i(node2, "axis") | |
if gather_axis != 0: | |
continue | |
# indices = 0 | |
if node2.input[1] not in self.weights: | |
continue | |
gather_indices = get_node_attr_from_input_ai( | |
self.weights[node2.input[1]] | |
) | |
if gather_indices.size != 1 or gather_indices[0] != 0: | |
continue | |
# axes = (0) | |
unsqueeze_axes = get_node_attr_ai(node4, "axes") | |
if unsqueeze_axes.size != 1 or unsqueeze_axes[0] != 0: | |
continue | |
unsqueeze_axes2 = get_node_attr_ai(node5, "axes") | |
if unsqueeze_axes2.size != 1 or unsqueeze_axes2[0] != 0: | |
continue | |
# data = -1 | |
if node5.input[0] not in self.weights: | |
continue | |
unsqueeze2_data = get_node_attr_from_input_ai( | |
self.weights[node5.input[0]] | |
) | |
if unsqueeze2_data.size != 1 or unsqueeze2_data[0] != -1: | |
continue | |
# axis = 0 | |
concat_axis = get_node_attr_i(node6, "axis") | |
if concat_axis != 0: | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
node4.op_type = "noop_reducedncnn" | |
node5.op_type = "noop_reducedncnn" | |
node6.op_type = "noop_reducedncnn" | |
self.node_reference[node.input[0]] -= 1 | |
self.node_reference[node.output[0]] -= 1 | |
self.node_reference[node2.input[1]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
self.node_reference[node4.output[0]] -= 1 | |
self.node_reference[node5.input[0]] -= 1 | |
self.node_reference[node5.output[0]] -= 1 | |
                self.node_reference[node6.output[0]] -= 1
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
self.blob_names.pop(node4.output[0], None) | |
self.blob_names.pop(node5.output[0], None) | |
self.blob_names.pop(node6.output[0], None) | |
node7.op_type = "Flatten" | |
node7.ClearField("input") | |
node7.input.append(node.input[0]) | |
reduced_node_count[0] += 5 | |
i += 5 | |
def fuse_pixelshuffle(self, reduced_node_count: List[int]) -> None: | |
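        """Fuse the 6D Reshape - Transpose(0 1 4 2 5 3) - Reshape pattern into PixelShuffle."""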
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# PixelShuffle <= Reshape - Transpose - Reshape | |
# PixelShuffle <= Reshape - Transpose - Constant - Reshape | |
if node.op_type == "Reshape": | |
if self.node_reference[node.output[0]] != 1: | |
continue | |
if len(node.input) == 1: | |
shape = get_node_attr_ai(node, "shape") | |
else: | |
# skip weight reshape | |
if node.input[1] not in self.weights: | |
continue | |
shape = get_node_attr_from_input_ai(self.weights[node.input[1]]) | |
# -1, 3, upscale_factor, upscale_factor, height, width | |
if ( | |
shape.size != 6 | |
or (shape[0] != 1 and shape[0] != -1) | |
or shape[2] != shape[3] | |
or i + 2 >= self.node_count | |
): | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
if node3.op_type == "Constant": | |
if i + 3 >= self.node_count: | |
continue | |
node3 = self.mutable_graph_nodes[i + 3] | |
if node2.op_type != "Transpose" or node3.op_type != "Reshape": | |
continue | |
if self.node_reference[node2.output[0]] != 1: | |
continue | |
# 0 1 4 2 5 3 | |
perm = get_node_attr_ai(node2, "perm") | |
if ( | |
perm.size != 6 | |
or perm[0] != 0 | |
or perm[1] != 1 | |
or perm[2] != 4 | |
or perm[3] != 2 | |
or perm[4] != 5 | |
or perm[5] != 3 | |
): | |
continue | |
if len(node3.input) == 1: | |
shape3 = get_node_attr_ai(node3, "shape") | |
else: | |
if node3.input[1] not in self.weights: | |
continue | |
shape3 = get_node_attr_from_input_ai(self.weights[node3.input[1]]) | |
# -1, 3, height, width | |
if ( | |
shape3.size != 4 | |
or (shape3[0] != 1 and shape3[0] != -1) | |
or shape3[1] != shape[1] | |
or shape3[2] != shape[2] * shape[4] | |
or shape3[3] != shape[3] * shape[5] | |
): | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
if len(node.input) == 2: | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.output[0]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
if len(node3.input) == 2: | |
self.node_reference[node3.input[1]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
node3.op_type = "PixelShuffle" | |
node3.input[0] = node.input[0] | |
attr_group = AttributeProto( | |
name="scale_factor", i=shape[2], type=APT.INT | |
) | |
node3.attribute.append(attr_group) | |
reduced_node_count[0] += 2 | |
i += 2 | |
def fuse_reorg(self, reduced_node_count: List[int]) -> None: | |
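        """Fuse the 6D Reshape - Transpose(0 1 3 5 2 4) - Reshape (space-to-depth) pattern into Reorg."""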
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
            # Reorg <= Reshape - Transpose - Reshape
            # Reorg <= Reshape - Transpose - Constant - Reshape
if node.op_type == "Reshape": | |
if self.node_reference[node.output[0]] != 1: | |
continue | |
if len(node.input) == 1: | |
shape = get_node_attr_ai(node, "shape") | |
else: | |
if node.input[1] not in self.weights: | |
continue | |
shape = get_node_attr_from_input_ai(self.weights[node.input[1]]) | |
# -1, 3, out_height, block_size, out_width, block_size | |
if ( | |
shape.size != 6 | |
or (shape[0] != 1 and shape[0] != -1) | |
or shape[3] != shape[5] | |
or i + 2 >= self.node_count | |
): | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
if node3.op_type == "Constant": | |
if i + 3 >= self.node_count: | |
continue | |
node3 = self.mutable_graph_nodes[i + 3] | |
if node2.op_type != "Transpose" or node3.op_type != "Reshape": | |
continue | |
if self.node_reference[node2.output[0]] != 1: | |
continue | |
# 0 1 3 5 2 4 | |
perm = get_node_attr_ai(node2, "perm") | |
if ( | |
perm.size != 6 | |
or perm[0] != 0 | |
or perm[1] != 1 | |
or perm[2] != 3 | |
or perm[3] != 5 | |
or perm[4] != 2 | |
or perm[5] != 4 | |
): | |
continue | |
if len(node3.input) == 1: | |
shape3 = get_node_attr_ai(node3, "shape") | |
else: | |
if node3.input[1] not in self.weights: | |
continue | |
shape3 = get_node_attr_from_input_ai(self.weights[node3.input[1]]) | |
# -1, out_channels, out_height, out_width | |
if ( | |
shape3.size != 4 | |
or (shape3[0] != 1 and shape3[0] != -1) | |
or shape3[1] != shape[1] * shape[3] * shape[5] | |
or shape3[2] != shape[2] | |
or shape3[3] != shape[4] | |
): | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
if len(node.input) == 2: | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.output[0]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
if len(node3.input) == 2: | |
self.node_reference[node3.input[1]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
node3.op_type = "Reorg" | |
node3.input[0] = node.input[0] | |
attr_group = AttributeProto(name="stride", i=shape[3], type=APT.INT) | |
node3.attribute.append(attr_group) | |
reduced_node_count[0] += 2 | |
i += 2 | |
def fuse_expand_broadcast(self, reduced_node_count: List[int]) -> None: | |
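        """Drop Expand nodes that only feed broadcasting binary ops (Add/Sub/Mul/Div/Min/Max)."""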
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# Add/Sub/Mul/Div/Min/Max <= Expand - Add/Sub/Mul/Div/Min/Max | |
if node.op_type == "Expand": | |
if self.node_reference[node.output[0]] != 1 or i + 1 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
if node2.op_type not in ["Add", "Sub", "Mul", "Div", "Min", "Max"]: | |
continue | |
if ( | |
node2.input[1] != node.output[0] | |
and node2.input[0] != node.output[0] | |
): | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
self.node_reference[node.output[0]] -= 1 | |
if len(node.input) == 2: | |
self.node_reference[node.input[1]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
if node2.input[0] == node.output[0]: | |
node2.input[0] = node.input[0] | |
else: | |
node2.input[1] = node.input[0] | |
reduced_node_count[0] += 1 | |
i += 1 | |
def fuse_lstm_gru_rnn(self, reduced_node_count: List[int]) -> None: | |
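        """Absorb the Transpose/Reshape/Squeeze plumbing around LSTM/GRU/RNN inputs and outputs into the recurrent node."""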
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# LSTM(bi) <= LSTM(bi) - Transpose - Reshape - Transpose | |
if node.op_type in ["LSTM", "GRU", "RNN"]: | |
if self.node_reference[node.output[0]] != 1 or i + 2 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
if node2.op_type != "Transpose" or node3.op_type != "Reshape": | |
continue | |
if self.node_reference[node2.output[0]] != 1: | |
continue | |
if ( | |
node2.input[0] != node.output[0] | |
or node3.input[0] != node2.output[0] | |
): | |
continue | |
direction = get_node_attr_s(node, "direction") | |
if direction != "bidirectional": | |
continue | |
# 0 2 1 3 | |
perm = get_node_attr_ai(node2, "perm") | |
if ( | |
perm.size != 4 | |
or perm[0] != 0 | |
or perm[1] != 2 | |
or perm[2] != 1 | |
or perm[3] != 3 | |
): | |
continue | |
if len(node3.input) == 1: | |
shape = get_node_attr_ai(node3, "shape") | |
else: | |
if node3.input[1] not in self.weights: | |
continue | |
shape = get_node_attr_from_input_ai(self.weights[node3.input[1]]) | |
# 0 0 -1 | |
if shape.size != 3 or shape[0] != 0 or shape[1] != 0 or shape[2] != -1: | |
continue | |
# reduce | |
node2.op_type = "noop_reducedncnn" | |
node3.op_type = "noop_reducedncnn" | |
self.node_reference[node.output[0]] -= 1 | |
self.node_reference[node2.output[0]] -= 1 | |
if len(node3.input) == 2: | |
self.node_reference[node3.input[1]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
node.output[0] = node3.output[0] | |
reduced_node_count[0] += 2 | |
i += 2 | |
if i + 1 < self.node_count: | |
if self.node_reference[node3.output[0]] != 1: | |
continue | |
node4 = self.mutable_graph_nodes[i + 1] | |
if node4.op_type != "Transpose": | |
continue | |
if node4.input[0] != node.output[0]: | |
continue | |
# 1 0 2 | |
perm4 = get_node_attr_ai(node4, "perm") | |
if ( | |
perm4.size != 3 | |
or perm4[0] != 1 | |
or perm4[1] != 0 | |
or perm4[2] != 2 | |
): | |
continue | |
# reduce | |
node4.op_type = "noop_reducedncnn" | |
self.node_reference[node.output[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
node.output[0] = node4.output[0] | |
reduced_node_count[0] += 1 | |
i += 1 | |
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# LSTM(uni) <= LSTM(uni) - Squeeze - Transpose | |
if node.op_type in ["LSTM", "GRU", "RNN"]: | |
if self.node_reference[node.output[0]] != 1 or i + 1 >= self.node_count: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
if node2.op_type != "Squeeze": | |
continue | |
if node2.input[0] != node.output[0]: | |
continue | |
direction = get_node_attr_s(node, "direction") | |
if direction == "bidirectional": | |
continue | |
axes = get_node_attr_ai(node2, "axes") | |
if axes.size != 1 or axes[0] != 1: | |
continue | |
# reduce | |
node2.op_type = "noop_reducedncnn" | |
self.node_reference[node.output[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
node.output[0] = node2.output[0] | |
reduced_node_count[0] += 1 | |
i += 1 | |
if i + 1 < self.node_count: | |
if self.node_reference[node2.output[0]] != 1: | |
continue | |
node3 = self.mutable_graph_nodes[i + 1] | |
if node3.op_type != "Transpose": | |
continue | |
if node3.input[0] != node.output[0]: | |
continue | |
# 1 0 2 | |
perm4 = get_node_attr_ai(node3, "perm") | |
if ( | |
perm4.size != 3 | |
or perm4[0] != 1 | |
or perm4[1] != 0 | |
or perm4[2] != 2 | |
): | |
continue | |
# reduce | |
node3.op_type = "noop_reducedncnn" | |
self.node_reference[node.output[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
node.output[0] = node3.output[0] | |
reduced_node_count[0] += 1 | |
i += 1 | |
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# LSTM <= Transpose - LSTM | |
if node.op_type == "Transpose": | |
                if self.node_reference[node.output[0]] != 1 or i + 1 >= self.node_count:
                    continue
# 1 0 2 | |
perm = get_node_attr_ai(node, "perm") | |
if perm.size != 3 or perm[0] != 1 or perm[1] != 0 or perm[2] != 2: | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
if node2.op_type not in ["LSTM", "GRU", "RNN"]: | |
continue | |
if node2.input[0] != node.output[0]: | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
self.node_reference[node.output[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
node2.input[0] = node.input[0] | |
reduced_node_count[0] += 1 | |
i += 1 | |
def fuse_multiheadattention(self, reduced_node_count: List[int]) -> None: | |
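        """Fuse both the separate-QKV and the packed-QKV (MatMul - Add - Split) attention
        subgraphs into a single MultiHeadAttention node with embed_dim and num_heads.
        """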
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# MultiHeadAttention <= MatMul(q) - Add | |
# - MatMul(k) - Add | |
# - MatMul(v) - Add | |
# - Mul | |
# - Reshape - Transpose | |
# - Reshape - Reshape - Transpose - Transpose | |
# - Gemm - Softmax - Gemm - Transpose - Reshape - MatMul - Add | |
if node.op_type == "MatMul": | |
if ( | |
self.node_reference[node.output[0]] != 1 | |
or i + 19 >= self.node_count | |
): | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
node4 = self.mutable_graph_nodes[i + 3] | |
node5 = self.mutable_graph_nodes[i + 4] | |
node6 = self.mutable_graph_nodes[i + 5] | |
node7 = self.mutable_graph_nodes[i + 6] | |
node8 = self.mutable_graph_nodes[i + 7] | |
node9 = self.mutable_graph_nodes[i + 8] | |
node10 = self.mutable_graph_nodes[i + 9] | |
node11 = self.mutable_graph_nodes[i + 10] | |
node12 = self.mutable_graph_nodes[i + 11] | |
node13 = self.mutable_graph_nodes[i + 12] | |
node14 = self.mutable_graph_nodes[i + 13] | |
node15 = self.mutable_graph_nodes[i + 14] | |
node16 = self.mutable_graph_nodes[i + 15] | |
node17 = self.mutable_graph_nodes[i + 16] | |
node18 = self.mutable_graph_nodes[i + 17] | |
node19 = self.mutable_graph_nodes[i + 18] | |
node20 = self.mutable_graph_nodes[i + 19] | |
if ( | |
node2.op_type != "Add" | |
or node3.op_type != "MatMul" | |
or node4.op_type != "Add" | |
or node5.op_type != "MatMul" | |
or node6.op_type != "Add" | |
or node7.op_type != "Mul" | |
or node8.op_type != "Reshape" | |
or node9.op_type != "Transpose" | |
or node10.op_type != "Reshape" | |
or node11.op_type != "Reshape" | |
or node12.op_type != "Transpose" | |
or node13.op_type != "Transpose" | |
or node14.op_type != "MatMul" | |
or node15.op_type != "Softmax" | |
or node16.op_type != "MatMul" | |
or node17.op_type != "Transpose" | |
or node18.op_type != "Reshape" | |
or node19.op_type != "MatMul" | |
or node20.op_type != "Add" | |
): | |
continue | |
if ( | |
self.node_reference[node2.output[0]] != 1 | |
or self.node_reference[node3.output[0]] != 1 | |
or self.node_reference[node4.output[0]] != 1 | |
or self.node_reference[node5.output[0]] != 1 | |
or self.node_reference[node6.output[0]] != 1 | |
or self.node_reference[node7.output[0]] != 1 | |
or self.node_reference[node8.output[0]] != 1 | |
or self.node_reference[node9.output[0]] != 1 | |
or self.node_reference[node10.output[0]] != 1 | |
or self.node_reference[node11.output[0]] != 1 | |
or self.node_reference[node12.output[0]] != 1 | |
or self.node_reference[node13.output[0]] != 1 | |
or self.node_reference[node14.output[0]] != 1 | |
or self.node_reference[node15.output[0]] != 1 | |
or self.node_reference[node16.output[0]] != 1 | |
or self.node_reference[node17.output[0]] != 1 | |
or self.node_reference[node18.output[0]] != 1 | |
or self.node_reference[node19.output[0]] != 1 | |
): | |
continue | |
if ( | |
node2.input[0] != node.output[0] | |
or node4.input[0] != node3.output[0] | |
or node6.input[0] != node5.output[0] | |
or node7.input[0] != node2.output[0] | |
or node8.input[0] != node7.output[0] | |
or node9.input[0] != node8.output[0] | |
or node10.input[0] != node4.output[0] | |
or node11.input[0] != node6.output[0] | |
or node12.input[0] != node11.output[0] | |
or node13.input[0] != node10.output[0] | |
or node14.input[0] != node9.output[0] | |
or node14.input[1] != node13.output[0] | |
or node15.input[0] != node14.output[0] | |
or node16.input[0] != node15.output[0] | |
or node16.input[1] != node12.output[0] | |
or node17.input[0] != node16.output[0] | |
or node18.input[0] != node17.output[0] | |
or node19.input[0] != node18.output[0] | |
or node20.input[0] != node19.output[0] | |
): | |
continue | |
q_B = get_node_attr_from_input_af(self.weights[node2.input[1]]) | |
k_B = get_node_attr_from_input_af(self.weights[node4.input[1]]) | |
v_B = get_node_attr_from_input_af(self.weights[node6.input[1]]) | |
o_B = get_node_attr_from_input_af(self.weights[node20.input[1]]) | |
if q_B.size != k_B.size or q_B.size != v_B.size or q_B.size != o_B.size: | |
continue | |
embed_dim = q_B.size | |
# 1 0 2 | |
perm9 = get_node_attr_ai(node9, "perm") | |
perm12 = get_node_attr_ai(node12, "perm") | |
if perm9.size != 3 or perm9[0] != 1 or perm9[1] != 0 or perm9[2] != 2: | |
continue | |
if ( | |
perm12.size != 3 | |
or perm12[0] != 1 | |
or perm12[1] != 0 | |
or perm12[2] != 2 | |
): | |
continue | |
# 1 2 0 | |
perm13 = get_node_attr_ai(node13, "perm") | |
if ( | |
perm13.size != 3 | |
or perm13[0] != 1 | |
or perm13[1] != 2 | |
or perm13[2] != 0 | |
): | |
continue | |
# 1 0 2 | |
perm17 = get_node_attr_ai(node17, "perm") | |
if ( | |
perm17.size != 3 | |
or perm17[0] != 1 | |
or perm17[1] != 0 | |
or perm17[2] != 2 | |
): | |
continue | |
softmax_axis = get_node_attr_i(node15, "axis") | |
if softmax_axis != 2: | |
continue | |
# 1/-1 seqlen * num_heads, embed_dim / num_heads | |
if len(node8.input) == 1: | |
shape8 = get_node_attr_ai(node8, "shape") | |
else: | |
if node8.input[1] not in self.weights: | |
continue | |
shape8 = get_node_attr_from_input_ai(self.weights[node8.input[1]]) | |
if len(node10.input) == 1: | |
shape10 = get_node_attr_ai(node10, "shape") | |
else: | |
if node10.input[1] not in self.weights: | |
continue | |
shape10 = get_node_attr_from_input_ai(self.weights[node10.input[1]]) | |
if len(node11.input) == 1: | |
shape11 = get_node_attr_ai(node11, "shape") | |
else: | |
if node11.input[1] not in self.weights: | |
continue | |
shape11 = get_node_attr_from_input_ai(self.weights[node11.input[1]]) | |
if shape8.size != 3 or shape10.size != 3 or shape11.size != 3: | |
continue | |
if ( | |
shape8[1] != shape10[1] | |
or shape8[1] != shape11[1] | |
or shape8[2] != shape10[2] | |
or shape8[2] != shape11[2] | |
): | |
continue | |
                num_heads = embed_dim // shape8[2]
if len(node18.input) == 1: | |
shape18 = get_node_attr_ai(node18, "shape") | |
else: | |
if node18.input[1] not in self.weights: | |
continue | |
shape18 = get_node_attr_from_input_ai(self.weights[node18.input[1]]) | |
if ( | |
shape18.size != 3 | |
or shape18[2] != embed_dim | |
or shape18[1] * num_heads != shape8[1] | |
): | |
continue | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
node3.op_type = "noop_reducedncnn" | |
node4.op_type = "noop_reducedncnn" | |
node5.op_type = "noop_reducedncnn" | |
node6.op_type = "noop_reducedncnn" | |
node7.op_type = "noop_reducedncnn" | |
node8.op_type = "noop_reducedncnn" | |
node9.op_type = "noop_reducedncnn" | |
node10.op_type = "noop_reducedncnn" | |
node11.op_type = "noop_reducedncnn" | |
node12.op_type = "noop_reducedncnn" | |
node13.op_type = "noop_reducedncnn" | |
node14.op_type = "noop_reducedncnn" | |
node15.op_type = "noop_reducedncnn" | |
node16.op_type = "noop_reducedncnn" | |
node17.op_type = "noop_reducedncnn" | |
node18.op_type = "noop_reducedncnn" | |
node19.op_type = "noop_reducedncnn" | |
self.node_reference[node2.input[0]] -= 1 | |
self.node_reference[node4.input[0]] -= 1 | |
self.node_reference[node6.input[0]] -= 1 | |
self.node_reference[node7.input[0]] -= 1 | |
self.node_reference[node7.input[1]] -= 1 | |
self.node_reference[node8.input[0]] -= 1 | |
if len(node8.input) == 2: | |
self.node_reference[node8.input[1]] -= 1 | |
self.node_reference[node9.input[0]] -= 1 | |
self.node_reference[node10.input[0]] -= 1 | |
if len(node10.input) == 2: | |
self.node_reference[node10.input[1]] -= 1 | |
self.node_reference[node11.input[0]] -= 1 | |
if len(node11.input) == 2: | |
self.node_reference[node11.input[1]] -= 1 | |
self.node_reference[node12.input[0]] -= 1 | |
self.node_reference[node13.input[0]] -= 1 | |
self.node_reference[node14.input[0]] -= 1 | |
self.node_reference[node14.input[1]] -= 1 | |
self.node_reference[node15.input[0]] -= 1 | |
self.node_reference[node16.input[0]] -= 1 | |
self.node_reference[node16.input[1]] -= 1 | |
self.node_reference[node17.input[0]] -= 1 | |
self.node_reference[node18.input[0]] -= 1 | |
if len(node18.input) == 2: | |
self.node_reference[node18.input[1]] -= 1 | |
self.node_reference[node19.input[0]] -= 1 | |
self.node_reference[node20.input[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
self.blob_names.pop(node3.output[0], None) | |
self.blob_names.pop(node4.output[0], None) | |
self.blob_names.pop(node5.output[0], None) | |
self.blob_names.pop(node6.output[0], None) | |
self.blob_names.pop(node7.output[0], None) | |
self.blob_names.pop(node8.output[0], None) | |
self.blob_names.pop(node9.output[0], None) | |
self.blob_names.pop(node10.output[0], None) | |
self.blob_names.pop(node11.output[0], None) | |
self.blob_names.pop(node12.output[0], None) | |
self.blob_names.pop(node13.output[0], None) | |
self.blob_names.pop(node14.output[0], None) | |
self.blob_names.pop(node15.output[0], None) | |
self.blob_names.pop(node16.output[0], None) | |
self.blob_names.pop(node17.output[0], None) | |
self.blob_names.pop(node18.output[0], None) | |
self.blob_names.pop(node19.output[0], None) | |
qw = node.input[1] | |
qb = node2.input[1] | |
kw = node3.input[1] | |
kb = node4.input[1] | |
vw = node5.input[1] | |
vb = node6.input[1] | |
ow = node19.input[1] | |
ob = node20.input[1] | |
node20.op_type = "MultiHeadAttention" | |
node20.ClearField("input") | |
node20.input.append(node.input[0]) | |
node20.input.append(node3.input[0]) | |
node20.input.append(node5.input[0]) | |
node20.input.append(qw) | |
node20.input.append(qb) | |
node20.input.append(kw) | |
node20.input.append(kb) | |
node20.input.append(vw) | |
node20.input.append(vb) | |
node20.input.append(ow) | |
node20.input.append(ob) | |
attr_embed_dim = AttributeProto( | |
name="embed_dim", i=embed_dim, type=APT.INT | |
) | |
node20.attribute.append(attr_embed_dim) | |
attr_num_heads = AttributeProto( | |
name="num_heads", i=num_heads, type=APT.INT | |
) | |
node20.attribute.append(attr_num_heads) | |
reduced_node_count[0] += 19 | |
i += 19 | |
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# MultiHeadAttention <= MatMul(qkv) - Add - Split | |
# - Mul | |
# - Reshape - Transpose | |
# - Reshape - Reshape - Transpose - Transpose | |
# - Gemm - Softmax - Gemm - Transpose - Reshape - MatMul - Add | |
if node.op_type == "MatMul": | |
if ( | |
self.node_reference[node.output[0]] != 1 | |
or i + 16 >= self.node_count | |
): | |
continue | |
node2 = self.mutable_graph_nodes[i + 1] | |
node3 = self.mutable_graph_nodes[i + 2] | |
node4 = self.mutable_graph_nodes[i + 3] | |
node5 = self.mutable_graph_nodes[i + 4] | |
node6 = self.mutable_graph_nodes[i + 5] | |
node7 = self.mutable_graph_nodes[i + 6] | |
node8 = self.mutable_graph_nodes[i + 7] | |
node9 = self.mutable_graph_nodes[i + 8] | |
node10 = self.mutable_graph_nodes[i + 9] | |
node11 = self.mutable_graph_nodes[i + 10] | |
node12 = self.mutable_graph_nodes[i + 11] | |
node13 = self.mutable_graph_nodes[i + 12] | |
node14 = self.mutable_graph_nodes[i + 13] | |
node15 = self.mutable_graph_nodes[i + 14] | |
node16 = self.mutable_graph_nodes[i + 15] | |
node17 = self.mutable_graph_nodes[i + 16] | |
if ( | |
node2.op_type != "Add" | |
or node3.op_type != "Split" | |
or node4.op_type != "Mul" | |
or node5.op_type != "Reshape" | |
or node6.op_type != "Transpose" | |
or node7.op_type != "Reshape" | |
or node8.op_type != "Reshape" | |
or node9.op_type != "Transpose" | |
or node10.op_type != "Transpose" | |
or node11.op_type != "MatMul" | |
or node12.op_type != "Softmax" | |
or node13.op_type != "MatMul" | |
or node14.op_type != "Transpose" | |
or node15.op_type != "Reshape" | |
or node16.op_type != "MatMul" | |
or node17.op_type != "Add" | |
): | |
continue | |
if ( | |
self.node_reference[node2.output[0]] != 1 | |
or self.node_reference[node3.output[0]] != 1 | |
or self.node_reference[node3.output[1]] != 1 | |
or self.node_reference[node3.output[2]] != 1 | |
or self.node_reference[node4.output[0]] != 1 | |
or self.node_reference[node5.output[0]] != 1 | |
or self.node_reference[node6.output[0]] != 1 | |
or self.node_reference[node7.output[0]] != 1 | |
or self.node_reference[node8.output[0]] != 1 | |
or self.node_reference[node9.output[0]] != 1 | |
or self.node_reference[node10.output[0]] != 1 | |
or self.node_reference[node11.output[0]] != 1 | |
or self.node_reference[node12.output[0]] != 1 | |
or self.node_reference[node13.output[0]] != 1 | |
or self.node_reference[node14.output[0]] != 1 | |
or self.node_reference[node15.output[0]] != 1 | |
or self.node_reference[node16.output[0]] != 1 | |
): | |
continue | |
if ( | |
node2.input[0] != node.output[0] | |
or node3.input[0] != node2.output[0] | |
or node4.input[0] != node3.output[0] | |
or node5.input[0] != node4.output[0] | |
or node6.input[0] != node5.output[0] | |
or node7.input[0] != node3.output[1] | |
or node8.input[0] != node3.output[2] | |
or node9.input[0] != node8.output[0] | |
or node10.input[0] != node7.output[0] | |
or node11.input[0] != node6.output[0] | |
or node11.input[1] != node10.output[0] | |
or node12.input[0] != node11.output[0] | |
or node13.input[0] != node12.output[0] | |
or node13.input[1] != node9.output[0] | |
or node14.input[0] != node13.output[0] | |
or node15.input[0] != node14.output[0] | |
or node16.input[0] != node15.output[0] | |
or node17.input[0] != node16.output[0] | |
): | |
continue | |
qkv_B = get_node_attr_from_input_af(self.weights[node2.input[1]]) | |
o_B = get_node_attr_from_input_af(self.weights[node17.input[1]]) | |
if qkv_B.size != o_B.size * 3: | |
continue | |
embed_dim = o_B.size | |
# 1 0 2 | |
perm6 = get_node_attr_ai(node6, "perm") | |
perm9 = get_node_attr_ai(node9, "perm") | |
if perm6.size != 3 or perm6[0] != 1 or perm6[1] != 0 or perm6[2] != 2: | |
continue | |
if perm9.size != 3 or perm9[0] != 1 or perm9[1] != 0 or perm9[2] != 2: | |
continue | |
# 1 2 0 | |
perm10 = get_node_attr_ai(node10, "perm") | |
if ( | |
perm10.size != 3 | |
or perm10[0] != 1 | |
or perm10[1] != 2 | |
or perm10[2] != 0 | |
): | |
continue | |
# 1 0 2 | |
perm14 = get_node_attr_ai(node14, "perm") | |
if ( | |
perm14.size != 3 | |
or perm14[0] != 1 | |
or perm14[1] != 0 | |
or perm14[2] != 2 | |
): | |
continue | |
softmax_axis = get_node_attr_i(node12, "axis") | |
if softmax_axis != 2: | |
continue | |
# 1/-1, seqlen * num_heads, embed_dim / num_heads | |
if len(node5.input) == 1: | |
shape5 = get_node_attr_ai(node5, "shape") | |
else: | |
if node5.input[1] not in self.weights: | |
continue | |
shape5 = get_node_attr_from_input_ai(self.weights[node5.input[1]]) | |
if len(node7.input) == 1: | |
shape7 = get_node_attr_ai(node7, "shape") | |
else: | |
if node7.input[1] not in self.weights: | |
continue | |
shape7 = get_node_attr_from_input_ai(self.weights[node7.input[1]]) | |
if len(node8.input) == 1: | |
shape8 = get_node_attr_ai(node8, "shape") | |
else: | |
if node8.input[1] not in self.weights: | |
continue | |
shape8 = get_node_attr_from_input_ai(self.weights[node8.input[1]]) | |
if ( | |
shape5[1] != shape7[1] | |
or shape5[1] != shape8[1] | |
or shape5[2] != shape7[2] | |
or shape5[2] != shape8[2] | |
): | |
continue | |
num_heads = embed_dim // int(shape5[2])  # integer count of attention heads | |
# 1, seqlen, embed_dim | |
if len(node15.input) == 1: | |
shape15 = get_node_attr_ai(node15, "shape") | |
else: | |
if node15.input[1] not in self.weights: | |
continue | |
shape15 = get_node_attr_from_input_ai(self.weights[node15.input[1]]) | |
if ( | |
shape15.size != 3 | |
or shape15[2] != embed_dim | |
or shape15[1] * num_heads != shape8[1] | |
): | |
continue | |
# reduce | |
node.op_type = "noop_reducedncnn" | |
node2.op_type = "noop_reducedncnn" | |
node3.op_type = "noop_reducedncnn" | |
node4.op_type = "noop_reducedncnn" | |
node5.op_type = "noop_reducedncnn" | |
node6.op_type = "noop_reducedncnn" | |
node7.op_type = "noop_reducedncnn" | |
node8.op_type = "noop_reducedncnn" | |
node9.op_type = "noop_reducedncnn" | |
node10.op_type = "noop_reducedncnn" | |
node11.op_type = "noop_reducedncnn" | |
node12.op_type = "noop_reducedncnn" | |
node13.op_type = "noop_reducedncnn" | |
node14.op_type = "noop_reducedncnn" | |
node15.op_type = "noop_reducedncnn" | |
node16.op_type = "noop_reducedncnn" | |
self.node_reference[node2.input[0]] -= 1 | |
self.node_reference[node3.input[0]] -= 1 | |
self.node_reference[node4.input[0]] -= 1 | |
self.node_reference[node4.input[1]] -= 1 | |
self.node_reference[node5.input[0]] -= 1 | |
if len(node5.input) == 2: | |
self.node_reference[node5.input[1]] -= 1 | |
self.node_reference[node6.input[0]] -= 1 | |
self.node_reference[node7.input[0]] -= 1 | |
if len(node7.input) == 2: | |
self.node_reference[node7.input[1]] -= 1 | |
self.node_reference[node8.input[0]] -= 1 | |
if len(node8.input) == 2: | |
self.node_reference[node8.input[1]] -= 1 | |
self.node_reference[node9.input[0]] -= 1 | |
self.node_reference[node10.input[0]] -= 1 | |
self.node_reference[node11.input[0]] -= 1 | |
self.node_reference[node11.input[1]] -= 1 | |
self.node_reference[node12.input[0]] -= 1 | |
self.node_reference[node13.input[0]] -= 1 | |
self.node_reference[node13.input[1]] -= 1 | |
self.node_reference[node14.input[0]] -= 1 | |
self.node_reference[node15.input[0]] -= 1 | |
if len(node15.input) == 2: | |
self.node_reference[node15.input[1]] -= 1 | |
self.node_reference[node16.input[0]] -= 1 | |
self.node_reference[node17.input[0]] -= 1 | |
self.blob_names.pop(node.output[0], None) | |
self.blob_names.pop(node2.output[0], None) | |
self.blob_names.pop(node3.output[0], None) | |
self.blob_names.pop(node3.output[1], None) | |
self.blob_names.pop(node3.output[2], None) | |
self.blob_names.pop(node4.output[0], None) | |
self.blob_names.pop(node5.output[0], None) | |
self.blob_names.pop(node6.output[0], None) | |
self.blob_names.pop(node7.output[0], None) | |
self.blob_names.pop(node8.output[0], None) | |
self.blob_names.pop(node9.output[0], None) | |
self.blob_names.pop(node10.output[0], None) | |
self.blob_names.pop(node11.output[0], None) | |
self.blob_names.pop(node12.output[0], None) | |
self.blob_names.pop(node13.output[0], None) | |
self.blob_names.pop(node14.output[0], None) | |
self.blob_names.pop(node15.output[0], None) | |
self.blob_names.pop(node16.output[0], None) | |
qkvw = node.input[1] | |
qkvb = node2.input[1] | |
ow = node16.input[1] | |
ob = node17.input[1] | |
node17.op_type = "MultiHeadAttention" | |
node17.ClearField("input") | |
node17.input.append(node.input[0]) | |
node17.input.append(qkvw) | |
node17.input.append(qkvb) | |
node17.input.append(ow) | |
node17.input.append(ob) | |
attr_embed_dim = AttributeProto( | |
name="embed_dim", i=embed_dim, type=APT.INT | |
) | |
node17.attribute.append(attr_embed_dim) | |
attr_num_heads = AttributeProto( | |
name="num_heads", i=num_heads, type=APT.INT | |
) | |
node17.attribute.append(attr_num_heads) | |
reduced_node_count[0] += 16 | |
i += 16 | |
def fuse_binaryop_with_scalar(self) -> None: | |
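# Fold a scalar (zero-dim, single-element) constant operand of Add/Sub/Mul/Div/Min/Max/Pow | |
# into the node itself: the surviving input is the non-constant tensor and the scalar is | |
# stored in the with_scalar / b attributes read by the BinaryOp branch of convert(). | |
# The first pass handles a constant left operand (Sub/Div become RSub/RDiv to keep the | |
# operand order), the second pass a constant right operand. | |
# e.g. Mul(x, 2.0) becomes Mul(x) with with_scalar=1, b=2.0. | |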
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# Add/Sub/Mul/Div/Min/Max/Pow(a, x) | |
if node.op_type in ["Add", "Sub", "Mul", "Div", "Min", "Max", "Pow"]: | |
if node.input[0] not in self.weights: | |
continue | |
scalar_b = self.weights[node.input[0]] | |
if ( | |
len(scalar_b.dims) != 0 | |
or get_tensor_proto_data_size(scalar_b, scalar_b.data_type) != 1 | |
): | |
continue | |
if node.op_type == "Sub": | |
node.op_type = "RSub" | |
elif node.op_type == "Div": | |
node.op_type = "RDiv" | |
b = get_node_attr_from_input_f(scalar_b) | |
self.node_reference[node.input[0]] -= 1 | |
node_input = node.input[1] | |
node.ClearField("input") | |
node.input.append(node_input) | |
attr_with_scalar = AttributeProto(name="with_scalar", i=1, type=APT.INT) | |
node.attribute.append(attr_with_scalar) | |
attr_b = AttributeProto(name="b", f=b, type=APT.FLOAT) | |
node.attribute.append(attr_b) | |
for i in range(self.node_count): | |
node = self.mutable_graph_nodes[i] | |
# Add/Sub/Mul/Div/Min/Max/Pow(x, b) | |
if node.op_type in ["Add", "Sub", "Mul", "Div", "Min", "Max", "Pow"]: | |
if node.input[1] not in self.weights: | |
continue | |
scalar_b = self.weights[node.input[1]] | |
if ( | |
len(scalar_b.dims) != 0 | |
or get_tensor_proto_data_size(scalar_b, scalar_b.data_type) != 1 | |
): | |
continue | |
b = get_node_attr_from_input_f(scalar_b) | |
self.node_reference[node.input[1]] -= 1 | |
node_input = node.input[0] | |
node.ClearField("input") | |
node.input.append(node_input) | |
attr_with_scalar = AttributeProto(name="with_scalar", i=1, type=APT.INT) | |
node.attribute.append(attr_with_scalar) | |
attr_b = AttributeProto(name="b", f=b, type=APT.FLOAT) | |
node.attribute.append(attr_b) | |
def convert(self, is_fp16: bool = False, include_mem_data: bool = True): | |
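# Convert the ONNX graph to an NcnnModel: topologically sort the nodes, run the op-chain | |
# fusion passes, count layers and blobs, emit Input / MemoryData / Split layers, and then | |
# translate every remaining ONNX node into an NCNN layer with its params and weights. | |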
if is_fp16: | |
logger.debug("NCNN mode: fp16") | |
else: | |
logger.debug("NCNN mode: fp32") | |
# Topological sort | |
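# Whenever a node consumes a blob that no earlier node (or initializer) produces, swap it | |
# with the node that does produce it, so every node ends up after all of its producers. | |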
i = 0 | |
while i < self.node_count: | |
node = self.mutable_graph_nodes[i] | |
swapnode = False | |
missing_input_name = None | |
for input_name in node.input: | |
if ( | |
input_name | |
and input_name not in self.producers | |
and input_name not in self.weights | |
): | |
swapnode = True | |
missing_input_name = input_name | |
break | |
# If nothing missing, add outputs to producers and continue | |
# to next node | |
if not swapnode: | |
for output_name in node.output: | |
if output_name: | |
self.producers[output_name] = None | |
i += 1 | |
continue | |
# find node that produces missing_input_name | |
for j, nodeq in enumerate(self.mutable_graph_nodes[i + 1 :], i + 1): | |
found = False | |
for output_name in nodeq.output: | |
if output_name == missing_input_name: | |
found = True | |
break | |
if found: | |
break | |
else: | |
raise RuntimeError( | |
f"Cannot find node that produces {missing_input_name}, " | |
f"which is required by node {i} ({node.name})." | |
) | |
self.swap_nodes(i, j) | |
# global definition line | |
# [layer count][blob count] | |
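# First pass over the graph: give every node a name, absorb Constant nodes into | |
# self.weights, register every blob name, and count how many consumers reference | |
# each blob in self.node_reference (Dropout keeps only its first output). | |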
for node in self.onnx_graph.node: | |
op = node.op_type | |
if not node.name: | |
node.name = node.output[0] | |
if op == "Constant": | |
self.weights[node.output[0]] = get_node_attr_tensor(node, "value") | |
for input_name in node.input: | |
self.blob_names[input_name] = None | |
if input_name not in self.node_reference: | |
self.node_reference[input_name] = 1 | |
else: | |
self.node_reference[input_name] += 1 | |
if op == "Dropout": | |
output_name = node.output[0] | |
self.blob_names[output_name] = None | |
self.node_reference[output_name] = 0 | |
continue | |
for output_name in node.output: | |
self.blob_names[output_name] = None | |
self.node_reference[output_name] = 0 | |
# include Input node | |
input_node_count = 0 | |
for graph_input in self.onnx_graph.input: | |
input_name = graph_input.name | |
# check weight | |
if input_name not in self.weights: | |
self.blob_names[input_name] = None | |
input_node_count += 1 | |
# op chain fusion | |
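# reduced_node_count is a single-element list used as a mutable out-parameter so each | |
# fuse_* pass can report how many nodes it collapsed; the total is subtracted from the | |
# layer count below. | |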
reduced_node_count = [0] | |
self.fuse_weight_reshape(reduced_node_count) | |
self.fuse_weight_transpose(reduced_node_count) | |
self.fuse_shufflechannel(reduced_node_count) | |
self.fuse_shufflechannel_split(reduced_node_count) | |
self.fuse_hardsigmoid(reduced_node_count) | |
self.fuse_hardswish(reduced_node_count) | |
self.fuse_swish(reduced_node_count) | |
self.fuse_batchnorm1d_squeeze_unsqueeze(reduced_node_count) | |
self.fuse_unsqueeze_prelu(reduced_node_count) | |
self.fuse_normalize(reduced_node_count) | |
self.fuse_groupnorm(reduced_node_count) | |
self.fuse_layernorm(reduced_node_count) | |
self.fuse_flatten(reduced_node_count) | |
self.fuse_pixelshuffle(reduced_node_count) | |
self.fuse_reorg(reduced_node_count) | |
self.fuse_expand_broadcast(reduced_node_count) | |
self.fuse_lstm_gru_rnn(reduced_node_count) | |
self.fuse_multiheadattention(reduced_node_count) | |
self.fuse_binaryop_with_scalar() | |
self.fuse_rewrite_gather() | |
# reduce common const weight node_reference | |
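# Weights that are consumed directly as layer data (conv kernels, norm statistics, | |
# shape/scale constants, ...) are not real blobs in the NCNN graph, so their reference | |
# counts are decremented here to keep them out of the blob and MemoryData accounting. | |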
for node in self.onnx_graph.node: | |
op = node.op_type | |
if op == "BatchNormalization": | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
self.node_reference[node.input[3]] -= 1 | |
self.node_reference[node.input[4]] -= 1 | |
elif op == "BiasGelu": | |
self.node_reference[node.input[1]] -= 1 | |
elif op == "Clip": | |
if len(node.input) == 3: | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
elif op == "Conv": | |
self.node_reference[node.input[1]] -= 1 | |
if len(node.input) == 3: | |
self.node_reference[node.input[2]] -= 1 | |
elif op == "ConvTranspose": | |
self.node_reference[node.input[1]] -= 1 | |
if len(node.input) == 3: | |
self.node_reference[node.input[2]] -= 1 | |
elif op == "EmbedLayerNormalization": | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
self.node_reference[node.input[3]] -= 1 | |
self.node_reference[node.input[4]] -= 1 | |
self.node_reference[node.input[5]] -= 1 | |
self.node_reference[node.input[6]] -= 1 | |
elif op == "Gemm": | |
alpha = get_node_attr_f(node, "alpha", 1) | |
beta = get_node_attr_f(node, "beta", 1) | |
transA = get_node_attr_i(node, "transA", 0) | |
transB = get_node_attr_i(node, "transB", 0) | |
if alpha == 1 and beta == 1 and transA == 0 and transB == 1: | |
# InnerProduct-like A * B + C | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
elif op == "GroupNorm": | |
affine = get_node_attr_i(node, "affine", 1) | |
if affine: | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
elif op == "GRU": | |
for gru_input in node.input: | |
self.node_reference[gru_input] -= 1 | |
elif op == "InstanceNormalization": | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
elif op == "LayerNorm": | |
affine = get_node_attr_i(node, "affine", 1) | |
if affine: | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
elif op == "LSTM": | |
for lstm_input in node.input: | |
self.node_reference[lstm_input] -= 1 | |
elif op == "MatMul": | |
if ( | |
node.input[1] in self.weights | |
and len(self.weights[node.input[1]].dims) == 2 | |
): | |
# InnerProduct | |
self.node_reference[node.input[1]] -= 1 | |
elif op == "MultiHeadAttention": | |
if len(node.input) == 5: | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
self.node_reference[node.input[3]] -= 1 | |
self.node_reference[node.input[4]] -= 1 | |
else: | |
self.node_reference[node.input[3]] -= 1 | |
self.node_reference[node.input[4]] -= 1 | |
self.node_reference[node.input[5]] -= 1 | |
self.node_reference[node.input[6]] -= 1 | |
self.node_reference[node.input[7]] -= 1 | |
self.node_reference[node.input[8]] -= 1 | |
self.node_reference[node.input[9]] -= 1 | |
self.node_reference[node.input[10]] -= 1 | |
elif op == "Pad": | |
if len(node.input) >= 2: | |
self.node_reference[node.input[1]] -= 1 | |
elif op == "PRelu": | |
self.node_reference[node.input[1]] -= 1 | |
elif op == "Reshape": | |
if len(node.input) >= 2: | |
self.node_reference[node.input[1]] -= 1 | |
elif op == "Resize": | |
if len(node.input) == 2: | |
# opset 10 | |
self.node_reference[node.input[1]] -= 1 | |
else: | |
# opset 11+ | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
if len(node.input) >= 4: | |
self.node_reference[node.input[3]] -= 1 | |
elif op == "RNN": | |
for rnn_input in node.input: | |
self.node_reference[rnn_input] -= 1 | |
elif op == "SkipLayerNormalization": | |
self.node_reference[node.input[2]] -= 1 | |
self.node_reference[node.input[3]] -= 1 | |
self.node_reference[node.input[4]] -= 1 | |
elif op == "Slice": | |
if len(node.input) >= 2: | |
self.node_reference[node.input[1]] -= 1 | |
self.node_reference[node.input[2]] -= 1 | |
if len(node.input) >= 4: | |
self.node_reference[node.input[3]] -= 1 | |
if len(node.input) >= 5: | |
self.node_reference[node.input[4]] -= 1 | |
elif op == "Upsample": | |
if len(node.input) >= 2: | |
self.node_reference[node.input[1]] -= 1 | |
elif op == "adaptive_avg_pool2d" or op == "adaptive_max_pool2d": | |
if len(node.input) >= 2: | |
self.node_reference[node.input[1]] -= 1 | |
# count all weight node with zero reference | |
zero_reference_weight_node_count = 0 | |
for input_name in self.weights.keys(): | |
# there may be some weight nodes in initializer but none of the graph nodes use them | |
# add them to blob_names so we could get proper blob count later | |
self.blob_names[input_name] = None | |
refcount = self.node_reference.get(input_name, 0) | |
if refcount == 0: | |
zero_reference_weight_node_count += 1 | |
# we always treat constant nodes as weights or binaryop_weights | |
# do not count it twice for layer_count | |
constant_node_count_moved_to_weight = 0 | |
for node in self.onnx_graph.node: | |
if node.op_type == "Constant": | |
constant_node_count_moved_to_weight += 1 | |
# some ops may have anonymous input | |
# LSTM sequence_lens | |
self.blob_names.pop("", None) | |
self.node_reference.pop("", None) | |
# collect blobs referenced more than once; each one needs a Split layer | |
split_layer_count = 0 | |
splitncnn_blob_count = 0 | |
# split node reference | |
split_node_reference = {} | |
for ref, count in self.node_reference.items(): | |
if count > 1: | |
split_layer_count += 1 | |
splitncnn_blob_count += count | |
split_node_reference[ref] = count | |
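# Layer count: graph nodes, minus Constants (absorbed into weights), minus nodes removed | |
# by fusion, plus one MemoryData per still-referenced weight, one Input per graph input, | |
# and one Split per multiply-referenced blob. The blob count adds one split output per | |
# consumer of each multiply-referenced blob. | |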
ncnn_node_count = ( | |
self.node_count | |
- constant_node_count_moved_to_weight | |
+ len(self.weights) | |
- zero_reference_weight_node_count | |
- reduced_node_count[0] | |
+ input_node_count | |
+ split_layer_count | |
) | |
ncnn_blob_count = ( | |
len(self.blob_names) | |
- zero_reference_weight_node_count | |
+ splitncnn_blob_count | |
) | |
ncnn_model = NcnnModel(ncnn_node_count, ncnn_blob_count) | |
logger.debug( | |
f"Node count: {ncnn_model.node_count}, Blob count: {ncnn_model.blob_count}" | |
) | |
bin_length = 0 | |
for i, graph_input in enumerate(self.onnx_graph.input): | |
input_name = graph_input.name | |
# Make sure input is not in weights | |
if input_name not in self.weights: | |
ncnn_model.add_layer( | |
NcnnLayer("Input", input_name, 0, 1, outputs=[input_name]) | |
) | |
refcount = self.node_reference.get(input_name, 0) | |
if refcount > 1: | |
layer_input_list = [ | |
f"{input_name}_splitncnn_{j}" for j in range(refcount) | |
] | |
ncnn_model.add_layer( | |
NcnnLayer( | |
"Split", | |
f"splitncnn_input{i}", | |
1, | |
refcount, | |
[input_name], | |
layer_input_list, | |
) | |
) | |
# place MemoryData next if it is being included | |
internal_split = 0 | |
if include_mem_data: | |
for input_name, M in self.weights.items(): | |
refcount = self.node_reference.get(input_name, 0) | |
if refcount != 0: | |
layer = NcnnLayer("MemoryData", input_name, 0, 1, [input_name]) | |
M_dims_size = len(M.dims) | |
if M_dims_size == 0: | |
layer.add_param(0, get_tensor_proto_data_size(M, M.data_type)) | |
elif M_dims_size == 1: | |
layer.add_param(0, M.dims[0]) | |
elif M_dims_size == 2: | |
layer.add_param(0, M.dims[1]) | |
if M.dims[0] != 1: | |
layer.add_param(1, M.dims[0]) | |
elif M_dims_size == 3: | |
layer.add_param(0, M.dims[2]) | |
layer.add_param(1, M.dims[1]) | |
if M.dims[0] != 1: | |
layer.add_param(2, M.dims[0]) | |
elif M_dims_size == 4: | |
layer.add_param(0, M.dims[3]) | |
layer.add_param(1, M.dims[2]) | |
layer.add_param(2, M.dims[1]) | |
bin_length += self.add_weight(layer, "MemoryData", M) | |
ncnn_model.add_layer(layer) | |
if refcount > 1: | |
layer_output_list = [ | |
f"{input_name}_splitncnn_{i}" for i in range(refcount) | |
] | |
ncnn_model.add_layer( | |
NcnnLayer( | |
"Split", | |
f"splitncnn_{internal_split}", | |
1, | |
refcount, | |
[input_name], | |
layer_output_list, | |
) | |
) | |
internal_split += 1 | |
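# Main translation loop: map each remaining ONNX node to an NCNN layer type, emit its | |
# params, append its weights to the bin, and add a Split layer after any output blob | |
# that is consumed by more than one node. | |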
for node in self.onnx_graph.node: | |
op = node.op_type | |
if op == "noop_reducedncnn": | |
continue | |
name = node.name | |
if not name: | |
name = node.output[0] | |
input_size = len(node.input) | |
output_size = len(node.output) | |
for input_name in node.input: | |
# check weight | |
if not input_name or ( | |
input_name in self.weights and self.node_reference[input_name] == 0 | |
): | |
input_size -= 1 | |
layer = NcnnLayer() | |
if op in [ | |
"Abs", | |
"Acos", | |
"Asin", | |
"Atan", | |
"Ceil", | |
"Cos", | |
"Exp", | |
"Floor", | |
"Log", | |
"Neg", | |
"Reciprocal", | |
"Sin", | |
"Sqrt", | |
"Tan", | |
"Tanh", | |
]: | |
layer.op_type = "UnaryOp" | |
elif op in [ | |
"Add", | |
"Div", | |
"Max", | |
"Min", | |
"Mul", | |
"Pow", | |
"RDiv", | |
"RSub", | |
"Sub", | |
]: | |
layer.op_type = "BinaryOp" | |
elif op == "AveragePool" or op == "MaxPool": | |
kernel_shape = get_node_attr_ai(node, "kernel_shape") | |
if kernel_shape.size == 1: | |
layer.op_type = "Pooling1D" | |
else: | |
layer.op_type = "Pooling" | |
elif op == "BatchNormalization": | |
layer.op_type = "BatchNorm" | |
elif op == "BiasGelu": | |
layer.op_type = "BiasGelu" | |
elif op == "Clip": | |
layer.op_type = "Clip" | |
elif op == "Concat": | |
layer.op_type = "Concat" | |
elif op == "Constant": | |
continue | |
elif op == "Conv": | |
kernel_shape = get_node_attr_ai(node, "kernel_shape") | |
if kernel_shape.size == 1: | |
layer.op_type = "Convolution1D" | |
else: | |
group = get_node_attr_i(node, "group", 1) | |
if group > 1: | |
layer.op_type = "ConvolutionDepthWise" | |
else: | |
layer.op_type = "Convolution" | |
elif op == "ConvTranspose": | |
group = get_node_attr_i(node, "group", 1) | |
if group > 1: | |
layer.op_type = "DeconvolutionDepthWise" | |
else: | |
layer.op_type = "Deconvolution" | |
elif op == "Crop" or op == "Slice": | |
layer.op_type = "Crop" | |
elif op == "DepthToSpace" or op == "PixelShuffle": | |
layer.op_type = "PixelShuffle" | |
elif op == "Dropout": | |
layer.op_type = "Dropout" | |
output_size = 1 | |
elif op == "Elu": | |
layer.op_type = "ELU" | |
elif op == "EmbedLayerNormalization": | |
layer.op_type = "EmbedLayerNormalization" | |
elif op == "Flatten": | |
layer.op_type = "Flatten" | |
elif op == "Gelu": | |
layer.op_type = "GELU" | |
elif op == "Gemm": | |
alpha = get_node_attr_f(node, "alpha", 1) | |
beta = get_node_attr_f(node, "beta", 1) | |
transA = get_node_attr_i(node, "transA", 0) | |
transB = get_node_attr_i(node, "transB", 0) | |
if alpha == 1 and beta == 1 and transA == 0 and transB == 1: | |
# InnerProduct-like A * B + C | |
layer.op_type = "InnerProduct" | |
else: | |
layer.op_type = "Gemm" | |
elif op in [ | |
"GlobalAveragePool", | |
"GlobalMaxPool", | |
"adaptive_avg_pool2d", | |
"adaptive_max_pool2d", | |
]: | |
layer.op_type = "Pooling" | |
elif op == "GroupNorm": | |
layer.op_type = "GroupNorm" | |
elif op == "GRU": | |
layer.op_type = "GRU" | |
elif op == "HardSigmoid": | |
layer.op_type = "HardSigmoid" | |
elif op == "HardSwish": | |
layer.op_type = "HardSwish" | |
elif op == "ImageScaler": | |
layer.op_type = "Scale" | |
elif op == "InstanceNormalization": | |
layer.op_type = "InstanceNorm" | |
elif op == "LayerNorm": | |
layer.op_type = "LayerNorm" | |
elif op == "LeakyRelu" or op == "Relu": | |
layer.op_type = "ReLU" | |
elif op == "LRN": | |
layer.op_type = "LRN" | |
elif op == "LSTM": | |
layer.op_type = "LSTM" | |
elif op == "MatMul": | |
if ( | |
node.input[1] in self.weights | |
and len(self.weights[node.input[1]].dims) == 2 | |
): | |
layer.op_type = "InnerProduct" | |
else: | |
layer.op_type = "Gemm" | |
elif op == "MultiHeadAttention": | |
layer.op_type = "MultiHeadAttention" | |
elif op == "Normalize": | |
layer.op_type = "Normalize" | |
elif op == "Pad": | |
layer.op_type = "Padding" | |
elif op == "PRelu": | |
layer.op_type = "PReLU" | |
elif op in [ | |
"ReduceMax", | |
"ReduceMin", | |
"ReduceMean", | |
"ReduceProd", | |
"ReduceSum", | |
"ReduceSumSquare", | |
"ReduceL1", | |
"ReduceL2", | |
"ReduceLogSum", | |
"ReduceLogSumExp", | |
]: | |
layer.op_type = "Reduction" | |
elif op == "Reorg": | |
layer.op_type = "Reorg" | |
elif op == "Reshape": | |
layer.op_type = "Reshape" | |
elif op == "RNN": | |
layer.op_type = "RNN" | |
elif op == "ShuffleChannel": | |
layer.op_type = "ShuffleChannel" | |
elif op == "Sigmoid": | |
layer.op_type = "Sigmoid" | |
elif op == "SkipLayerNormalization": | |
layer.op_type = "SkipLayerNormalization" | |
elif op == "Softmax": | |
layer.op_type = "Softmax" | |
elif op == "Softplus": | |
layer.op_type = "Softplus" | |
elif op == "Split": | |
layer.op_type = "Slice" | |
elif op == "Squeeze": | |
layer.op_type = "Squeeze" | |
elif op == "Sum": | |
layer.op_type = "Eltwise" | |
elif op == "Swish": | |
layer.op_type = "Swish" | |
elif op == "Transpose": | |
layer.op_type = "Permute" | |
elif op == "Upsample" or op == "Resize": | |
layer.op_type = "Interp" | |
elif op == "Unsqueeze": | |
layer.op_type = "ExpandDims" | |
else: | |
error_msg = f"{op} not currently supported by NCNN." | |
raise ValueError(error_msg) | |
layer.name = name | |
layer.num_inputs = input_size | |
layer.num_outputs = output_size | |
layer.params.set_op(layer.op_type) | |
for input_name in node.input: | |
# check weight | |
if input_name and not ( | |
input_name in self.weights and self.node_reference[input_name] == 0 | |
): | |
if input_name in split_node_reference: | |
refidx = split_node_reference[input_name] - 1 | |
split_node_reference[input_name] = refidx | |
input_name = f"{input_name}_splitncnn_{refidx}" | |
layer.inputs.append(input_name) | |
for o in range(output_size): | |
layer.outputs.append(node.output[o]) | |
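# Per-op parameter emission below. The numeric ids passed to add_param follow the | |
# NCNN param dictionary of the corresponding layer type, and weights added via | |
# add_weight are written to the model bin in layer order. | |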
if op == "Abs": | |
layer.add_param(0, UOT.ABS) | |
elif op == "Acos": | |
layer.add_param(0, UOT.ACOS) | |
elif layer.op_type == "BinaryOp": | |
if op == "Add": | |
layer.add_param(0, BOT.ADD) | |
elif op == "Div": | |
layer.add_param(0, BOT.DIV) | |
elif op == "Max": | |
layer.add_param(0, BOT.MAX) | |
elif op == "Min": | |
layer.add_param(0, BOT.MIN) | |
elif op == "Mul": | |
layer.add_param(0, BOT.MUL) | |
elif op == "Pow": | |
layer.add_param(0, BOT.POW) | |
elif op == "RDiv": | |
layer.add_param(0, BOT.RDIV) | |
elif op == "RSub": | |
layer.add_param(0, BOT.RSUB) | |
elif op == "Sub": | |
layer.add_param(0, BOT.SUB) | |
with_scalar = get_node_attr_i(node, "with_scalar", 0) | |
b = get_node_attr_f(node, "b", 0) | |
if with_scalar: | |
layer.add_param(1, with_scalar) | |
layer.add_param(2, b) | |
elif op == "Asin": | |
layer.add_param(0, UOT.ASIN) | |
elif op == "Atan": | |
layer.add_param(0, UOT.ATAN) | |
elif op == "AveragePool" or op == "MaxPool": | |
auto_pad = get_node_attr_s(node, "auto_pad") | |
ceil_mode = get_node_attr_i(node, "ceil_mode", 0) | |
kernel_shape = get_node_attr_ai(node, "kernel_shape") | |
strides = get_node_attr_ai(node, "strides") | |
pads = get_node_attr_ai(node, "pads") | |
pool = int(op == "AveragePool") | |
if ceil_mode == 1: | |
pad_mode = PAM.FULL | |
elif auto_pad == "SAME_UPPER": | |
pad_mode = PAM.SAMEUPPER | |
elif auto_pad == "SAME_LOWER": | |
pad_mode = PAM.SAMELOWER | |
else: | |
pad_mode = PAM.VALID | |
layer.add_param(0, pool) | |
if kernel_shape.size == 1: | |
layer.add_param(1, int(kernel_shape[0])) | |
elif kernel_shape.size == 2: | |
layer.add_param(1, int(kernel_shape[1])) | |
layer.add_param(11, int(kernel_shape[0])) | |
if strides.size == 1: | |
layer.add_param(2, int(strides[0])) | |
elif strides.size == 2: | |
layer.add_param(2, int(strides[1])) | |
layer.add_param(12, int(strides[0])) | |
if pads.size == 1: | |
layer.add_param(3, int(pads[0])) | |
elif pads.size == 2: | |
layer.add_param(3, int(pads[1])) | |
layer.add_param(13, int(pads[0])) | |
elif pads.size == 4: | |
layer.add_param(3, int(pads[1])) | |
layer.add_param(13, int(pads[0])) | |
layer.add_param(14, int(pads[3])) | |
layer.add_param(15, int(pads[2])) | |
layer.add_param(5, pad_mode) | |
if pool: | |
avgpool_count_include_pad = get_node_attr_i( | |
node, "count_include_pad", 0 | |
) | |
layer.add_param(6, avgpool_count_include_pad) | |
elif op == "BatchNormalization": | |
epsilon = get_node_attr_f(node, "epsilon", 0.00001) | |
scale = self.weights[node.input[1]] | |
B = self.weights[node.input[2]] | |
mean = self.weights[node.input[3]] | |
var = self.weights[node.input[4]] | |
channels = get_tensor_proto_data_size(scale, scale.data_type) | |
layer.add_param(0, channels) | |
bin_length += self.add_weight(layer, "slope", scale) | |
bin_length += self.add_weight(layer, "mean", mean) | |
# apply epsilon to var | |
v = onph.to_array(var) | |
ve = np.array([v[i] + epsilon for i in range(channels)], np.float32) | |
bin_length += self.add_weight(layer, "variance", ve) | |
bin_length += self.add_weight(layer, "bias", B) | |
elif op == "BiasGelu": | |
B = self.weights[node.input[1]] | |
layer.add_param(0, get_tensor_proto_data_size(B, B.data_type)) | |
bin_length += self.add_weight(layer, "bias", B) | |
elif op == "Ceil": | |
layer.add_param(0, UOT.CEIL) | |
elif op == "Clip": | |
if len(node.input) == 1: | |
minimum = get_node_attr_f(node, "min", -FLOAT32_MAX) | |
maximum = get_node_attr_f(node, "max", FLOAT32_MAX) | |
else: | |
minimum = ( | |
get_node_attr_from_input_f(self.weights[node.input[1]]) | |
if node.input[1] in self.weights | |
else -FLOAT32_MAX | |
) | |
maximum = ( | |
get_node_attr_from_input_f(self.weights[node.input[2]]) | |
if node.input[2] in self.weights | |
else FLOAT32_MAX | |
) | |
layer.add_param(0, minimum) | |
layer.add_param(1, maximum) | |
elif op == "Concat": | |
axis = get_node_attr_i(node, "axis", 1) | |
layer.add_param(0, axis - 1 if axis > 0 else axis) | |
elif op == "Constant": | |
logger.error("chaiNNer: code should not have reached inside Constant") | |
elif op == "Conv": | |
W = self.weights[node.input[1]] | |
num_filter = W.dims[0] | |
has_bias = int(len(node.input) == 3) | |
auto_pad = get_node_attr_s(node, "auto_pad") | |
kernel_shape = get_node_attr_ai(node, "kernel_shape") | |
dilations = get_node_attr_ai(node, "dilations") | |
strides = get_node_attr_ai(node, "strides") | |
pads = get_node_attr_ai(node, "pads") | |
group = get_node_attr_i(node, "group", 1) | |
layer.add_param(0, num_filter) | |
if kernel_shape.size == 1: | |
layer.add_param(1, int(kernel_shape[0])) | |
elif kernel_shape.size == 2: | |
layer.add_param(1, int(kernel_shape[1])) | |
layer.add_param(11, int(kernel_shape[0])) | |
if dilations.size == 1: | |
layer.add_param(2, int(dilations[0])) | |
elif dilations.size == 2: | |
layer.add_param(2, int(dilations[1])) | |
layer.add_param(12, int(dilations[0])) | |
if strides.size == 1: | |
layer.add_param(3, int(strides[0])) | |
elif strides.size == 2: | |
layer.add_param(3, int(strides[1])) | |
layer.add_param(13, int(strides[0])) | |
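# -233 and -234 are NCNN's sentinel pad values meaning SAME_UPPER and SAME_LOWER | |
# auto-padding; explicit pads are written only when auto_pad is not set. | |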
if auto_pad == "SAME_UPPER": | |
layer.add_param(4, -233) | |
elif auto_pad == "SAME_LOWER": | |
layer.add_param(4, -234) | |
else: | |
if pads.size == 1: | |
layer.add_param(4, int(pads[0])) | |
elif pads.size == 2: | |
layer.add_param(4, int(pads[1])) | |
layer.add_param(14, int(pads[0])) | |
elif pads.size == 4: | |
layer.add_param(4, int(pads[1])) | |
layer.add_param(14, int(pads[0])) | |
layer.add_param(15, int(pads[3])) | |
layer.add_param(16, int(pads[2])) | |
layer.add_param(5, has_bias) | |
layer.add_param(6, get_tensor_proto_data_size(W, W.data_type)) | |
if group > 1: | |
layer.add_param(7, int(group)) | |
quantize_tag = DTYPE_FP16 if is_fp16 else DTYPE_FP32 | |
bin_length += self.add_weight(layer, "weight", W, quantize_tag) | |
if has_bias: | |
B = self.weights[node.input[2]] | |
bin_length += self.add_weight(layer, "bias", B) | |
elif op == "ConvTranspose": | |
W = self.weights[node.input[1]] | |
has_bias = int(len(node.input) == 3) | |
auto_pad = get_node_attr_s(node, "auto_pad") | |
kernel_shape = get_node_attr_ai(node, "kernel_shape") | |
dilations = get_node_attr_ai(node, "dilations") | |
strides = get_node_attr_ai(node, "strides") | |
output_padding = get_node_attr_ai(node, "output_padding") | |
output_shape = get_node_attr_ai(node, "output_shape") | |
pads = get_node_attr_ai(node, "pads") | |
group = get_node_attr_i(node, "group", 1) | |
num_filter = W.dims[1] * group | |
layer.add_param(0, num_filter) | |
if kernel_shape.size == 1: | |
layer.add_param(1, int(kernel_shape[0])) | |
elif kernel_shape.size == 2: | |
layer.add_param(1, int(kernel_shape[1])) | |
layer.add_param(11, int(kernel_shape[0])) | |
if dilations.size == 1: | |
layer.add_param(2, int(dilations[0])) | |
elif dilations.size == 2: | |
layer.add_param(2, int(dilations[1])) | |
layer.add_param(12, int(dilations[0])) | |
if strides.size == 1: | |
layer.add_param(3, int(strides[0])) | |
elif strides.size == 2: | |
layer.add_param(3, int(strides[1])) | |
layer.add_param(13, int(strides[0])) | |
if auto_pad == "SAME_UPPER": | |
layer.add_param(4, -233) | |
elif auto_pad == "SAME_LOWER": | |
layer.add_param(4, -234) | |
else: | |
if pads.size == 1: | |
layer.add_param(4, int(pads[0])) | |
elif pads.size == 2: | |
layer.add_param(4, int(pads[1])) | |
layer.add_param(14, int(pads[0])) | |
elif pads.size == 4: | |
layer.add_param(4, int(pads[1])) | |
layer.add_param(14, int(pads[0])) | |
layer.add_param(15, int(pads[3])) | |
layer.add_param(16, int(pads[2])) | |
if output_padding.size == 1: | |
layer.add_param(18, int(output_padding[0])) | |
elif output_padding.size == 2: | |
layer.add_param(18, int(output_padding[1])) | |
layer.add_param(19, int(output_padding[0])) | |
if output_shape.size == 1: | |
layer.add_param(20, int(output_shape[0])) | |
elif output_shape.size == 2: | |
layer.add_param(20, int(output_shape[1])) | |
layer.add_param(21, int(output_shape[0])) | |
layer.add_param(5, has_bias) | |
weight_data_size = get_tensor_proto_data_size(W, W.data_type) | |
layer.add_param(6, weight_data_size) | |
if group > 1: | |
layer.add_param(7, group) | |
quantize_tag = DTYPE_FP16 if is_fp16 else DTYPE_FP32 | |
weight_data = onph.to_array(W) | |
bin_length += self.add_weight( | |
layer, "weight", weight_data.swapaxes(0, 1), quantize_tag | |
) | |
if has_bias: | |
B = self.weights[node.input[2]] | |
bin_length += self.add_weight(layer, "bias", B) | |
elif op == "Cos": | |
layer.add_param(0, UOT.COS) | |
elif op == "Crop": | |
starts = get_node_attr_ai(node, "starts") | |
layer.add_param(9, [starts.size, *starts]) | |
ends = get_node_attr_ai(node, "ends") | |
layer.add_param(10, [ends.size, *ends]) | |
axes = get_node_attr_ai(node, "axis") | |
layer.add_param(11, [axes.size, *axes]) | |
elif op == "DepthToSpace": | |
# pixelshuffle | |
scale_factor = get_node_attr_i(node, "blocksize", 1) | |
mode = get_node_attr_s(node, "mode") | |
layer.add_param(0, scale_factor) | |
if mode == "CRD": | |
layer.add_param(1, 0) | |
elif mode == "DCR": | |
layer.add_param(1, 1) | |
elif op == "Dropout": | |
pass | |
elif op == "Elu": | |
alpha = get_node_attr_f(node, "alpha", 1) | |
layer.add_param(0, alpha) | |
elif op == "EmbedLayerNormalization": | |
logger.error(f"chaiNNer: No NCNN documentation for {op} yet, will not function") | |
words = self.weights[node.input[2]] | |
positions = self.weights[node.input[3]] | |
W = self.weights[node.input[5]] | |
B = self.weights[node.input[6]] | |
layer.add_param(0, get_tensor_proto_data_size(B, B.data_type)) | |
layer.add_param(1, get_tensor_proto_data_size(words, words.data_type)) | |
layer.add_param( | |
2, get_tensor_proto_data_size(positions, positions.data_type) | |
) | |
quantize_tag = DTYPE_FP16 if is_fp16 else DTYPE_FP32 | |
bin_length += self.add_weight(layer, "words", words, DTYPE_FP32) | |
bin_length += self.add_weight(layer, "positions", positions, DTYPE_FP32) | |
bin_length += self.add_weight(layer, "weight", W, quantize_tag) | |
bin_length += self.add_weight(layer, "bias", B) | |
elif op == "Exp": | |
layer.add_param(0, UOT.EXP) | |
elif op == "Flatten": | |
axis = get_node_attr_i(node, "axis", 1) | |
if axis != 1: | |
raise ValueError(f"Unsupported Flatten axis {axis}.") | |
elif op == "Floor": | |
layer.add_param(0, UOT.FLOOR) | |
elif op == "Gelu": | |
layer.add_param(0, 1) | |
elif op == "Gemm": | |
alpha = get_node_attr_f(node, "alpha", 1) | |
beta = get_node_attr_f(node, "beta", 1) | |
transA = get_node_attr_i(node, "transA", 0) | |
transB = get_node_attr_i(node, "transB", 0) | |
if alpha == 1 and beta == 1 and transA == 0 and transB == 1: | |
# InnerProduct-like A * B + C | |
B = self.weights[node.input[1]] | |
C = self.weights[node.input[2]] | |
layer.add_param(0, get_tensor_proto_data_size(C, C.data_type)) | |
layer.add_param(1, 1) | |
layer.add_param(2, get_tensor_proto_data_size(B, B.data_type)) | |
quantize_tag = DTYPE_FP16 if is_fp16 else DTYPE_FP32 | |
bin_length += self.add_weight(layer, "B", B, quantize_tag) | |
bin_length += self.add_weight(layer, "C", C) | |
else: | |
# gemm | |
layer.add_param(0, alpha) | |
layer.add_param(1, beta) | |
layer.add_param(2, transA) | |
layer.add_param(3, transB) | |
elif op == "GlobalAveragePool" or op == "GlobalMaxPool": | |
layer.add_param(0, int(op == "GlobalAveragePool")) | |
layer.add_param(4, 1) | |
elif op == "adaptive_avg_pool2d" or op == "adaptive_max_pool2d": | |
out_shape_tp = self.weights[node.input[1]] | |
out_shape = get_node_attr_from_input_ai(out_shape_tp) | |
layer.add_param(0, int(op == "adaptive_avg_pool2d")) | |
layer.add_param(7, 1) | |
if out_shape.size == 1: | |
layer.add_param(8, int(out_shape[0])) | |
elif out_shape.size == 2: | |
layer.add_param(8, int(out_shape[1])) # out_w | |
layer.add_param(18, int(out_shape[0])) # out_h | |
elif op == "GroupNorm": | |
groups = get_node_attr_i(node, "groups", 1) | |
channels = get_node_attr_i(node, "channels", 1) | |
eps = get_node_attr_f(node, "epsilon", 0.00001) | |
affine = get_node_attr_i(node, "affine", 1) | |
if affine: | |
# discard affine-less S=1 B=0 | |
affine_S = get_node_attr_from_input_af(self.weights[node.input[1]]) | |
affine_B = get_node_attr_from_input_af(self.weights[node.input[2]]) | |
if ( | |
affine_S.size == 1 | |
and affine_S[0] == 1 | |
and affine_B.size == 1 | |
and affine_B[0] == 0 | |
): | |
affine = 0 | |
else: | |
if np.any(affine_S[:channels] != 1) or np.any( | |
affine_B[:channels] != 0 | |
): | |
affine = 1 | |
else: | |
affine = 0 | |
layer.add_param(0, groups) | |
layer.add_param(1, channels) | |
layer.add_param(2, eps) | |
layer.add_param(3, affine) | |
if affine: | |
scale = self.weights[node.input[1]] | |
B = self.weights[node.input[2]] | |
bin_length += self.add_weight(layer, "scale", scale) | |
bin_length += self.add_weight(layer, "bias", B) | |
elif op == "GRU": | |
# W = self.weights[node.input[1]] | |
# R = self.weights[node.input[2]] | |
# B = self.weights[node.input[3]] | |
# hidden_size = get_node_attr_i(node, "hidden_size", 0) | |
# direction = get_node_attr_s(node, "direction") | |
# if direction == "forward": | |
# direction_type = GRU.FORWARD | |
# elif direction == "reverse": | |
# direction_type = GRU.REVERSE | |
# elif direction == "bidirectional": | |
# direction_type = GRU.BIDIRECTIONAL | |
# weight_data_size = get_tensor_proto_data_size(W) | |
# layer.add_param(0, hidden_size) | |
# layer.add_param(1, weight_data_size) | |
# layer.add_param(2, direction_type) | |
# num_directions = 2 if direction_type == GRU.BIDIRECTIONAL else 1 | |
# reorder num_directions-URN-hidden_size to num_directions-RUN-hidden_size | |
# quantize_tag = DTYPE_FP16 if is_fp16 else DTYPE_FP32 | |
# logger.error( | |
# "Not sure GRU weight reordering is accurate, " | |
# "docs and code comments appear to give different shape orders" | |
# ) | |
# W_array = onph.to_array(W) | |
# W_array = np.stack( | |
# (W_array[:, 1, :], W_array[:, 0, :], W_array[:, 2, :]), axis=1 | |
# ) | |
# bin_length += self.add_weight(layer, W_array, "weight_xc_data", quantize_tag, is_fp16) | |
# reduce U and R bias except N | |
# reorder num_directions-URN-hidden to num_directions-RUN-hidden | |
# B_array = onph.to_array(B) | |
# bias_data_size_g = B_array.size / 6 / num_directions | |
# for i in range(bias_data_size_g)[1:]: | |
# pass | |
raise RuntimeError( | |
"GRU not implemented yet, please report issue with model used" | |
) | |
elif op == "HardSigmoid" or op == "Hard Swish": | |
alpha = get_node_attr_f(node, "alpha", 0.2) | |
beta = get_node_attr_f(node, "beta", 0.5) | |
layer.add_param(0, alpha) | |
layer.add_param(1, beta) | |
elif op == "ImageScaler": | |
bias = get_node_attr_af(node, "bias") | |
scale = get_node_attr_f(node, "scale", 1) | |
channels = bias.size | |
layer.add_param(0, channels) | |
layer.add_param(1, 1) | |
bin_length += self.add_weight(layer, "scale", np.array((scale,) * 3)) | |
bin_length += self.add_weight(layer, "bias", bias) | |
elif op == "InstanceNormalization": | |
eps = get_node_attr_f(node, "epsilon", 0.00001) | |
# Discard affine-less S=1 B=0 | |
affine_S = get_node_attr_from_input_af(self.weights[node.input[1]]) | |
affine_B = get_node_attr_from_input_af(self.weights[node.input[2]]) | |
channels = affine_S.size | |
if np.any(affine_S[:channels] != 1) or np.any(affine_B[:channels] != 0): | |
affine = 1 | |
else: | |
affine = 0 | |
layer.add_param(0, channels) | |
layer.add_param(1, eps) | |
layer.add_param(2, affine) | |
if affine: | |
scale = self.weights[node.input[1]] | |
B = self.weights[node.input[2]] | |
bin_length += self.add_weight(layer, "scale", scale) | |
bin_length += self.add_weight(layer, "bias", B) | |
elif op == "LayerNorm": | |
eps = get_node_attr_f(node, "epsilon", 0.00001) | |
affine = get_node_attr_i(node, "affine", 1) | |
if affine: | |
# discard affine-less S=1 B=0 | |
affine_S = get_node_attr_from_input_af(self.weights[node.input[1]]) | |
affine_B = get_node_attr_from_input_af(self.weights[node.input[2]]) | |
affine_size = affine_S.size | |
if np.any(affine_S[:affine_size] != 1) or np.any( | |
affine_B[:affine_size] | |
): | |
affine = 1 | |
else: | |
affine = 0 | |
if affine: | |
layer.add_param(0, affine_size) | |
layer.add_param(1, eps) | |
layer.add_param(2, affine) | |
if affine: | |
scale = self.weights[node.input[1]] | |
B = self.weights[node.input[2]] | |
bin_length += self.add_weight(layer, "scale", scale) | |
bin_length += self.add_weight(layer, "bias", B) | |
elif op == "LeakyRelu": | |
alpha = get_node_attr_f(node, "alpha", 0.01) | |
layer.add_param(0, alpha) | |
elif op == "Log": | |
layer.add_param(0, UOT.LOG) | |
elif op == "LRN": | |
layer.add_param(0, 0) | |
layer.add_param(1, get_node_attr_i(node, "size", 1)) | |
layer.add_param(2, get_node_attr_f(node, "alpha", 1)) | |
layer.add_param(3, get_node_attr_f(node, "beta", 0.5)) | |
layer.add_param(4, get_node_attr_f(node, "bias", 1)) | |
elif op == "LSTM": | |
# W = self.weights[node.input[1]] | |
# R = self.weights[node.input[2]] | |
# B = self.weights[node.input[3]] | |
# hidden_size = get_node_attr_i(node, "hidden_size", 0) | |
# direction = get_node_attr_s(node, "direction") | |
# if direction == "forward": | |
# direction_type = GRU.FORWARD | |
# elif direction == "reverse": | |
# direction_type = GRU.REVERSE | |
# elif direction == "bidirectional": | |
# direction_type = GRU.BIDIRECTIONAL | |
raise RuntimeError( | |
"LSTM not implemented yet, please report issue with model used" | |
) | |
elif op == "MatMul": | |
if node.input[1] in self.weights and len(self.weights[node.input[1]].dims) == 2: | |
# InnerProduct | |
B = self.weights[node.input[1]] | |
weight_data_size = get_tensor_proto_data_size(B, B.data_type) | |
num_output = B.dims[-1] | |
layer.add_param(0, num_output) | |
layer.add_param(1, 0) | |
layer.add_param(2, weight_data_size) | |
B_array = onph.to_array(B) | |
bin_length += self.add_weight(layer, "bias", B_array.T, DTYPE_FP32) | |
# The non-constant MatMul falls through to a plain Gemm layer with default params, so no else branch is needed here | |
elif op == "MultiHeadAttention": | |
# embed_dim = get_node_attr_i(node, "embed_dim", 0) | |
# num_heads = get_node_attr_i(node, "num_heads", 0) | |
# layer.add_param(0, embed_dim) | |
# layer.add_param(1, num_heads) | |
# if len(node.input) == 5: | |
# qkvw = self.weights[node.input[1]] | |
# qkvb = self.weights[node.input[2]] | |
# ow = self.weights[node.input[3]] | |
# ob = self.weights[node.input[4]] | |
# weight_data_size = get_tensor_proto_data_size(ow) | |
# layer.add_param(2, weight_data_size) | |
# quantize_tag = DTYPE_FP16 if is_fp16 else DTYPE_FP32 | |
raise RuntimeError( | |
"MultiHeadAttention not implemented, please report issue with model used" | |
) | |
elif op == "Neg": | |
layer.add_param(0, UOT.NEG) | |
elif op == "Normalize": | |
eps = get_node_attr_f(node, "eps", 0) | |
layer.add_param(1, 1) # channel_shared | |
layer.add_param(2, eps) | |
layer.add_param(3, 1) # scale_data_size | |
layer.add_param(9, NEM.PYTORCH) | |
bin_length += self.add_weight(layer, "scale", 1) | |
elif op == "Pad": | |
mode = get_node_attr_s(node, "mode") | |
value = get_node_attr_f(node, "value", 0) | |
if len(node.input) == 1: | |
pads = get_node_attr_ai(node, "pads") | |
else: | |
pads = get_node_attr_from_input_ai(self.weights[node.input[1]]) | |
if mode == "edge": | |
ptype = PAT.REPLICATE | |
elif mode == "reflect": | |
ptype = PAT.REFLECT | |
else: | |
ptype = PAT.CONSTANT | |
pad_size = pads.size | |
top = bottom = front = behind = 0 | |
if pad_size == 8: | |
# NCHW | |
top = pads[2] | |
bottom = pads[6] | |
left = pads[3] | |
right = pads[7] | |
front = pads[1] | |
behind = pads[5] | |
elif pad_size == 6: | |
# NHW | |
top = pads[1] | |
bottom = pads[4] | |
left = pads[2] | |
right = pads[5] | |
else: | |
# NW | |
left = pads[1] | |
right = pads[3] | |
layer.add_param(0, int(top)) | |
layer.add_param(1, int(bottom)) | |
layer.add_param(2, int(left)) | |
layer.add_param(3, int(right)) | |
layer.add_param(4, int(ptype)) | |
layer.add_param(5, float(value)) | |
layer.add_param(7, int(front)) | |
layer.add_param(8, int(behind)) | |
elif op == "PixelShuffle": | |
layer.add_param(0, get_node_attr_i(node, "scale_factor", 1)) | |
elif op == "PRelu": | |
slope = self.weights[node.input[1]] | |
num_slope = get_tensor_proto_data_size(slope, slope.data_type) | |
layer.add_param(0, num_slope) | |
bin_length += self.add_weight(layer, "slope", slope) | |
elif op == "Reciprocal": | |
layer.add_param(0, UOT.RECIPROCAL) | |
elif op in [ | |
"ReduceMax", | |
"ReduceMin", | |
"ReduceMean", | |
"ReduceProd", | |
"ReduceSum", | |
"ReduceSumSquare", | |
"ReduceL1", | |
"ReduceL2", | |
"ReduceLogSum", | |
"ReduceLogSumExp", | |
]: | |
if op == "ReduceSum": | |
op_type = ROT.SUM | |
elif op == "ReduceSumSquare": | |
op_type = ROT.SUMSQ | |
elif op == "ReduceMean": | |
op_type = ROT.MEAN | |
elif op == "ReduceMax": | |
op_type = ROT.MAX | |
elif op == "ReduceMin": | |
op_type = ROT.MIN | |
elif op == "ReduceProd": | |
op_type = ROT.PROD | |
elif op == "ReduceL1": | |
op_type = ROT.L1 | |
elif op == "ReduceL2": | |
op_type = ROT.L2 | |
elif op == "ReduceLogSum": | |
op_type = ROT.LOGSUM | |
elif op == "ReduceLogSumExp": | |
op_type = ROT.LOGSUMEXP | |
else: | |
op_type = -233 | |
layer.add_param(0, op_type) | |
axes = get_node_attr_ai(node, "axes") | |
keepdims = get_node_attr_i(node, "keepdims", 1) | |
if axes.size > 0: | |
# if axes set, reduce according to axes | |
layer.add_param(1, 0) | |
for axis in axes: | |
if axis == 0 or axis > 4 or axis < -3: | |
raise ValueError(f"Unsupported axis {axis} in Reduction") | |
layer.add_param( | |
3, | |
[axes.size, *[a - 1 if a > 0 else a for a in axes]], | |
) | |
else: | |
# if axes not set, reduce all axes by default | |
layer.add_param(1, 1) | |
layer.add_param(4, keepdims) | |
logger.error("chaiNNer: No NCNN documentation for Reduction param 5") | |
layer.add_param(5, 1) | |
elif op == "Reorg": | |
layer.add_param(0, get_node_attr_i(node, "stride", 1)) | |
elif op == "Reshape": | |
if len(node.input) == 1: | |
shape = get_node_attr_ai(node, "shape") | |
else: | |
shape = get_node_attr_from_input_ai(self.weights[node.input[1]]) | |
shape_size = shape.size | |
if shape_size == 1: | |
logger.error("chaiNNer: Should never reach shape.size == 1 in Reshape") | |
layer.add_param(0, int(shape[0])) | |
elif shape_size == 2: | |
layer.add_param(0, int(shape[1])) | |
elif shape_size == 3: | |
layer.add_param(0, int(shape[2])) | |
layer.add_param(1, int(shape[1])) | |
elif shape_size == 4: | |
layer.add_param(0, int(shape[3])) | |
layer.add_param(1, int(shape[2])) | |
layer.add_param(2, int(shape[1])) | |
elif shape_size == 5: | |
layer.add_param(0, int(shape[4] * shape[3])) | |
layer.add_param(1, int(shape[2])) | |
layer.add_param(2, int(shape[1])) | |
elif op == "Resize": | |
mode = get_node_attr_s(node, "mode") | |
align = get_node_attr_s(node, "coordinate_transformation_mode") | |
if len(node.input) == 2: | |
# opset 10 | |
scales = get_node_attr_from_input_af(self.weights[node.input[1]]) | |
sizes = np.empty(0, np.int32) | |
else: | |
# opset 11+ | |
scales = get_node_attr_from_input_af(self.weights[node.input[2]]) | |
if len(node.input) >= 4: | |
sizes = get_node_attr_from_input_ai(self.weights[node.input[3]]) | |
else: | |
sizes = np.empty(0, np.int32) | |
if mode == "linear": | |
resize_type = IRT.BILINEAR | |
elif mode == "cubic": | |
resize_type = IRT.BICUBIC | |
else: | |
resize_type = IRT.NEAREST | |
if scales.size == 0 and sizes.size == 0: | |
raise ValueError( | |
"Unsupported Resize scales and sizes are all empty." | |
) | |
if scales.size == 2: | |
h_scale = 1 | |
w_scale = scales[1] | |
elif scales.size == 3: | |
h_scale = scales[1] | |
w_scale = scales[2] | |
elif scales.size == 4: | |
if scales[1] != 1: | |
raise TypeError(f"Unsupported Resize scales {scales}.") | |
h_scale = scales[2] | |
w_scale = scales[3] | |
else: | |
h_scale = 1 | |
w_scale = 1 | |
if sizes.size == 2: | |
output_height = 0 | |
output_width = sizes[1] | |
elif sizes.size == 3: | |
output_height = sizes[1] | |
output_width = sizes[2] | |
elif sizes.size == 4: | |
output_height = sizes[2] | |
output_width = sizes[3] | |
else: | |
output_height = 0 | |
output_width = 0 | |
align_corner = int(align == "align_corners") | |
layer.add_param(0, resize_type) | |
layer.add_param(1, float(h_scale)) | |
layer.add_param(2, float(w_scale)) | |
layer.add_param(3, int(output_height)) | |
layer.add_param(4, int(output_width)) | |
layer.add_param(6, align_corner) | |
elif op == "RNN": | |
W = self.weights[node.input[1]] | |
R = self.weights[node.input[2]] | |
B = self.weights[node.input[3]] | |
hidden_size = get_node_attr_i(node, "hidden_size", 0) | |
direction = get_node_attr_s(node, "direction") | |
if direction == "reverse": | |
direction_type = GRU.REVERSE | |
elif direction == "bidirectional": | |
direction_type = GRU.BIDIRECTIONAL | |
else: | |
direction_type = GRU.FORWARD | |
weight_data_size = get_tensor_proto_data_size(W, W.data_type) | |
layer.add_param(0, hidden_size) | |
layer.add_param(1, weight_data_size) | |
layer.add_param(2, direction_type) | |
quantize_tag = DTYPE_FP16 if is_fp16 else DTYPE_FP32 | |
bin_length += self.add_weight(layer, "weight", W, quantize_tag) | |
# reduce xc and hc bias: ONNX concatenates Wb and Rb along the last axis, | |
# ncnn expects their elementwise sum per direction | |
B_array = onph.to_array(B) | |
reduced_B = B_array.reshape(B_array.shape[0], 2, -1).sum(1) | |
bin_length += self.add_weight(layer, "bias", reduced_B, quantize_tag) | |
bin_length += self.add_weight(layer, "R", R, quantize_tag) | |
elif op == "ShuffleChannel": | |
layer.add_param(0, get_node_attr_i(node, "group", 1)) | |
layer.add_param(1, get_node_attr_i(node, "reverse", 0)) | |
elif op == "Sigmoid": | |
pass | |
elif op == "Sin": | |
layer.add_param(0, UOT.SIN) | |
elif op == "SkipLayerNormalization": | |
logger.error(f"chaiNNer: No NCNN documentation for {op} yet, will not function") | |
W = self.weights[node.input[2]] | |
B = self.weights[node.input[3]] | |
B2 = self.weights[node.input[4]] | |
layer.add_param(0, get_tensor_proto_data_size(B, B.data_type)) | |
quantize_tag = DTYPE_FP16 if is_fp16 else DTYPE_FP32 | |
bin_length += self.add_weight(layer, "weight", W, quantize_tag) | |
bin_length += self.add_weight(layer, "bias1", B, DTYPE_FP32) | |
bin_length += self.add_weight(layer, "bias2", B2, DTYPE_FP32) | |
elif op == "Slice": | |
input_size = len(node.input) | |
if input_size == 1: | |
starts = get_node_attr_ai(node, "starts") | |
ends = get_node_attr_ai(node, "ends") | |
axes = get_node_attr_ai(node, "axes") | |
steps = get_node_attr_ai(node, "steps") | |
else: | |
starts = get_node_attr_from_input_ai(self.weights[node.input[1]]) | |
ends = get_node_attr_from_input_ai(self.weights[node.input[2]]) | |
if input_size >= 4: | |
axes = get_node_attr_from_input_ai(self.weights[node.input[3]]) | |
else: | |
axes = np.empty(0, np.int32) | |
if input_size >= 5: | |
steps = get_node_attr_from_input_ai(self.weights[node.input[4]]) | |
else: | |
steps = np.empty(0, np.int32) | |
assert np.all(steps == 1), f"Unsupported Slice step {steps}" | |
# Filter out N-dim axis | |
if axes.size: | |
for i, axis in enumerate(axes): | |
if axis == 0: | |
starts = np.delete(starts, i) | |
ends = np.delete(ends, i) | |
axes = np.delete(axes, i) | |
break | |
layer.add_param(9, [starts.size, *list(starts)]) | |
layer.add_param(10, [ends.size, *list(ends)]) | |
if axes.size: | |
assert np.all( | |
(axes != 0) & (axes <= 3) & (axes >= -3) | |
), f"Unsupported Slice axes {axes}" | |
layer.add_param( | |
11, [axes.size, *[a - 1 if a > 0 else a for a in axes]] | |
) | |
elif op == "Softmax": | |
axis = get_node_attr_i(node, "axis", 1) | |
layer.add_param(0, axis - 1) | |
layer.add_param(1, 1) | |
elif op == "Split": | |
axis = get_node_attr_i(node, "axis", 0) | |
splits = get_node_attr_ai(node, "split") | |
assert axis >= 1, f"Unsupported axis {axis} in Split" | |
if splits.size: | |
layer.add_param(0, [output_size, *list(splits[:-1]), -233]) | |
else: | |
layer.add_param( | |
0, [output_size, *[-233 for _ in range(output_size)]] | |
) | |
layer.add_param(1, axis - 1) | |
elif op == "Sqrt": | |
layer.add_param(0, UOT.SQRT) | |
elif op == "Squeeze": | |
axes = get_node_attr_ai(node, "axes") | |
if axes.size: | |
assert np.all( | |
(axes != 0) & (axes <= 4) & (axes >= -3) | |
), f"Unsupported Squeeze axes {axes}" | |
layer.add_param( | |
3, [axes.size, *[a - 1 if a > 0 else a for a in axes]] | |
) | |
else: | |
layer.add_param(0, 1) | |
layer.add_param(1, 1) | |
layer.add_param(2, 1) | |
elif op == "Sum": | |
layer.add_param(0, EOT.SUM) | |
elif op == "Swish": | |
pass | |
elif op == "Tan": | |
layer.add_param(0, UOT.TAN) | |
elif op == "Tanh": | |
layer.add_param(0, UOT.TANH) | |
elif op == "Transpose": | |
perm = get_node_attr_ai(node, "perm") | |
if perm.size == 3: | |
if (perm[1] == 1 and perm[2] == 2) or ( | |
perm[0] == 1 and perm[1] == 0 and perm[2] == 2 | |
): | |
layer.add_param(0, POT.WH_WHC_WHDC) | |
elif (perm[1] == 2 and perm[2] == 1) or ( | |
perm[0] == 2 and perm[1] == 0 and perm[2] == 1 | |
): | |
layer.add_param(0, POT.HW_HWC_HWDC) | |
elif perm.size == 4: | |
if perm[1] == 1 and perm[2] == 2 and perm[3] == 3: | |
layer.add_param(0, POT.WH_WHC_WHDC) | |
elif perm[1] == 1 and perm[2] == 3 and perm[3] == 2: | |
layer.add_param(0, POT.HW_HWC_HWDC) | |
elif perm[1] == 2 and perm[2] == 1 and perm[3] == 3: | |
layer.add_param(0, POT.WCH_WDHC) | |
elif perm[1] == 2 and perm[2] == 3 and perm[3] == 1: | |
layer.add_param(0, POT.CWH_DWHC) | |
elif perm[1] == 3 and perm[2] == 1 and perm[3] == 2: | |
layer.add_param(0, POT.HCW_HDWC) | |
elif perm[1] == 3 and perm[2] == 2 and perm[3] == 1: | |
layer.add_param(0, POT.CHW_DHWC) | |
elif perm.size == 5: | |
if perm[1] == 1 and perm[2] == 2 and perm[3] == 3 and perm[4] == 4: | |
layer.add_param(0, POT.WH_WHC_WHDC) | |
elif ( | |
perm[1] == 1 and perm[2] == 3 and perm[3] == 4 and perm[4] == 2 | |
): | |
layer.add_param(0, POT.HW_HWC_HWDC) | |
elif ( | |
perm[1] == 2 and perm[2] == 1 and perm[3] == 3 and perm[4] == 4 | |
): | |
layer.add_param(0, POT.WCH_WDHC) | |
elif ( | |
perm[1] == 2 and perm[2] == 3 and perm[3] == 4 and perm[4] == 1 | |
): | |
layer.add_param(0, POT.CWH_DWHC) | |
elif ( | |
perm[1] == 3 and perm[2] == 4 and perm[3] == 1 and perm[4] == 2 | |
): | |
layer.add_param(0, POT.HCW_HDWC) | |
elif ( | |
perm[1] == 3 and perm[2] == 4 and perm[3] == 2 and perm[4] == 1 | |
): | |
layer.add_param(0, POT.CHW_DHWC) | |
else: | |
error_msg = f"Unsupported Transpose type {perm}" | |
raise ValueError(error_msg) | |
elif op == "Upsample": | |
mode = get_node_attr_s(node, "mode") | |
align = get_node_attr_s(node, "coordinate_transformation_mode") | |
if len(node.input) == 1: | |
scales = get_node_attr_af(node, "scales") | |
else: | |
scales = get_node_attr_from_input_af(self.weights[node.input[1]]) | |
if mode == "bilinear" or mode == "linear": | |
resize_type = IRT.BILINEAR | |
elif mode == "trilinear": | |
raise ValueError("Upsample does not support trilinear mode") | |
else: | |
resize_type = IRT.NEAREST | |
if scales.size == 2: | |
h_scale = 1 | |
w_scale = scales[1] | |
elif scales.size == 3: | |
h_scale = scales[1] | |
w_scale = scales[2] | |
elif scales.size == 4: | |
h_scale = scales[2] | |
w_scale = scales[3] | |
if scales[1] != 1: | |
error_msg = f"Unsupported Upsample scales {scales}" | |
raise ValueError(error_msg) | |
else: | |
error_msg = f"Unsupported Upsample scales {scales}" | |
raise ValueError(error_msg) | |
align_corner = int(align == "align_corners") | |
layer.add_param(0, resize_type) | |
layer.add_param(1, float(h_scale)) | |
layer.add_param(2, float(w_scale)) | |
layer.add_param(6, align_corner) | |
elif op == "Unsqueeze": | |
axes = get_node_attr_ai(node, "axes") | |
assert ( | |
np.all(axes != 0) and np.all(axes <= 4) and np.all(axes >= -4) | |
), f"Unsupported axes {axes} in Unsqueeze" | |
layer.add_param( | |
3, [axes.size, *[axis - 1 if axis > 0 else axis for axis in axes]] | |
) | |
else: | |
# NCNN TODO: op specific param | |
# This is presumably to catch anything they haven't written an op for yet | |
for attr in node.attribute: | |
if attr.type == 1: | |
error_msg = f"Op {op} does not exist yet; {attr.name}={attr.f}" | |
elif attr.type == 2: | |
error_msg = f"Op {op} does not exist yet; {attr.name}={attr.i}" | |
elif attr.type == 3: | |
error_msg = f"Op {op} does not exist yet; {attr.name}={attr.s}" | |
else: | |
error_msg = ( | |
f"Op {op} does not exist yet; {attr.name}={attr.type}" | |
) | |
raise ValueError(error_msg) | |
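# Append the finished layer; if any of its outputs is consumed by more than one node, | |
# follow it with a Split layer that fans the blob out under *_splitncnn_N names, | |
# matching the renaming applied to consumer inputs above. | |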
ncnn_model.add_layer(layer) | |
for o in range(output_size): | |
output_name = node.output[o] | |
if output_name in self.node_reference: | |
refcount = self.node_reference[output_name] | |
if refcount > 1: | |
ncnn_model.add_layer( | |
NcnnLayer( | |
"Split", | |
f"splitncnn_{internal_split}", | |
1, | |
refcount, | |
[output_name], | |
[ | |
f"{output_name}_splitncnn_{j}" | |
for j in range(refcount) | |
], | |
) | |
) | |
internal_split += 1 | |
ncnn_model.bin_length = bin_length | |
NcnnOptimizer(ncnn_model).optimize() | |
return ncnn_model | |