Spaces:
Running
Running
File size: 1,241 Bytes
28bdc3c b224afc 28bdc3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
from sentence_transformers import (
SentenceTransformer,
export_static_quantized_openvino_model,
export_dynamic_quantized_onnx_model,
)
# Model identifier handed to SentenceTransformer for loading and reused as the
# target name for every push_to_hub / quantized-export call in export_model.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_3"
def export_model(backend="onnx", use_qint8=False):
    """Load MODEL_NAME with the given backend and publish it.

    Args:
        backend: Either "openvino" or "onnx"; forwarded to SentenceTransformer.
        use_qint8: When true, run the backend's quantized export helper with
            push_to_hub=True; when false, call model.push_to_hub(MODEL_NAME).

    Raises:
        ValueError: If backend is neither "openvino" nor "onnx".
    """
    # Reject unsupported backends before anything is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    # Plain (non-quantized) path is the same for both backends.
    if not use_qint8:
        model.push_to_hub(MODEL_NAME)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
        )
    else:
        # Only "onnx" can reach this branch thanks to the guard above.
        export_dynamic_quantized_onnx_model(
            model, "avx512_vnni", MODEL_NAME, push_to_hub=True
        )
# Walk every backend / quantization pairing in a single loop:
# openvino (qint8, plain) first, then onnx (qint8, plain).
for backend, use_qint8 in [
    (backend_name, quantized)
    for backend_name in ("openvino", "onnx")
    for quantized in (True, False)
]:
    print(f"Exporting {backend} model with QINT8={use_qint8}")
    export_model(backend=backend, use_qint8=use_qint8)
|