Spaces:
Sleeping
Sleeping
# Copyright (c) Facebook, Inc. and its affiliates. | |
# All rights reserved. | |
# | |
# This source code is licensed under the BSD-style license found in the | |
# LICENSE file in the root directory of this source tree. | |
# | |
# LASER Language-Agnostic SEntence Representations | |
# is a toolkit to calculate multilingual sentence embeddings | |
# and to use them for document classification, bitext filtering | |
# and mining | |
# | |
# -------------------------------------------------------- | |
# | |
# evaluate similarity search on WMT newstest2011 | |
if [ -z ${LASER+x} ] ; then | |
echo "Please set the environment variable 'LASER'" | |
exit | |
fi | |
# encoder | |
model_dir="${LASER}/models" | |
encoder="${model_dir}/bilstm.93langs.2018-12-26.pt" | |
bpe_codes="${model_dir}/93langs.fcodes" | |
edir="embed" | |
if [ ! -d dev ] ; then | |
echo " - Download WMT data" | |
wget -q http://www.statmt.org/wmt13/dev.tgz | |
tar --wildcards -xf dev.tgz "dev/newstest2012.??" | |
/bin/rm dev.tgz | |
fi | |
python3 ${LASER}//source/similarity_search.py \ | |
--bpe-codes ${bpe_codes} --encoder ${encoder} \ | |
--base-dir . \ | |
--data dev/newstest2012 --output ${edir}/newstest2012 \ | |
--lang cs de en es fr --verbose | |