File size: 2,015 Bytes
05d3571
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import socket

from flask import Flask, jsonify, request

from laser_encoders import LaserEncoderPipeline
from laser_encoders.language_list import LASER2_LANGUAGE, LASER3_LANGUAGE

# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)

# Global cache for encoders
# Filled lazily by vectorize() for languages not found in LASER2_LANGUAGE,
# keyed by the value looked up in LASER3_LANGUAGE (or the raw lang string).
encoder_cache = {}

# Single pipeline reused for every language found in LASER2_LANGUAGE;
# created by vectorize() on the first such request.
laser2_encoder = None


@app.route("/")
def root():
    """Return a small HTML greeting with the LASER env value and the hostname.

    The greeting name comes from the ``LASER`` environment variable and
    falls back to "world" when it is unset.
    """
    print("/")
    values = {
        "name": os.getenv("LASER", "world"),
        "hostname": socket.gethostname(),
    }
    template = "<h3>Hello {name}!</h3><b>Hostname:</b> {hostname}<br/>"
    return template.format(**values)


@app.route("/vectorize", methods=["GET"])
def vectorize():
    """Encode the ``q`` query parameter with a LASER pipeline and return JSON.

    Query parameters:
        q: Text to encode. A 400 response is returned when it is absent.
        lang: Language identifier; "eng" is used when it is omitted.

    Returns:
        A ``(response, status)`` pair. On success the JSON body carries the
        ``content`` and ``embedding`` keys with status 200. When ``q`` is
        missing, or a ``ValueError`` is raised while building or running the
        pipeline, the body carries an ``error`` key with status 400.
    """
    global laser2_encoder

    text = request.args.get("q")
    lang = request.args.get("lang", "eng")

    if text is None:
        return jsonify({"error": "Missing input content"}), 400

    try:
        if lang in LASER2_LANGUAGE:
            # Every LASER2 language shares one pipeline, built on first use.
            if not laser2_encoder:
                laser2_encoder = LaserEncoderPipeline(lang=lang)
            pipeline = laser2_encoder
        else:
            # Normalise through LASER3_LANGUAGE (falling back to the raw
            # value) so the cache is keyed by the looked-up code.
            cache_key = LASER3_LANGUAGE.get(lang, lang)
            pipeline = encoder_cache.get(cache_key)
            if pipeline is None:
                pipeline = LaserEncoderPipeline(lang=cache_key)
                encoder_cache[cache_key] = pipeline

        vectors = pipeline.encode_sentences([text]).tolist()
        return jsonify({"content": text, "embedding": vectors}), 200

    except ValueError as err:
        # Unsupported-language errors get a friendlier message; any other
        # ValueError text is passed through unchanged.
        reason = str(err)
        if "unsupported language" in reason.lower():
            reason = f"Language '{lang}' is not supported."
        return jsonify({"error": reason}), 400


if __name__ == "__main__":
    # Flask's debug mode enables the interactive Werkzeug debugger, which
    # lets a client run arbitrary Python in this process. Because the server
    # listens on all interfaces, debug is opt-in through the FLASK_DEBUG
    # environment variable instead of being always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, port=80, host="0.0.0.0")