Spaces:
Sleeping
Sleeping
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import os | |
import socket | |
from flask import Flask, jsonify, request | |
from laser_encoders import LaserEncoderPipeline | |
from laser_encoders.language_list import LASER2_LANGUAGE, LASER3_LANGUAGE | |
# Flask application object; it is started by the ``__main__`` guard at the
# bottom of this file.
app = Flask(__name__)

# Global cache for encoders: maps a language code to the LaserEncoderPipeline
# built for it, so a pipeline is reused across requests instead of rebuilt.
encoder_cache = {}

# Lazily created pipeline shared by every language found in LASER2_LANGUAGE;
# stays None until the first request for such a language arrives.
laser2_encoder = None


@app.route("/")
def root():
    """Serve a small HTML greeting page that identifies this host.

    The greeting name is read from the ``LASER`` environment variable and
    falls back to ``"world"`` when the variable is unset.

    Returns:
        str: HTML fragment with the greeting and the machine's hostname.
    """
    print("/")  # minimal request trace on stdout
    html = "<h3>Hello {name}!</h3>" "<b>Hostname:</b> {hostname}<br/>"
    return html.format(name=os.getenv("LASER", "world"), hostname=socket.gethostname())


def _get_encoder(lang):
    """Return the cached encoder pipeline for *lang*, building it on first use.

    Languages found in ``LASER2_LANGUAGE`` all share the single
    ``laser2_encoder`` pipeline (created with whichever of those languages is
    requested first). Every other language gets its own entry in
    ``encoder_cache``.

    Raises:
        ValueError: propagated from ``LaserEncoderPipeline`` when it rejects
            the language (the caller inspects the message text).
    """
    global laser2_encoder

    if lang in LASER2_LANGUAGE:  # Checks for both 3-letter code or 8-letter code
        if laser2_encoder is None:
            laser2_encoder = LaserEncoderPipeline(lang=lang)
        return laser2_encoder

    # Use language code as key to prevent multiple entries for same language
    lang_code = LASER3_LANGUAGE.get(lang, lang)
    if lang_code not in encoder_cache:
        # The cache entry is only written after construction succeeds, so a
        # rejected language never leaves a stale key behind.
        encoder_cache[lang_code] = LaserEncoderPipeline(lang=lang_code)
    return encoder_cache[lang_code]


# NOTE(review): the URL path is inferred from the view name; confirm it
# matches what existing clients call.
@app.route("/vectorize", methods=["GET"])
def vectorize():
    """Embed the sentence given in the query string and return it as JSON.

    Query parameters:
        q: the text to embed (required).
        lang: language of the text; defaults to ``"eng"``.

    Returns:
        tuple: ``(response, status)`` where the response is
        ``{"content": <q>, "embedding": <nested list>}`` with status 200 on
        success, or ``{"error": <message>}`` with status 400 when ``q`` is
        missing or a ``ValueError`` is raised while encoding.
    """
    content = request.args.get("q")
    lang = request.args.get("lang", "eng")  # Default to English if 'lang' is not provided

    if content is None:
        return jsonify({"error": "Missing input content"}), 400

    try:
        encoder = _get_encoder(lang)
        embeddings = encoder.encode_sentences([content])
        embeddings_list = embeddings.tolist()
        body = {"content": content, "embedding": embeddings_list}
        return jsonify(body), 200
    except ValueError as e:
        # Check if the exception is due to an unsupported language
        if "unsupported language" in str(e).lower():
            return jsonify({"error": f"Language '{lang}' is not supported."}), 400
        return jsonify({"error": str(e)}), 400


if __name__ == "__main__":
    # The Werkzeug interactive debugger lets a client execute arbitrary code,
    # so it must not be enabled by default while the server listens on every
    # interface. Opt in explicitly (e.g. FLASK_DEBUG=1) for local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, port=80, host="0.0.0.0")