import os

import torch
from dotenv import load_dotenv

# from huggingface_hub import login
from transformers import AutoTokenizer, AutoModel

load_dotenv()
huggingface_token = os.environ.get("HF_TOKEN", "")
# login(huggingface_token)

# Model is private?
# auto_tokenizer = AutoTokenizer.from_pretrained(
#     "CocoonBusiness/VectorSearch", token=huggingface_token, low_cpu_mem_usage=True
# )

# Note: low_cpu_mem_usage is a model-loading option and has no effect on tokenizers,
# so it is not passed here.
auto_tokenizer = AutoTokenizer.from_pretrained("xValentim/vector-search-bert-based")

# Note: the tokenizer and the model come from different checkpoints; normally both
# should be loaded from the same repository so the vocabulary matches the weights.
model = AutoModel.from_pretrained(
    "jegorkitskerkin/robbert-v2-dutch-base-mqa-finetuned", low_cpu_mem_usage=True
)


def get_embeddings(text_list):
    encoded_input = auto_tokenizer(
        text_list,
        padding=True,
        truncation=True,
        max_length=500,
        return_tensors="pt",
        add_special_tokens=True,
    )
    # Inference only: no gradients needed.
    with torch.no_grad():
        model_output = model(**encoded_input)
    embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
    # Make 1D vector (assumes a single input text; multiple texts would be concatenated).
    return embeddings.detach().numpy().flatten().tolist()


def mean_pooling(model_output, attention_mask):
    # Average the token embeddings, ignoring padding positions via the attention mask.
    token_embeddings = model_output[0]
    input_mask_expanded = (
        attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    )
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
        input_mask_expanded.sum(1), min=1e-9
    )
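

# Minimal usage sketch (not part of the original script): embeds one illustrative
# Dutch sentence and prints the resulting vector dimension. The sample text is an
# assumption chosen only to exercise get_embeddings.
if __name__ == "__main__":
    example_embedding = get_embeddings("Dit is een voorbeeldzin voor vector search.")
    print(f"Embedding dimension: {len(example_embedding)}")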