Hieucyber2208 committed on
Commit
55af983
·
verified ·
1 Parent(s): c30e47d

Delete embeddings

Browse files
Files changed (1) hide show
  1. embeddings/embedder.py +0 -17
embeddings/embedder.py DELETED
@@ -1,17 +0,0 @@
1
- from transformers import AutoTokenizer, AutoModel
2
- import torch
3
- import numpy as np
4
- from typing import List
5
-
6
- class Embedder:
7
- def __init__(self, model_name: str = "BAAI/bge-m3"):
8
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- self.model = AutoModel.from_pretrained(model_name)
10
-
11
- def embed(self, texts: List[str]) -> np.ndarray:
12
- inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
13
- with torch.no_grad():
14
- outputs = self.model(**inputs)
15
- embeddings = outputs.last_hidden_state[:, 0] # lấy embedding từ CLS token
16
- embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
17
- return embeddings.cpu().numpy()