Spaces:
Running
Running
Delete embeddings
Browse files- embeddings/embedder.py +0 -17
embeddings/embedder.py
DELETED
@@ -1,17 +0,0 @@
|
|
1 |
-
from transformers import AutoTokenizer, AutoModel
|
2 |
-
import torch
|
3 |
-
import numpy as np
|
4 |
-
from typing import List
|
5 |
-
|
6 |
-
class Embedder:
    """Text embedder built on a Hugging Face tokenizer/model pair.

    The tokenizer and model are loaded once at construction time.
    ``embed`` then maps a batch of texts to L2-normalised vectors taken
    from the first token position of the model's last hidden state.
    """

    def __init__(self, model_name: str = "BAAI/bge-m3"):
        """Load the tokenizer and model for *model_name*.

        Args:
            model_name: Identifier passed unchanged to
                ``AutoTokenizer.from_pretrained`` and
                ``AutoModel.from_pretrained``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)

    def embed(self, texts: List[str]) -> np.ndarray:
        """Return one L2-normalised embedding row per input text.

        Args:
            texts: Batch of texts to embed.

        Returns:
            A 2-D NumPy array with one row per text. An empty batch
            yields an array with zero rows; its width is the model's
            ``config.hidden_size`` when that attribute is available,
            otherwise 0.
        """
        # Short-circuit an empty batch instead of handing it to the
        # tokenizer/model. A bare string is left alone so the tokenizer
        # keeps treating it exactly as before.
        if not isinstance(texts, str) and len(texts) == 0:
            model_config = getattr(self.model, "config", None)
            width = getattr(model_config, "hidden_size", 0)
            # NOTE(review): float32 is assumed for the empty result —
            # confirm if the model is loaded in another precision.
            return np.empty((0, width), dtype=np.float32)

        encoded = self.tokenizer(
            texts, padding=True, truncation=True, return_tensors="pt"
        )

        # The result is moved back with .cpu() below, so the model may
        # live on another device; put the inputs where the weights are.
        device = next(self.model.parameters()).device
        encoded = {key: value.to(device) for key, value in encoded.items()}

        with torch.no_grad():
            outputs = self.model(**encoded)
            # Take the embedding from the CLS token (first position).
            cls_vectors = outputs.last_hidden_state[:, 0]
            unit_vectors = torch.nn.functional.normalize(cls_vectors, p=2, dim=1)

        return unit_vectors.cpu().numpy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|