File size: 1,198 Bytes
7f7b773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from typing import Iterable, Iterator
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceHubEmbeddings # TODO check HuggingFaceInstructEmbeddings

class HuggingFaceTextEmbedding:
    """Embed text with a locally loaded Hugging Face sentence-transformer.

    Thin wrapper around LangChain's ``HuggingFaceEmbeddings``; the wrapped
    embedder is exposed as ``self.model``.
    """

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-mpnet-base-v2",
        device: str = "cpu",
        normalize_embeddings: bool = False,
    ) -> None:
        """Build the underlying LangChain embedder.

        Args:
            model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.
            device: Value for the ``device`` entry of ``model_kwargs``.
            normalize_embeddings: Value for the ``normalize_embeddings``
                entry of ``encode_kwargs``.

        The defaults reproduce the previously hard-coded configuration.
        """
        self.model = HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs={"device": device},
            encode_kwargs={"normalize_embeddings": normalize_embeddings},
        )

    def embed_documents(self, docs: "Iterable[Document]") -> "list[list[float]]":
        """Return one embedding per input document, in input order.

        Args:
            docs: Any iterable (including a generator) of ``Document``
                objects. Plain strings are accepted too and embedded as-is.

        Returns:
            Whatever ``self.model.embed_documents`` returns for the extracted
            texts (per the LangChain API, a list of float vectors), or an
            empty list when ``docs`` yields nothing.
        """
        # The LangChain embedder works on raw strings, not Document objects,
        # so pull out ``page_content``; items without that attribute (e.g.
        # plain ``str``) are passed through unchanged.
        texts = [getattr(doc, "page_content", doc) for doc in docs]
        if not texts:
            # Nothing to embed; skip the model call entirely.
            return []
        return self.model.embed_documents(texts)
    
# class HuggingFaceInferenceAPITextEmbedding:
#     def __init__(self) -> None:
#         pass

#     def embed_documents(self, docs: Iterable[Document]) -> Iterator[Document]:
#         embeddings = HuggingFaceInferenceAPIEmbeddings(
#             api_key=inference_api_key,
#             model_name="sentence-transformers/all-MiniLM-l6-v2"
#         )
#         chunks = embeddings.embed_documents(docs)
#         return chunks