Spaces:
Runtime error
Runtime error
from langchain.document_loaders import UnstructuredWordDocumentLoader, DirectoryLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings import HuggingFaceEmbeddings | |
def data_loader(data):
    """Load the Word documents found in a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for files matching the
        ``*.docx`` glob, each parsed with ``UnstructuredWordDocumentLoader``.
    """
    docx_loader = DirectoryLoader(
        data,
        loader_cls=UnstructuredWordDocumentLoader,
        glob="*.docx",
    )
    documents = docx_loader.load()
    return documents
def chunk_text(extracted_data, chunk_size=500, chunk_overlap=50):
    """Split documents into overlapping chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Target maximum size of each chunk, forwarded to the
            splitter. Defaults to 500, the value previously hard-coded.
        chunk_overlap: Amount of content shared between consecutive chunks,
            forwarded to the splitter. Defaults to 50, the value previously
            hard-coded.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)
def download_hugging_face_embeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Create a ``HuggingFaceEmbeddings`` instance for the given model.

    Args:
        model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.
            Defaults to ``sentence-transformers/all-MiniLM-L6-v2``, the value
            previously hard-coded, so existing zero-argument calls behave
            the same.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` object.
    """
    # NOTE(review): presumably the model weights are fetched on construction
    # when not cached locally — confirm against the library documentation.
    return HuggingFaceEmbeddings(model_name=model_name)