# Spaces: Sleeping  (page-status header captured along with the source; not part of the program)
from typing import AsyncIterable, List | |
from langchain_core.document_loaders.blob_loaders import BlobLoader | |
from langchain_community.document_loaders.generic import GenericLoader | |
from langchain_core.documents import Document | |
from langchain_community.document_loaders.base import BaseBlobParser | |
# Extend the base GenericLoader class with asynchronous loading helpers.
class CustomGenericLoader(GenericLoader):
    """GenericLoader extension that exposes asynchronous loading helpers.

    Blobs come from ``self.blob_loader.yield_blobs()`` and every blob is
    handed to ``self.blob_parser.lazy_parse``. The parsed documents are
    exposed through two async generators (``async_load`` and ``lazy_load``)
    and one eager coroutine (``load_all``).
    """

    def __init__(self, blob_loader: BlobLoader, blob_parser: BaseBlobParser):
        """Forward the blob loader and blob parser to the base initializer.

        Args:
            blob_loader: Source of blobs; its ``yield_blobs()`` is consumed
                by the loading methods of this class.
            blob_parser: Parser whose ``lazy_parse(blob)`` turns one blob
                into documents.
        """
        super().__init__(blob_loader, blob_parser)

    async def _aiter_blobs(self) -> AsyncIterable:
        """Yield blobs from the blob loader, whatever kind of iterable it returns.

        ``yield_blobs()`` is normally a plain (synchronous) iterable, which
        ``async for`` cannot consume directly. An asynchronous iterable is
        still accepted so custom loaders that provide one keep working.
        """
        blobs = self.blob_loader.yield_blobs()
        if hasattr(blobs, "__aiter__"):
            async for blob in blobs:
                yield blob
        else:
            for blob in blobs:
                yield blob

    async def async_load(self) -> AsyncIterable[Document]:
        """Asynchronously yield every document parsed from every blob.

        Yields:
            Each document produced by ``self.blob_parser.lazy_parse`` for
            each blob, in blob order.

        Note:
            Parsing itself runs synchronously inside this generator; only
            the iteration protocol is asynchronous.
        """
        async for blob in self._aiter_blobs():
            # lazy_parse produces an iterator of documents for a single blob;
            # flatten it so consumers receive documents rather than iterators.
            for document in self.blob_parser.lazy_parse(blob):
                yield document

    async def lazy_load(self) -> AsyncIterable[Document]:
        """Lazily yield documents; override this hook for custom behavior.

        The default implementation simply re-yields ``async_load()``.

        NOTE(review): the inherited ``lazy_load`` is presumably synchronous,
        so inherited helpers that iterate it with a plain ``for`` would not
        work with this async override -- confirm against the base class
        before relying on them. The async signature is kept so existing
        ``async for`` callers are unaffected.
        """
        async for document in self.async_load():
            yield document

    async def load_all(self) -> List[Document]:
        """Asynchronously load every document and return them as a list.

        Returns:
            All documents yielded by ``async_load()``, in order. The list is
            empty when the blob loader yields no blobs.
        """
        return [document async for document in self.async_load()]