Spaces:
Runtime error
Runtime error
import spacy_transformers # needed by SpacyTextSplitter when using the en_core_web_trf pipeline | |
import spacy | |
from typing import Iterable, Iterator | |
from langchain.docstore.document import Document | |
from langchain.text_splitter import SpacyTextSplitter | |
class SpacySplitter:
    """Split documents into chunks with langchain's ``SpacyTextSplitter``.

    The GPU device is selected *before* the splitter (and therefore the
    spaCy pipeline it wraps) is created, as spaCy's documentation for
    ``prefer_gpu`` requires.

    Args:
        chunk_size: Forwarded to ``SpacyTextSplitter`` as ``chunk_size``.
        pipeline: Name of the spaCy pipeline the splitter should use.
        gpu_id: Device index passed to ``spacy.prefer_gpu``. The default of
            ``1`` is kept for backward compatibility.
            NOTE(review): index 1 is normally the *second* GPU; on a
            single-GPU host ``0`` is probably what is wanted -- confirm
            against the deployment target.
    """

    def __init__(
        self,
        *,
        chunk_size: int = 1000,
        pipeline: str = "en_core_web_trf",
        gpu_id: int = 1,
    ):
        self.gpu_id = gpu_id
        # Must run before the pipeline is loaded; SpacyTextSplitter is
        # presumed to load it in its constructor -- verify against the
        # installed langchain version.
        spacy.prefer_gpu(gpu_id=gpu_id)
        self.splitter = SpacyTextSplitter(chunk_size=chunk_size, pipeline=pipeline)

    def split_documents(self, docs: Iterable[Document]) -> Iterator[Document]:
        """Split ``docs`` into chunks and return the splitter's result.

        Args:
            docs: Documents to split.

        Returns:
            Whatever ``SpacyTextSplitter.split_documents`` returns for
            ``docs``, unchanged.
        """
        # Kept from the original implementation so that callers which relied
        # on the device being (re)selected at call time see no change.
        spacy.prefer_gpu(gpu_id=self.gpu_id)
        return self.splitter.split_documents(docs)