Spaces:
Runtime error
Runtime error
File size: 574 Bytes
7f7b773 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
import spacy_transformers # needed by SpacyTextSplitter when using the en_core_web_trf pipeline
import spacy
from typing import Iterable, Iterator
from langchain.docstore.document import Document
from langchain.text_splitter import SpacyTextSplitter
class SpacySplitter:
    """Split documents into chunks with LangChain's ``SpacyTextSplitter``.

    GPU selection is requested once, at construction time and *before* the
    spaCy pipeline is loaded. spaCy documents that ``prefer_gpu`` should be
    called before any pipeline is loaded; calling it afterwards (as this
    class previously did on every ``split_documents`` call) does not
    reliably move an already-loaded pipeline onto the GPU.
    """

    def __init__(
        self,
        *,
        chunk_size: int = 1000,
        pipeline: str = "en_core_web_trf",
        gpu_id: int = 1,
    ):
        """Select the device and build the underlying splitter.

        Args:
            chunk_size: Value forwarded to ``SpacyTextSplitter`` as
                ``chunk_size``.
            pipeline: Name of the spaCy pipeline forwarded to
                ``SpacyTextSplitter``.
            gpu_id: Device index passed to ``spacy.prefer_gpu``. The
                default of 1 preserves the previously hard-coded value.
                NOTE(review): index 1 is normally the *second* GPU -
                confirm this is intended for the deployment host.
        """
        # Must run before the splitter is built: the splitter is expected to
        # load the spaCy pipeline in its constructor, and the device has to be
        # chosen before that happens.
        self.gpu_enabled = spacy.prefer_gpu(gpu_id=gpu_id)
        self.splitter = SpacyTextSplitter(chunk_size=chunk_size, pipeline=pipeline)

    def split_documents(self, docs: Iterable[Document]) -> Iterator[Document]:
        """Split ``docs`` into chunks using the configured splitter.

        Args:
            docs: Documents to split.

        Returns:
            Whatever the wrapped ``SpacyTextSplitter.split_documents``
            returns for ``docs``.
        """
        return self.splitter.split_documents(docs)
|