tferhan committed on
Commit
267b2e4
·
verified ·
1 Parent(s): 3bf1206

Update qa_txt.py

Browse files
Files changed (1) hide show
  1. qa_txt.py +11 -2
qa_txt.py CHANGED
@@ -1,5 +1,5 @@
1
  from langchain_community.document_loaders import TextLoader
2
- from langchain.text_splitter import RecursiveCharacterTextSplitter
3
  from langchain.chains import ConversationalRetrievalChain
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.llms import HuggingFaceEndpoint
@@ -10,10 +10,19 @@ import tqdm
10
  from langchain_community.vectorstores import FAISS
11
  import accelerate
12
 
 
 
 
 
 
 
 
 
 
13
  def load_doc(file_path):
14
  loader = TextLoader(file_path)
15
  pages = loader.load()
16
- text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1212, chunk_overlap = 0)
17
  doc_splits = text_splitter.split_documents(pages)
18
  return doc_splits
19
 
 
1
  from langchain_community.document_loaders import TextLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
3
  from langchain.chains import ConversationalRetrievalChain
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.llms import HuggingFaceEndpoint
 
10
  from langchain_community.vectorstores import FAISS
11
  import accelerate
12
 
13
# Module-level CharacterTextSplitter used by load_doc: chunk_size 350,
# chunk_overlap 4, and a separator literal that spans a line break
# (a comma, a newline, then a closing square bracket).
# NOTE(review): any indentation inside the separator literal may have been
# lost in this rendering -- confirm the exact string against the real file.
+ c_splitter = CharacterTextSplitter(
14
+
15
+ chunk_size = 350,
16
+ chunk_overlap = 4,
17
+ separator = """,
18
+ ]""",
19
+
20
+ )
21
+
22
  def load_doc(file_path):
23
  loader = TextLoader(file_path)
24
  pages = loader.load()
25
+ text_splitter = c_splitter,
26
  doc_splits = text_splitter.split_documents(pages)
27
  return doc_splits
28