Update app.py
Browse files
app.py
CHANGED
@@ -21,9 +21,9 @@ google_api_key = os.environ.get("GOOGLE_API_KEY")
|
|
21 |
|
22 |
login(token=hf_token)
|
23 |
|
24 |
-
def chunk_text(text, chunk_size=250, chunk_overlap=
|
25 |
splitter = RecursiveCharacterTextSplitter(
|
26 |
-
chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=["\n,\n", "
|
27 |
)
|
28 |
chunks = splitter.split_text(text)
|
29 |
return chunks
|
@@ -70,7 +70,7 @@ text = dataset["facts"]["text"]
|
|
70 |
text_string = "".join(text)
|
71 |
|
72 |
# Chunk and index the documents
|
73 |
-
chunks = chunk_text(text_string, chunk_size=
|
74 |
# Build the vectorstore
|
75 |
vectorstore = build_faiss_vectorstore(chunks)
|
76 |
|
|
|
21 |
|
22 |
login(token=hf_token)
|
23 |
|
24 |
+
def chunk_text(text, chunk_size=250, chunk_overlap=0):
|
25 |
splitter = RecursiveCharacterTextSplitter(
|
26 |
+
chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=["\n,\n", "\n", ".", " "]
|
27 |
)
|
28 |
chunks = splitter.split_text(text)
|
29 |
return chunks
|
|
|
70 |
text_string = "".join(text)
|
71 |
|
72 |
# Chunk and index the documents
|
73 |
+
chunks = chunk_text(text_string, chunk_size=350)
|
74 |
# Build the vectorstore
|
75 |
vectorstore = build_faiss_vectorstore(chunks)
|
76 |
|