Update app.py
Browse files
app.py
CHANGED
@@ -19,9 +19,9 @@ google_api_key = os.environ.get("GOOGLE_API_KEY")
|
|
19 |
|
20 |
login(token=hf_token)
|
21 |
|
22 |
-
def chunk_text(text, chunk_size=
|
23 |
splitter = RecursiveCharacterTextSplitter(
|
24 |
-
chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=[" ", "\n"]
|
25 |
)
|
26 |
chunks = splitter.split_text(text)
|
27 |
return chunks
|
@@ -56,7 +56,7 @@ concise_text = dataset["concise"]["text"]
|
|
56 |
concise_text_string = "".join(concise_text)
|
57 |
|
58 |
# Chunk and index the documents
|
59 |
-
chunks = chunk_text(concise_text_string, chunk_size=
|
60 |
# Build the vectorstore
|
61 |
vectorstore = build_faiss_vectorstore(chunks)
|
62 |
|
|
|
19 |
|
20 |
login(token=hf_token)
|
21 |
|
22 |
+
def chunk_text(text, chunk_size=250, chunk_overlap=50):
|
23 |
splitter = RecursiveCharacterTextSplitter(
|
24 |
+
chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=[" ", "\n", "."]
|
25 |
)
|
26 |
chunks = splitter.split_text(text)
|
27 |
return chunks
|
|
|
56 |
concise_text_string = "".join(concise_text)
|
57 |
|
58 |
# Chunk and index the documents
|
59 |
+
chunks = chunk_text(concise_text_string, chunk_size=300) # chunks are 30 words each
|
60 |
# Build the vectorstore
|
61 |
vectorstore = build_faiss_vectorstore(chunks)
|
62 |
|