Lhumpal committed on
Commit
cb76d80
·
verified ·
1 Parent(s): 7c0b37b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -19,9 +19,9 @@ google_api_key = os.environ.get("GOOGLE_API_KEY")
19
 
20
  login(token=hf_token)
21
 
22
- def chunk_text(text, chunk_size=50, chunk_overlap=10):
23
  splitter = RecursiveCharacterTextSplitter(
24
- chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=[" ", "\n"]
25
  )
26
  chunks = splitter.split_text(text)
27
  return chunks
@@ -56,7 +56,7 @@ concise_text = dataset["concise"]["text"]
56
  concise_text_string = "".join(concise_text)
57
 
58
  # Chunk and index the documents
59
- chunks = chunk_text(concise_text_string, chunk_size=30) # chunks are 30 words each
60
  # Build the vectorstore
61
  vectorstore = build_faiss_vectorstore(chunks)
62
 
 
19
 
20
  login(token=hf_token)
21
 
22
+ def chunk_text(text, chunk_size=250, chunk_overlap=50):
23
  splitter = RecursiveCharacterTextSplitter(
24
+ chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=[" ", "\n", "."]
25
  )
26
  chunks = splitter.split_text(text)
27
  return chunks
 
56
  concise_text_string = "".join(concise_text)
57
 
58
  # Chunk and index the documents
59
+ chunks = chunk_text(concise_text_string, chunk_size=300) # chunks are at most 300 characters each (the splitter counts characters, not words)
60
  # Build the vectorstore
61
  vectorstore = build_faiss_vectorstore(chunks)
62