Lhumpal committed on
Commit
3e419e2
·
verified ·
1 Parent(s): 7cc632c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -21,9 +21,9 @@ google_api_key = os.environ.get("GOOGLE_API_KEY")
21
 
22
  login(token=hf_token)
23
 
24
- def chunk_text(text, chunk_size=250, chunk_overlap=50):
25
  splitter = RecursiveCharacterTextSplitter(
26
- chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=["\n,\n", " ", "\n", "."]
27
  )
28
  chunks = splitter.split_text(text)
29
  return chunks
@@ -70,7 +70,7 @@ text = dataset["facts"]["text"]
70
  text_string = "".join(text)
71
 
72
  # Chunk and index the documents
73
- chunks = chunk_text(text_string, chunk_size=400)
74
  # Build the vectorstore
75
  vectorstore = build_faiss_vectorstore(chunks)
76
 
 
21
 
22
  login(token=hf_token)
23
 
24
+ def chunk_text(text, chunk_size=250, chunk_overlap=0):
25
  splitter = RecursiveCharacterTextSplitter(
26
+ chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=["\n,\n", "\n", ".", " "]
27
  )
28
  chunks = splitter.split_text(text)
29
  return chunks
 
70
  text_string = "".join(text)
71
 
72
  # Chunk and index the documents
73
+ chunks = chunk_text(text_string, chunk_size=350)
74
  # Build the vectorstore
75
  vectorstore = build_faiss_vectorstore(chunks)
76