M17idd committed on
Commit
f3e4793
·
verified ·
1 Parent(s): febfa21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -137,14 +137,14 @@ def get_pdf_index():
137
  small_chunks.append(text)
138
 
139
  final_chunks = []
140
- max_tokens = 512
141
 
142
  for chunk in small_chunks:
143
  token_count = count_tokens(chunk, model_name="gpt-3.5-turbo")
144
  if token_count > max_tokens:
145
  splitter_token_safe = RecursiveCharacterTextSplitter(
146
- chunk_size=512,
147
- chunk_overlap=100
148
  )
149
  smaller_chunks = splitter_token_safe.split_text(chunk)
150
  final_chunks.extend(smaller_chunks)
 
137
  small_chunks.append(text)
138
 
139
  final_chunks = []
140
+ max_tokens = 128
141
 
142
  for chunk in small_chunks:
143
  token_count = count_tokens(chunk, model_name="gpt-3.5-turbo")
144
  if token_count > max_tokens:
145
  splitter_token_safe = RecursiveCharacterTextSplitter(
146
+ chunk_size=128,
147
+ chunk_overlap=64
148
  )
149
  smaller_chunks = splitter_token_safe.split_text(chunk)
150
  final_chunks.extend(smaller_chunks)