melk2025 committed on
Commit
37bc19c
·
verified ·
1 Parent(s): c572f39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -50,7 +50,8 @@ collection = client.get_or_create_collection(
50
  embedding_model = SentenceTransformer("intfloat/multilingual-e5-base")
51
 
52
  # Initialize the text splitter
53
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=300)
 
54
 
55
  total_chunks = 0
56
 
@@ -120,7 +121,7 @@ def rerank_with_bm25(docs, query):
120
  tokenized_query = clean_and_tokenize(query, lang)
121
  scores = bm25.get_scores(tokenized_query)
122
 
123
- top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:2]
124
  return [docs[i] for i in top_indices]
125
 
126
 
 
50
  embedding_model = SentenceTransformer("intfloat/multilingual-e5-base")
51
 
52
  # Initialize the text splitter
53
+ #text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=300)
54
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200)
55
 
56
  total_chunks = 0
57
 
 
121
  tokenized_query = clean_and_tokenize(query, lang)
122
  scores = bm25.get_scores(tokenized_query)
123
 
124
+ top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:3]
125
  return [docs[i] for i in top_indices]
126
 
127