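Update app.py: reduce text-splitter chunks to chunk_size=800 / chunk_overlap=200 and keep the top 3 BM25-ranked docs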
Browse files
app.py
CHANGED
@@ -50,7 +50,8 @@ collection = client.get_or_create_collection(
|
|
50 |
embedding_model = SentenceTransformer("intfloat/multilingual-e5-base")
|
51 |
|
52 |
# Initialize the text splitter
|
53 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=300)
|
|
|
54 |
|
55 |
total_chunks = 0
|
56 |
|
@@ -120,7 +121,7 @@ def rerank_with_bm25(docs, query):
|
|
120 |
tokenized_query = clean_and_tokenize(query, lang)
|
121 |
scores = bm25.get_scores(tokenized_query)
|
122 |
|
123 |
-
top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:
|
124 |
return [docs[i] for i in top_indices]
|
125 |
|
126 |
|
|
|
50 |
embedding_model = SentenceTransformer("intfloat/multilingual-e5-base")
|
51 |
|
52 |
# Initialize the text splitter
|
53 |
+
#text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=300)
|
54 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200)
|
55 |
|
56 |
total_chunks = 0
|
57 |
|
|
|
121 |
tokenized_query = clean_and_tokenize(query, lang)
|
122 |
scores = bm25.get_scores(tokenized_query)
|
123 |
|
124 |
+
top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:3]
|
125 |
return [docs[i] for i in top_indices]
|
126 |
|
127 |
|