Spaces:

Ralqasimi
/

Chatbot

Sleeping

Ralqasimi committed on Feb 7

Commit

9185bbd

verified ·

1 Parent(s): f6c61f9

Update knowledge_base.py

Files changed (1) hide show

knowledge_base.py CHANGED Viewed

@@ -37,4 +37,30 @@ def search_faiss(faiss_index, stored_texts, query, top_k=3):
     # Retrieve the corresponding texts
     results = [stored_texts[i] for i in indices[0] if i < len(stored_texts)]
-    return results

     # Retrieve the corresponding texts
     results = [stored_texts[i] for i in indices[0] if i < len(stored_texts)]
+    return results
+    import re
def clean_text(text):
    """
    Clean text by collapsing whitespace and dropping unsupported symbols.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string, with leading and trailing whitespace removed.
    """
    # Imported locally so the function works even when no module-level
    # ``import re`` is reachable.
    import re

    # Collapse every run of whitespace (spaces, tabs, newlines) into one space.
    text = re.sub(r"\s+", " ", text)
    # Drop every character outside the kept set: Arabic letters in the range
    # ء-ي, ASCII letters and digits, and the listed punctuation.
    # NOTE(review): the Arabic comma (،) and question mark (؟) are outside the
    # kept set and are removed — confirm this is intended.
    text = re.sub(r"[^ء-يa-zA-Z0-9.,!?؛:\-\(\)\n ]+", "", text)
    return text.strip()
def create_faiss_index(texts):
    """
    Build a FAISS ``IndexFlatL2`` over the cleaned versions of ``texts``.

    Each entry is passed through ``clean_text`` and encoded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model. The resulting
    embeddings are added to an index sized from the embedding width.

    Args:
        texts: Iterable of strings to index.

    Returns:
        A ``(index, cleaned_texts)`` tuple, where ``cleaned_texts`` is the
        list of cleaned strings in the order they were added to the index.
    """
    from sentence_transformers import SentenceTransformer
    import faiss

    # Index the cleaned strings, and return them, rather than the raw input.
    cleaned_texts = list(map(clean_text, texts))

    encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    vectors = encoder.encode(cleaned_texts)

    # The index dimension is the second axis of the embeddings array.
    flat_index = faiss.IndexFlatL2(vectors.shape[1])
    flat_index.add(vectors)
    return flat_index, cleaned_texts