Spaces:

Ralqasimi
/

Chatbot

Sleeping

App Files Community

Ralqasimi committed on Feb 7

Commit

a99e8b1

verified ·

1 Parent(s): 9185bbd

Update knowledge_base.py

Browse files

Files changed (1) hide show

knowledge_base.py +17 -32

knowledge_base.py CHANGED Viewed

@@ -1,10 +1,24 @@
 # Create FAISS index
 def create_faiss_index(texts):
     """
     Create a FAISS index from the provided list of texts.
     """
-    import faiss
-    from sentence_transformers import SentenceTransformer
     # Load pre-trained SentenceTransformer model
     model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
@@ -17,14 +31,11 @@ def create_faiss_index(texts):
     return index, texts
 # Search the FAISS index
 def search_faiss(faiss_index, stored_texts, query, top_k=3):
     """
     Search the FAISS index for the most relevant texts based on the query.
     """
-    from sentence_transformers import SentenceTransformer
     # Load the same model used for indexing
     model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
@@ -37,30 +48,4 @@ def search_faiss(faiss_index, stored_texts, query, top_k=3):
     # Retrieve the corresponding texts
     results = [stored_texts[i] for i in indices[0] if i < len(stored_texts)]
-    return results
-    import re
-def clean_text(text):
-    """
-    Cleans text by removing unnecessary symbols and whitespace.
-    """
-    text = re.sub(r"\s+", " ", text)  # Replace multiple spaces with one
-    text = re.sub(r"[^ء-يa-zA-Z0-9.,!?؛:\-\(\)\n ]+", "", text)  # Keep Arabic, English, and punctuation
-    return text.strip()
-def create_faiss_index(texts):
-    from sentence_transformers import SentenceTransformer
-    import faiss
-    # Clean the text before indexing
-    texts = [clean_text(t) for t in texts]
-    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-    embeddings = model.encode(texts)
-    dimension = embeddings.shape[1]
-    index = faiss.IndexFlatL2(dimension)
-    index.add(embeddings)
-    return index, texts

+# Import necessary modules
+import re
+import faiss
+from sentence_transformers import SentenceTransformer
+# Clean text function
+def clean_text(text):
+    """
+    Cleans text by removing unnecessary symbols and whitespace.
+    """
+    text = re.sub(r"\s+", " ", text)  # Replace multiple spaces with one
+    text = re.sub(r"[^ء-يa-zA-Z0-9.,!?؛:\-\(\)\n ]+", "", text)  # Keep Arabic, English, and punctuation
+    return text.strip()
 # Create FAISS index
 def create_faiss_index(texts):
     """
     Create a FAISS index from the provided list of texts.
     """
+    # Clean the text before indexing
+    texts = [clean_text(t) for t in texts]
     # Load pre-trained SentenceTransformer model
     model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
     return index, texts
 # Search the FAISS index
 def search_faiss(faiss_index, stored_texts, query, top_k=3):
     """
     Search the FAISS index for the most relevant texts based on the query.
     """
     # Load the same model used for indexing
     model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
     # Retrieve the corresponding texts
     results = [stored_texts[i] for i in indices[0] if i < len(stored_texts)]
+    return results