Ralqasimi committed on
Commit
a99e8b1
·
verified ·
1 Parent(s): 9185bbd

Update knowledge_base.py

Browse files
Files changed (1) hide show
  1. knowledge_base.py +17 -32
knowledge_base.py CHANGED
@@ -1,10 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Create FAISS index
2
  def create_faiss_index(texts):
3
  """
4
  Create a FAISS index from the provided list of texts.
5
  """
6
- import faiss
7
- from sentence_transformers import SentenceTransformer
8
 
9
  # Load pre-trained SentenceTransformer model
10
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
@@ -17,14 +31,11 @@ def create_faiss_index(texts):
17
 
18
  return index, texts
19
 
20
-
21
  # Search the FAISS index
22
  def search_faiss(faiss_index, stored_texts, query, top_k=3):
23
  """
24
  Search the FAISS index for the most relevant texts based on the query.
25
  """
26
- from sentence_transformers import SentenceTransformer
27
-
28
  # Load the same model used for indexing
29
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
30
 
@@ -37,30 +48,4 @@ def search_faiss(faiss_index, stored_texts, query, top_k=3):
37
  # Retrieve the corresponding texts
38
  results = [stored_texts[i] for i in indices[0] if i < len(stored_texts)]
39
 
40
- return results
41
-
42
- import re
43
-
44
- def clean_text(text):
45
- """
46
- Cleans text by removing unnecessary symbols and whitespace.
47
- """
48
- text = re.sub(r"\s+", " ", text) # Replace multiple spaces with one
49
- text = re.sub(r"[^ء-يa-zA-Z0-9.,!?؛:\-\(\)\n ]+", "", text) # Keep Arabic, English, and punctuation
50
- return text.strip()
51
-
52
- def create_faiss_index(texts):
53
- from sentence_transformers import SentenceTransformer
54
- import faiss
55
-
56
- # Clean the text before indexing
57
- texts = [clean_text(t) for t in texts]
58
-
59
- model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
60
- embeddings = model.encode(texts)
61
-
62
- dimension = embeddings.shape[1]
63
- index = faiss.IndexFlatL2(dimension)
64
- index.add(embeddings)
65
-
66
- return index, texts
 
1
+ # Import necessary modules
2
+ import re
3
+ import faiss
4
+ from sentence_transformers import SentenceTransformer
5
+
6
+ # Clean text function
7
+ def clean_text(text):
8
+ """
9
+ Cleans text by removing unnecessary symbols and whitespace.
10
+ """
11
+ text = re.sub(r"\s+", " ", text) # Replace multiple spaces with one
12
+ text = re.sub(r"[^ء-يa-zA-Z0-9.,!?؛:\-\(\)\n ]+", "", text) # Keep Arabic, English, and punctuation
13
+ return text.strip()
14
+
15
  # Create FAISS index
16
  def create_faiss_index(texts):
17
  """
18
  Create a FAISS index from the provided list of texts.
19
  """
20
+ # Clean the text before indexing
21
+ texts = [clean_text(t) for t in texts]
22
 
23
  # Load pre-trained SentenceTransformer model
24
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 
31
 
32
  return index, texts
33
 
 
34
  # Search the FAISS index
35
  def search_faiss(faiss_index, stored_texts, query, top_k=3):
36
  """
37
  Search the FAISS index for the most relevant texts based on the query.
38
  """
 
 
39
  # Load the same model used for indexing
40
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
41
 
 
48
  # Retrieve the corresponding texts
49
  results = [stored_texts[i] for i in indices[0] if i < len(stored_texts)]
50
 
51
+ return results