Tollef Jørgensen committed on
Commit
acc49a5
·
1 Parent(s): d45f5b7
Files changed (5) hide show
  1. .gitignore +1 -0
  2. app.py +36 -16
  3. faiss.lookup.csv +0 -0
  4. faiss.index.zip → files.zip +2 -2
  5. prep.py +5 -2
.gitignore CHANGED
@@ -1 +1,2 @@
1
  *.index
 
 
1
  *.index
2
+ *.csv
app.py CHANGED
@@ -1,16 +1,22 @@
 
1
  import faiss
2
  import gradio as gr
3
  import numpy as np
4
  import pandas as pd
5
  from sentence_transformers import SentenceTransformer
6
  import zipfile
 
 
 
 
7
 
8
- index_file = "faiss.index.zip"
9
 
10
- with zipfile.ZipFile(index_file, "r") as z:
 
11
  z.extractall()
12
 
13
  pr_number = 14
 
14
  model = SentenceTransformer(
15
  "intfloat/multilingual-e5-small",
16
  revision=f"refs/pr/{pr_number}",
@@ -36,7 +42,7 @@ class FaissIndex:
36
  def extract_docs(self, indices, k):
37
  indices = list(indices[0])
38
  lookup = self.df.iloc[indices]
39
- questions = lookup["question"].values
40
  answers = lookup["answer"].values
41
 
42
  pairs = list(zip(questions, answers))
@@ -51,7 +57,7 @@ class FaissIndex:
51
  # format pairs as: f"{answer}\n{kilde: {question}}"
52
  formatted_pairs = []
53
  for pair in filtered_pairs:
54
- formatted_pairs.append(f"{pair[1]} (kilde: {pair[0]})")
55
  return formatted_pairs
56
 
57
  def search(self, query: str, k: int = 5):
@@ -62,10 +68,13 @@ class FaissIndex:
62
  return self.extract_docs(indices, k)
63
 
64
 
 
65
  index = FaissIndex(model)
66
 
67
 
68
  def query_faiss_index(søketekst):
 
 
69
  """
70
  Queries the FAISS index with the provided search text and returns the top 5 results.
71
  Args:
@@ -74,20 +83,31 @@ def query_faiss_index(søketekst):
74
  str: A string containing the top 5 search results, separated by double newlines.
75
  """
76
 
77
- results = index.search(søketekst, k=5)
78
  return "\n\n".join(results)
79
 
80
 
81
  # Create the Gradio interface
82
- iface = gr.Interface(
83
- fn=query_faiss_index,
84
- inputs=gr.Textbox(lines=2, placeholder="Søk etter info i SIKT", interactive=True),
85
- outputs=gr.Textbox(label="Søkeresultater", type="text", lines=15),
86
- title="SIKT-FAQ",
87
- description="Semantisk søk i SIKT med Openvino.",
88
- live=True
89
- )
90
-
 
 
 
 
 
 
 
 
 
 
 
91
  # Launch the Gradio app
92
- if __name__ == "__main__":
93
- iface.launch()
 
1
+
2
  import faiss
3
  import gradio as gr
4
  import numpy as np
5
  import pandas as pd
6
  from sentence_transformers import SentenceTransformer
7
  import zipfile
8
+ import os
9
+ import logging
10
+
11
+ logging.basicConfig(level=logging.ERROR)
12
 
 
13
 
14
+ # if not os.path.exists("faiss.index"):
15
+ with zipfile.ZipFile("files.zip", "r") as z:
16
  z.extractall()
17
 
18
  pr_number = 14
19
+ logging.info("Loading embedding model")
20
  model = SentenceTransformer(
21
  "intfloat/multilingual-e5-small",
22
  revision=f"refs/pr/{pr_number}",
 
42
  def extract_docs(self, indices, k):
43
  indices = list(indices[0])
44
  lookup = self.df.iloc[indices]
45
+ questions = lookup["query"].values
46
  answers = lookup["answer"].values
47
 
48
  pairs = list(zip(questions, answers))
 
57
  # format pairs as: f"{answer}\n{kilde: {question}}"
58
  formatted_pairs = []
59
  for pair in filtered_pairs:
60
+ formatted_pairs.append(f"{pair[1]}")
61
  return formatted_pairs
62
 
63
  def search(self, query: str, k: int = 5):
 
68
  return self.extract_docs(indices, k)
69
 
70
 
71
+ logging.info("Loading FAISS index")
72
  index = FaissIndex(model)
73
 
74
 
75
  def query_faiss_index(søketekst):
76
+ if len(søketekst) < 3:
77
+ return
78
  """
79
  Queries the FAISS index with the provided search text and returns the top 5 results.
80
  Args:
 
83
  str: A string containing the top 5 search results, separated by double newlines.
84
  """
85
 
86
+ results = index.search(søketekst, k=3)
87
  return "\n\n".join(results)
88
 
89
 
90
  # Create the Gradio interface
91
+ # iface = gr.Interface(
92
+ # fn=query_faiss_index,
93
+ # inputs=gr.Textbox(lines=2, placeholder="Søk etter info i SIKT", interactive=True, min_width="30vw"),
94
+ # outputs=gr.Textbox(label="Søkeresultater", type="text", lines=20, min_width="70vw"),
95
+ # title="SIKT-FAQ",
96
+ # description="Semantisk søk i SIKT med Openvino.",
97
+ # live=True
98
+ # )
99
+
100
+ with gr.Blocks() as blocks:
101
+ gr.Markdown("## SIKT-FAQ")
102
+ with gr.Row():
103
+ box_search = gr.Textbox(label="Søk etter informasjon i SIKT", lines=1, placeholder="Innlogging i FEIDE...", interactive=True)
104
+ with gr.Row():
105
+ box_output = gr.Textbox(label="Søkeresultater", type="text", lines=20)
106
+
107
+ box_search.change(fn=query_faiss_index, inputs=box_search, outputs=box_output, max_batch_size=1)
108
+
109
+
110
+ blocks.launch()
111
  # Launch the Gradio app
112
+ # if __name__ == "__main__":
113
+ # iface.launch()
faiss.lookup.csv DELETED
The diff for this file is too large to render. See raw diff
 
faiss.index.zip → files.zip RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a85d3ff0d5a335201c3b92fb3466df469c4f3fb9569a087166d393ca7801527e
3
- size 6895269
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8739c76e681f900923b900c9df0ef75cf421d39cabb54650c4b9ad19b6a76d85
3
+ size 22
prep.py CHANGED
@@ -1,5 +1,8 @@
1
  import zipfile
2
 
3
  index_file = "faiss.index"
4
- with zipfile.ZipFile(index_file + ".zip", "w") as z:
5
- z.write(index_file)
 
 
 
 
1
  import zipfile
2
 
3
  index_file = "faiss.index"
4
+ lookup_file = "faiss.lookup.csv"
5
+
6
+ with zipfile.ZipFile("files.zip", "w") as z:
7
+ z.write(index_file)
8
+ z.write(lookup_file)