Update app.py
app.py
CHANGED
@@ -9,7 +9,7 @@ from langchain.retrievers.document_compressors import CrossEncoderReranker
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 from appStore.prep_data import process_giz_worldwide
 from appStore.prep_utils import create_documents
-from appStore.embed import hybrid_embed_chunks
+from appStore.embed import hybrid_embed_chunks, get_local_qdrant
 
 # get the device to be used either gpu or cpu
 device = 'cuda' if cuda.is_available() else 'cpu'
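Note: get_local_qdrant is not new code. The next hunk deletes it from app.py, so the import change above reflects the helper moving into appStore.embed and, judging by the call site in the last hunk, growing a collection-name parameter.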
@@ -18,21 +18,7 @@ device = 'cuda' if cuda.is_available() else 'cpu'
 st.set_page_config(page_title="SEARCH IATI",layout='wide')
 st.title("SEARCH IATI Database")
 var=st.text_input("enter keyword")
-
-
-@st.cache_resource
-def get_local_qdrant():
-    """once the local qdrant server is created this is used to make the connection to the existing server"""
-
-    qdrant_collections = {}
-    embeddings = HuggingFaceEmbeddings(
-        model_kwargs = {'device': device},
-        encode_kwargs = {'normalize_embeddings': True},
-        model_name='BAAI/bge-m3')
-    client = QdrantClient(path="/data/local_qdrant")
-    print("Collections in local Qdrant:",client.get_collections())
-    qdrant_collections['all'] = Qdrant(client=client, collection_name='all', embeddings=embeddings)
-    return qdrant_collections
+
 
 def get_context(vectorstore,query):
     # create metadata filter
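Since appStore/embed.py is not part of this commit, the relocated helper itself is not shown. A minimal sketch of what it presumably looks like now, reconstructed from the deleted body above and the get_local_qdrant('giz_worldwide') call site in the last hunk; the parameter name and module layout are assumptions:

# appStore/embed.py (sketch, reconstructed -- the real file is not in this diff)
import streamlit as st
from torch import cuda
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient

device = 'cuda' if cuda.is_available() else 'cpu'

@st.cache_resource
def get_local_qdrant(collection_name):
    """Connect to the existing local Qdrant store; return {collection_name: vectorstore}."""
    embeddings = HuggingFaceEmbeddings(
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True},
        model_name='BAAI/bge-m3')
    client = QdrantClient(path="/data/local_qdrant")
    print("Collections in local Qdrant:", client.get_collections())
    return {collection_name: Qdrant(client=client,
                                    collection_name=collection_name,
                                    embeddings=embeddings)}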
@@ -54,12 +40,12 @@ def get_context(vectorstore,query):
     return context_retrieved
 
 # first we create the chunks for iati documents
-chunks = process_giz_worldwide()
-for i in range(5):
-    print(i,"\n",chunks.loc[i,'chunks'])
-temp_df = chunks[:5]
-temp_doc = create_documents(temp_df,'chunks')
-hybrid_embed_chunks(temp_doc)
+#chunks = process_giz_worldwide()
+#for i in range(5):
+#    print(i,"\n",chunks.loc[i,'chunks'])
+#temp_df = chunks[:5]
+#temp_doc = create_documents(temp_df,'chunks')
+#hybrid_embed_chunks(temp_doc)
 
 
 #print("chunking done")
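The ingestion pipeline (chunk, wrap as documents, hybrid-embed) is now commented out, presumably because it only needs to run once to populate the local Qdrant store, and leaving it live would redo that work on every Streamlit rerun. A cleaner alternative than toggling comments would be a separate one-off script; a hypothetical ingest.py sketch using the same appStore helpers:

# ingest.py (hypothetical) -- populate the local Qdrant store once,
# instead of commenting the pipeline in and out of app.py.
from appStore.prep_data import process_giz_worldwide
from appStore.prep_utils import create_documents
from appStore.embed import hybrid_embed_chunks

if __name__ == "__main__":
    chunks = process_giz_worldwide()           # chunk the GIZ worldwide data
    docs = create_documents(chunks, 'chunks')  # wrap the 'chunks' column as documents
    hybrid_embed_chunks(docs)                  # write dense + sparse embeddings to Qdrant
    print("chunking and embedding done")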
@@ -67,9 +53,11 @@ hybrid_embed_chunks(temp_doc)
 # once the chunks are done, we perform hybrid embeddings
 #embed_chunks(chunks)
 
-
-
+vectorstores = get_local_qdrant('giz_worldwide')
+vectorstore = vectorstores['giz_worldwide']
 button=st.button("search")
+found_docs = vectorstore.similarity_search(var)
+print(found_docs)
 # results= get_context(vectorstore, f"find the relevant paragraphs for: {var}")
 if button:
     st.write(f"Found {len(results)} results for query:{var}")
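One bug survives this change: the success message still reads len(results), but results is only assigned inside the commented-out get_context call, so clicking search raises a NameError. The new found_docs list is the natural replacement, and moving similarity_search behind the button avoids querying Qdrant on every rerun. A possible fix for the tail of app.py, assuming found_docs holds LangChain Document objects:

button = st.button("search")
if button:
    found_docs = vectorstore.similarity_search(var)   # query only when clicked
    st.write(f"Found {len(found_docs)} results for query: {var}")
    for doc in found_docs:
        st.write(doc.page_content)                    # Documents expose page_content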
|