Update app.py
app.py
CHANGED
@@ -9,7 +9,7 @@ from langchain.retrievers.document_compressors import CrossEncoderReranker
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 from appStore.prep_data import process_giz_worldwide
 from appStore.prep_utils import create_documents
-from appStore.embed import hybrid_embed_chunks
+from appStore.embed import hybrid_embed_chunks, get_local_qdrant
 
 # get the device to be used either gpu or cpu
 device = 'cuda' if cuda.is_available() else 'cpu'
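Note: get_local_qdrant is not new code. The next hunk deletes it from app.py, so the import change above reflects the helper moving into appStore.embed and, judging by the call site in the last hunk, growing a collection-name parameter.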
@@ -18,21 +18,7 @@ device = 'cuda' if cuda.is_available() else 'cpu'
 st.set_page_config(page_title="SEARCH IATI",layout='wide')
 st.title("SEARCH IATI Database")
 var=st.text_input("enter keyword")
-
-
-@st.cache_resource
-def get_local_qdrant():
-    """once the local qdrant server is created this is used to make the connection to the existing server"""
-
-    qdrant_collections = {}
-    embeddings = HuggingFaceEmbeddings(
-        model_kwargs = {'device': device},
-        encode_kwargs = {'normalize_embeddings': True},
-        model_name='BAAI/bge-m3')
-    client = QdrantClient(path="/data/local_qdrant")
-    print("Collections in local Qdrant:",client.get_collections())
-    qdrant_collections['all'] = Qdrant(client=client, collection_name='all', embeddings=embeddings)
-    return qdrant_collections
+
 
 def get_context(vectorstore,query):
     # create metadata filter
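Since appStore/embed.py is not part of this commit, the relocated helper itself is not shown. A minimal sketch of what it presumably looks like now, reconstructed from the deleted body above and the get_local_qdrant('giz_worldwide') call site in the last hunk; the parameter name and module layout are assumptions:

# appStore/embed.py (sketch, reconstructed -- the real file is not in this diff)
import streamlit as st
from torch import cuda
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient

device = 'cuda' if cuda.is_available() else 'cpu'

@st.cache_resource
def get_local_qdrant(collection_name):
    """Connect to the existing local Qdrant store; return {collection_name: vectorstore}."""
    embeddings = HuggingFaceEmbeddings(
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True},
        model_name='BAAI/bge-m3')
    client = QdrantClient(path="/data/local_qdrant")
    print("Collections in local Qdrant:", client.get_collections())
    return {collection_name: Qdrant(client=client,
                                    collection_name=collection_name,
                                    embeddings=embeddings)}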
@@ -54,12 +40,12 @@ def get_context(vectorstore,query):
     return context_retrieved
 
 # first we create the chunks for iati documents
-chunks = process_giz_worldwide()
-for i in range(5):
-    print(i,"\n",chunks.loc[i,'chunks'])
-temp_df = chunks[:5]
-temp_doc = create_documents(temp_df,'chunks')
-hybrid_embed_chunks(temp_doc)
+#chunks = process_giz_worldwide()
+#for i in range(5):
+#    print(i,"\n",chunks.loc[i,'chunks'])
+#temp_df = chunks[:5]
+#temp_doc = create_documents(temp_df,'chunks')
+#hybrid_embed_chunks(temp_doc)
 
 
 #print("chunking done")
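The ingestion pipeline (chunk, wrap as documents, hybrid-embed) is now commented out, presumably because it only needs to run once to populate the local Qdrant store, and leaving it live would redo that work on every Streamlit rerun. A cleaner alternative than toggling comments would be a separate one-off script; a hypothetical ingest.py sketch using the same appStore helpers:

# ingest.py (hypothetical) -- populate the local Qdrant store once,
# instead of commenting the pipeline in and out of app.py.
from appStore.prep_data import process_giz_worldwide
from appStore.prep_utils import create_documents
from appStore.embed import hybrid_embed_chunks

if __name__ == "__main__":
    chunks = process_giz_worldwide()           # chunk the GIZ worldwide data
    docs = create_documents(chunks, 'chunks')  # wrap the 'chunks' column as documents
    hybrid_embed_chunks(docs)                  # write dense + sparse embeddings to Qdrant
    print("chunking and embedding done")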
@@ -67,9 +53,11 @@ hybrid_embed_chunks(temp_doc)
 # once the chunks are done, we perform hybrid embeddings
 #embed_chunks(chunks)
 
-
-
+vectorstores = get_local_qdrant('giz_worldwide')
+vectorstore = vectorstores['giz_worldwide']
 button=st.button("search")
+found_docs = vectorstore.similarity_search(var)
+print(found_docs)
 # results= get_context(vectorstore, f"find the relevant paragraphs for: {var}")
 if button:
     st.write(f"Found {len(results)} results for query:{var}")
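One bug survives this change: the success message still reads len(results), but results is only assigned inside the commented-out get_context call, so clicking search raises a NameError. The new found_docs list is the natural replacement, and moving similarity_search behind the button avoids querying Qdrant on every rerun. A possible fix for the tail of app.py, assuming found_docs holds LangChain Document objects:

button = st.button("search")
if button:
    found_docs = vectorstore.similarity_search(var)   # query only when clicked
    st.write(f"Found {len(found_docs)} results for query: {var}")
    for doc in found_docs:
        st.write(doc.page_content)                    # Documents expose page_content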
|