ppsingh committed
Commit bcc1f88 (verified) · Parent(s): 5323bc1

Update app.py

Files changed (1): app.py (+12 −24)
app.py CHANGED
@@ -9,7 +9,7 @@ from langchain.retrievers.document_compressors import CrossEncoderReranker
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 from appStore.prep_data import process_giz_worldwide
 from appStore.prep_utils import create_documents
-from appStore.embed import hybrid_embed_chunks
+from appStore.embed import hybrid_embed_chunks, get_local_qdrant
 
 # get the device to be used, either gpu or cpu
 device = 'cuda' if cuda.is_available() else 'cpu'
@@ -18,21 +18,7 @@ device = 'cuda' if cuda.is_available() else 'cpu'
 st.set_page_config(page_title="SEARCH IATI", layout='wide')
 st.title("SEARCH IATI Database")
 var = st.text_input("enter keyword")
-
-
-@st.cache_resource
-def get_local_qdrant():
-    """Once the local Qdrant server is created, this is used to connect to the existing server."""
-
-    qdrant_collections = {}
-    embeddings = HuggingFaceEmbeddings(
-        model_kwargs={'device': device},
-        encode_kwargs={'normalize_embeddings': True},
-        model_name='BAAI/bge-m3')
-    client = QdrantClient(path="/data/local_qdrant")
-    print("Collections in local Qdrant:", client.get_collections())
-    qdrant_collections['all'] = Qdrant(client=client, collection_name='all', embeddings=embeddings)
-    return qdrant_collections
+
 
 def get_context(vectorstore, query):
     # create metadata filter
@@ -54,12 +40,12 @@ def get_context(vectorstore, query):
     return context_retrieved
 
 # first we create the chunks for iati documents
-chunks = process_giz_worldwide()
-for i in range(5):
-    print(i, "\n", chunks.loc[i, 'chunks'])
-temp_df = chunks[:5]
-temp_doc = create_documents(temp_df, 'chunks')
-hybrid_embed_chunks(temp_doc)
+#chunks = process_giz_worldwide()
+#for i in range(5):
+#    print(i, "\n", chunks.loc[i, 'chunks'])
+#temp_df = chunks[:5]
+#temp_doc = create_documents(temp_df, 'chunks')
+#hybrid_embed_chunks(temp_doc)
 
 
 #print("chunking done")
@@ -67,9 +53,11 @@ hybrid_embed_chunks(temp_doc)
 # once the chunks are done, we perform hybrid embeddings
 #embed_chunks(chunks)
 
-# vectorstores = get_local_qdrant()
-# vectorstore = vectorstores['all']
+vectorstores = get_local_qdrant('giz_worldwide')
+vectorstore = vectorstores['giz_worldwide']
 button = st.button("search")
+found_docs = vectorstore.similarity_search(var)
+print(found_docs)
 # results = get_context(vectorstore, f"find the relevant paragraphs for: {var}")
 if button:
     st.write(f"Found {len(results)} results for query: {var}")
 