Spaces:

aibmedia
/

aibsimilarityllm

Sleeping

App Files Files Community

aibmedia committed on Dec 17, 2024

Commit

dec3f0d

verified ·

1 Parent(s): 5c1eb47

Update main.py

Browse files

Files changed (1) hide show

main.py +53 -137

main.py CHANGED Viewed

@@ -22,7 +22,7 @@ API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arct
-search = GoogleSearchAPIWrapper(k=20)
 bearer = "Bearer " + os.getenv('TOKEN')
 headers = {"Authorization": bearer }
@@ -47,25 +47,24 @@ def server_one():
 @app.route('/')
 async def server_1():
     query_sentence = "Obama's first name"
     duck_results = []
     try:
-        searchduck = DuckDuckGoSearchResults(output_format="list" ,max_results=5, num_results=5)
         duck_results = searchduck.invoke(query_sentence)
     except:
       print("An exception occurred")
     tool = Tool(
         name="google_search",
         description="Search Google for recent results.",
         func=search.run,
     )
     google_results = search.results( query_sentence , 10 )
-    print("type(duck_results)") ;    print(type(duck_results)) ;     print(type(google_results))
     all_results = duck_results + google_results
     # get the snippet put into list
@@ -73,12 +72,12 @@ async def server_1():
     for x in split_query_words:
         if x.isupper():
             important_keywords.append(x)
-## get the longest word in sentence
-# res = "" ; iteratorx = 0
-# for word in split_query_words:
-#     if len(word) > len(res):
-#         res = word
     # get google 20 items
     # get user query in the url param
@@ -89,19 +88,25 @@ async def server_1():
     # payload = {  "inputs": {  "source_sentence": "That is a green painted house",  "sentences": ["The house paint is green",  "That house is green","That house was painted","The house is green", "The house was bought yesterday", "This house was painted green",   "That house looks green",   "Today the house is clean "  ] } , }
     # payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the countrys capital throughout most of its history and regained the title through a presidential order in 1976",  "Manila officially the City of Manila (Filipino: Lungsod ng Maynila), is the capital and second-most populous city of the Philippines, after Quezon City.", "Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre of the countrys economic, political, social, and cultural activity.", "Quezon City is the capital of the Philippines", "Manila is the capital of the philippines", "For sometime Manila has been the capital of of the Philippines" , "What is the capital of Philippines", "Manila is not the capital of the Phillipines",   "Quezon city was the capital of the Philippines, until President Ferdinand Marcos Sr. moved the capital to back to Manila. "  ] } , }
     # payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the country's capital throughout most of its history and regained the title through a presidential order in 1976"] } , }
     payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
     response0 =  requests.post(API_URL0, headers=headers, json=payload)
     response1 =  requests.post(API_URL1, headers=headers, json=payload)
     response2 =  requests.post(API_URL2, headers=headers, json=payload)
     response3 =  requests.post(API_URL3, headers=headers, json=payload)
     varcontinue_similarity = 0
     print("type( response0.json() )")
     print(type(  response0.json() ))
     print(type(  response1.json() ))
     print(type(  response2.json() ))
     print(type(  response3.json() ))
     if type(response0.json()) == list and type(response1.json()) == list and type(response2.json()) == list and type(response3.json()) == list :
         similarity_scores =  response0.json() + response1.json() + response2.json() + response3.json()
         varcontinue_similarity = 1
     else:
         similarity_scores = "There's an error in llm similarity search retrieval"
@@ -113,9 +118,9 @@ async def server_1():
     if varcontinue_similarity == 1 :
         # call processing with 10 google search result or 15 search results
         if len(all_results) == 10 :
-            result_processed = process_similarity_10(all_results , similarity_scores )
         if len(all_results) > 10 :
-            result_processed = process_similarity_15(all_results , similarity_scores )
     # return all_results
     return result_processed
@@ -127,49 +132,15 @@ def threadserver():
-def process_similarity_15(web_results , similarity_scores):
     # print(similarity_scores)
     # print(type(similarity_scores))
     print("length")
-    print(len(similarity_scores))
     key_index = 0
     #copy + loop to get index
-    r_iterator = 0
-    resp_list0 = []
-    resp_list1 = []
-    resp_list2 = []
-    resp_list3 = []
-    for value_inlist in similarity_scores:
-        print(value_inlist)
-        print("index ")
-        print(key_index)
-        if key_index <= 14 :
-            resp_list0.append(value_inlist)
-        if key_index <= 29 and key_index > 14 :
-            resp_list1.append(value_inlist)
-        if key_index <= 44 and key_index > 29 :
-            resp_list2.append(value_inlist)
-        if key_index <= 59 and key_index > 44 :
-            resp_list3.append(value_inlist)
-        key_index = key_index + 1
-    print("The Response list 0 ")
-    print(resp_list0)
-    print("The Response list 1 ")
-    print(resp_list1)
-    print("The Response list 2 ")
-    print(resp_list2)
-    print("The Response list 3 ")
-    print(resp_list3)
-    # sorted 0 - 3 are sorted lists of score ; we must get their indices which is 0-8 that will be mapped to sentence index
-    sorted0 = sorted(resp_list0 , reverse=True)
-    sorted1 = sorted(resp_list1 , reverse=True)
-    sorted2 = sorted(resp_list2 , reverse=True)
-    sorted3 = sorted(resp_list3 , reverse=True)
     print("the sorted0-3")
     print(sorted0)
     print(sorted1)
@@ -180,24 +151,24 @@ def process_similarity_15(web_results , similarity_scores):
     sorted0_with_index = []
     for x in sorted0:
-        for y in resp_list0:
             if x == y:
                 print("index of sorted0")
-                print(resp_list0.index(y))
                 if x > 0.90:
-                    sorted0_with_index.append(resp_list0.index(y))
                     print("sorted_with_index")
                     print(sorted0_with_index)
     print("sorted0_with_index")
     print(sorted0_with_index)
     sorted1_with_index = []
     for x in sorted1:
-        for y in resp_list1:
             if x == y:
                 print("index of sorted1")
-                print(resp_list1.index(y))
                 if y > 0.90:
-                    sorted1_with_index.append(resp_list1.index(y))
                     print("sorted_with_index")
                     print(sorted1_with_index)
@@ -206,15 +177,15 @@ def process_similarity_15(web_results , similarity_scores):
     sorted2_with_index = []
     print("b4 for x in sorted2:")
-    print("resp_list2:" + str(resp_list2))
     print("sorted:" + str(sorted2))
     for x in sorted2:
-        for y in resp_list2:
             if x == y:
                 print("index of sorted2")
-                print(resp_list2.index(y))
                 if y > 0.90:
-                    sorted2_with_index.append(resp_list2.index(y))
                     print("sorted_with_index")
                     print(sorted2_with_index)
@@ -222,14 +193,14 @@ def process_similarity_15(web_results , similarity_scores):
     print(sorted2_with_index)
     sorted3_with_index = []
     print("b4 for x in sorted3:")
-    print("resp_list3:" + str(resp_list3))
     for x in sorted3:
-        for y in resp_list3:
             if x == y:
                 print("index of sorted3")
-                print(resp_list3.index(y))
                 if y > 0.90:
-                    sorted3_with_index.append(resp_list3.index(y))
                     print("sorted_with_index")
                     print(sorted3_with_index)
@@ -318,52 +289,16 @@ def process_similarity_15(web_results , similarity_scores):
         print("No reliable similarity found by 4 llms")
     return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
-def process_similarity_10(web_results , similarity_scores):
     # print(similarity_scores)
     # print(type(similarity_scores))
     print("length")
-    print(len(similarity_scores))
     key_index = 0
     #copy + loop to get index
-    r_iterator = 0
-    resp_list0 = []
-    resp_list1 = []
-    resp_list2 = []
-    resp_list3 = []
-    for value_inlist in similarity_scores:
-        print(value_inlist)
-        print("index ")
-        print(key_index)
-        if key_index <= 9 :
-            resp_list0.append(value_inlist)
-        if key_index <= 19 and key_index > 9 :
-            resp_list1.append(value_inlist)
-        if key_index <= 29 and key_index > 19 :
-            resp_list2.append(value_inlist)
-        if key_index <= 39 and key_index > 29 :
-            resp_list3.append(value_inlist)
-        key_index = key_index + 1
-    print("The Response list 0 ")
-    print(resp_list0)
-    print("The Response list 1 ")
-    print(resp_list1)
-    print("The Response list 2 ")
-    print(resp_list2)
-    print("The Response list 3 ")
-    print(resp_list3)
-    # sorted 0 - 3 are sorted lists of score ; we must get their indices which is 0-8 that will be mapped to sentence index
-    sorted0 = sorted(resp_list0 , reverse=True)
-    sorted1 = sorted(resp_list1 , reverse=True)
-    sorted2 = sorted(resp_list2 , reverse=True)
-    sorted3 = sorted(resp_list3 , reverse=True)
     print("the sorted0-3")
     print(sorted0)
     print(sorted1)
@@ -374,24 +309,24 @@ def process_similarity_10(web_results , similarity_scores):
     sorted0_with_index = []
     for x in sorted0:
-        for y in resp_list0:
             if x == y:
                 print("index of sorted0")
-                print(resp_list0.index(y))
                 if x > 0.90:
-                    sorted0_with_index.append(resp_list0.index(y))
                     print("sorted_with_index")
                     print(sorted0_with_index)
     print("sorted0_with_index")
     print(sorted0_with_index)
     sorted1_with_index = []
     for x in sorted1:
-        for y in resp_list1:
             if x == y:
                 print("index of sorted1")
-                print(resp_list1.index(y))
                 if y > 0.90:
-                    sorted1_with_index.append(resp_list1.index(y))
                     print("sorted_with_index")
                     print(sorted1_with_index)
@@ -400,15 +335,15 @@ def process_similarity_10(web_results , similarity_scores):
     sorted2_with_index = []
     print("b4 for x in sorted2:")
-    print("resp_list2:" + str(resp_list2))
     print("sorted:" + str(sorted2))
     for x in sorted2:
-        for y in resp_list2:
             if x == y:
                 print("index of sorted2")
-                print(resp_list2.index(y))
                 if y > 0.90:
-                    sorted2_with_index.append(resp_list2.index(y))
                     print("sorted_with_index")
                     print(sorted2_with_index)
@@ -416,14 +351,14 @@ def process_similarity_10(web_results , similarity_scores):
     print(sorted2_with_index)
     sorted3_with_index = []
     print("b4 for x in sorted3:")
-    print("resp_list3:" + str(resp_list3))
     for x in sorted3:
-        for y in resp_list3:
             if x == y:
                 print("index of sorted3")
-                print(resp_list3.index(y))
                 if y > 0.90:
-                    sorted3_with_index.append(resp_list3.index(y))
                     print("sorted_with_index")
                     print(sorted3_with_index)
@@ -511,27 +446,8 @@ def process_similarity_10(web_results , similarity_scores):
     else:
         print("No reliable similarity found by 4 llms")
     return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
-    # index_sorted0 = sorted0_with_index[:4]
-    # index_sorted1 = sorted1_with_index[:4]
-    # index_sorted2 = sorted2_with_index[:4]
-    # index_sorted3 = sorted3_with_index[:4]
-    # combined_indexes = index_sorted0 +index_sorted1 +index_sorted2 +index_sorted3
-    # uniq_list = []
-    # print("combined_indexes")
-    # print(combined_indexes)
-    # for item in combined_indexes:
-    #     if item not in uniq_list:
-    #         uniq_list.append(item)
-    # print("uniq_list")
-    # print(uniq_list)
-    # top_3_indexes = []
-    # get the top 3 from the combined_indexes
-    # the top 3 indexes must be above .78 similarity score
-    # the top 3 must have occured 4 times or more in combined_indexes
 if __name__ == '__main__':
   app.run(host='0.0.0.0', port=8080)

+search = GoogleSearchAPIWrapper()
 bearer = "Bearer " + os.getenv('TOKEN')
 headers = {"Authorization": bearer }
 @app.route('/')
 async def server_1():
+    # check docs first then check similarity
     query_sentence = "Obama's first name"
     duck_results = []
     try:
+        searchduck = DuckDuckGoSearchResults(output_format="list", max_results=5, num_results=5)
         duck_results = searchduck.invoke(query_sentence)
     except:
       print("An exception occurred")
     tool = Tool(
         name="google_search",
         description="Search Google for recent results.",
         func=search.run,
     )
     google_results = search.results( query_sentence , 10 )
+    print("type(duck_results)") ; print(type(duck_results)) ; print(type(google_results))
     all_results = duck_results + google_results
     # get the snippet put into list
     for x in split_query_words:
         if x.isupper():
             important_keywords.append(x)
+    ## get the longest word in sentence
+    # res = "" ; iteratorx = 0
+    # for word in split_query_words:
+    #     if len(word) > len(res):
+    #         res = word
     # get google 20 items
     # get user query in the url param
     # payload = {  "inputs": {  "source_sentence": "That is a green painted house",  "sentences": ["The house paint is green",  "That house is green","That house was painted","The house is green", "The house was bought yesterday", "This house was painted green",   "That house looks green",   "Today the house is clean "  ] } , }
     # payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the countrys capital throughout most of its history and regained the title through a presidential order in 1976",  "Manila officially the City of Manila (Filipino: Lungsod ng Maynila), is the capital and second-most populous city of the Philippines, after Quezon City.", "Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre of the countrys economic, political, social, and cultural activity.", "Quezon City is the capital of the Philippines", "Manila is the capital of the philippines", "For sometime Manila has been the capital of of the Philippines" , "What is the capital of Philippines", "Manila is not the capital of the Phillipines",   "Quezon city was the capital of the Philippines, until President Ferdinand Marcos Sr. moved the capital to back to Manila. "  ] } , }
     # payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the country's capital throughout most of its history and regained the title through a presidential order in 1976"] } , }
     payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
     response0 =  requests.post(API_URL0, headers=headers, json=payload)
     response1 =  requests.post(API_URL1, headers=headers, json=payload)
     response2 =  requests.post(API_URL2, headers=headers, json=payload)
     response3 =  requests.post(API_URL3, headers=headers, json=payload)
     varcontinue_similarity = 0
     print("type( response0.json() )")
     print(type(  response0.json() ))
     print(type(  response1.json() ))
     print(type(  response2.json() ))
     print(type(  response3.json() ))
     if type(response0.json()) == list and type(response1.json()) == list and type(response2.json()) == list and type(response3.json()) == list :
         similarity_scores =  response0.json() + response1.json() + response2.json() + response3.json()
+        # If all list then pass to process func
+        sorted0 = sorted(response0.json() , reverse=True); sorted1 = sorted(response1.json() , reverse=True)
+        sorted2 = sorted(response2.json() , reverse=True); sorted3 = sorted(response3.json() , reverse=True)
         varcontinue_similarity = 1
     else:
         similarity_scores = "There's an error in llm similarity search retrieval"
     if varcontinue_similarity == 1 :
         # call processing with 10 google search result or 15 search results
         if len(all_results) == 10 :
+            result_processed = process_similarity_10(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json()  )
         if len(all_results) > 10 :
+            result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json()  )
     # return all_results
     return result_processed
+def process_similarity_15(sorted0, sorted1, sorted2, sorted3, actualscore0, actualscore1, actualscore2, actualscore3):
     # print(similarity_scores)
     # print(type(similarity_scores))
     print("length")
+    # print(len(similarity_scores))
     key_index = 0
     #copy + loop to get index
     print("the sorted0-3")
     print(sorted0)
     print(sorted1)
     sorted0_with_index = []
     for x in sorted0:
+        for y in actualscore0:
             if x == y:
                 print("index of sorted0")
+                print(actualscore0.index(y))
                 if x > 0.90:
+                    sorted0_with_index.append(actualscore0.index(y))
                     print("sorted_with_index")
                     print(sorted0_with_index)
     print("sorted0_with_index")
     print(sorted0_with_index)
     sorted1_with_index = []
     for x in sorted1:
+        for y in actualscore1:
             if x == y:
                 print("index of sorted1")
+                print(actualscore1.index(y))
                 if y > 0.90:
+                    sorted1_with_index.append(actualscore1.index(y))
                     print("sorted_with_index")
                     print(sorted1_with_index)
     sorted2_with_index = []
     print("b4 for x in sorted2:")
+    print("resp_list2:" + str(actualscore2))
     print("sorted:" + str(sorted2))
     for x in sorted2:
+        for y in actualscore2:
             if x == y:
                 print("index of sorted2")
+                print(actualscore2.index(y))
                 if y > 0.90:
+                    sorted2_with_index.append(actualscore2.index(y))
                     print("sorted_with_index")
                     print(sorted2_with_index)
     print(sorted2_with_index)
     sorted3_with_index = []
     print("b4 for x in sorted3:")
+    print("resp_list3:" + str(actualscore3))
     for x in sorted3:
+        for y in actualscore3:
             if x == y:
                 print("index of sorted3")
+                print(actualscore3.index(y))
                 if y > 0.90:
+                    sorted3_with_index.append(actualscore3.index(y))
                     print("sorted_with_index")
                     print(sorted3_with_index)
         print("No reliable similarity found by 4 llms")
     return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
+def process_similarity_10(sorted0, sorted1, sorted2, sorted3, actualscore0, actualscore1, actualscore2, actualscore3):
     # print(similarity_scores)
     # print(type(similarity_scores))
     print("length")
+    # print(len(similarity_scores))
     key_index = 0
     #copy + loop to get index
     print("the sorted0-3")
     print(sorted0)
     print(sorted1)
     sorted0_with_index = []
     for x in sorted0:
+        for y in actualscore0:
             if x == y:
                 print("index of sorted0")
+                print(actualscore0.index(y))
                 if x > 0.90:
+                    sorted0_with_index.append(actualscore0.index(y))
                     print("sorted_with_index")
                     print(sorted0_with_index)
     print("sorted0_with_index")
     print(sorted0_with_index)
     sorted1_with_index = []
     for x in sorted1:
+        for y in actualscore1:
             if x == y:
                 print("index of sorted1")
+                print(actualscore1.index(y))
                 if y > 0.90:
+                    sorted1_with_index.append(actualscore1.index(y))
                     print("sorted_with_index")
                     print(sorted1_with_index)
     sorted2_with_index = []
     print("b4 for x in sorted2:")
+    print("resp_list2:" + str(actualscore2))
     print("sorted:" + str(sorted2))
     for x in sorted2:
+        for y in actualscore2:
             if x == y:
                 print("index of sorted2")
+                print(actualscore2.index(y))
                 if y > 0.90:
+                    sorted2_with_index.append(actualscore2.index(y))
                     print("sorted_with_index")
                     print(sorted2_with_index)
     print(sorted2_with_index)
     sorted3_with_index = []
     print("b4 for x in sorted3:")
+    print("resp_list3:" + str(actualscore3))
     for x in sorted3:
+        for y in actualscore3:
             if x == y:
                 print("index of sorted3")
+                print(actualscore3.index(y))
                 if y > 0.90:
+                    sorted3_with_index.append(actualscore3.index(y))
                     print("sorted_with_index")
                     print(sorted3_with_index)
     else:
         print("No reliable similarity found by 4 llms")
     return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
 if __name__ == '__main__':
   app.run(host='0.0.0.0', port=8080)