Spaces:
Sleeping
Sleeping
Commit
·
ada05be
1
Parent(s):
dbad462
Upgraded Gradio version to 5.6.0 in README. Upgraded pyarrow version to 18.1.0.
Browse files
- README.md +1 -1
- requirements.txt +1 -1
- requirements_aws.txt +1 -1
- requirements_gpu.txt +1 -1
- requirements_keyword_only.txt +1 -1
- requirements_no_semantic.txt +0 -9
- search_funcs/spacy_search_funcs.py +3 -3
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🔍
|
|
4 |
colorFrom: purple
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
|
|
4 |
colorFrom: purple
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.6.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
pandas==2.2.3
|
2 |
polars==0.20.3
|
3 |
-
pyarrow==
|
4 |
openpyxl==3.1.3
|
5 |
torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu
|
6 |
spacy==3.8.0
|
|
|
1 |
pandas==2.2.3
|
2 |
polars==0.20.3
|
3 |
+
pyarrow==18.1.0
|
4 |
openpyxl==3.1.3
|
5 |
torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu
|
6 |
spacy==3.8.0
|
requirements_aws.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
pandas==2.2.3
|
2 |
polars==0.20.3
|
3 |
-
pyarrow==
|
4 |
openpyxl==3.1.3
|
5 |
spacy==3.8.0
|
6 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
|
|
1 |
pandas==2.2.3
|
2 |
polars==0.20.3
|
3 |
+
pyarrow==18.1.0
|
4 |
openpyxl==3.1.3
|
5 |
spacy==3.8.0
|
6 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
requirements_gpu.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
pandas==2.2.3
|
2 |
polars==0.20.3
|
3 |
-
pyarrow==
|
4 |
openpyxl==3.1.3
|
5 |
torch==2.5.1 --index-url https://download.pytorch.org/whl/nightly/cu121
|
6 |
spacy==3.8.0
|
|
|
1 |
pandas==2.2.3
|
2 |
polars==0.20.3
|
3 |
+
pyarrow==18.1.0
|
4 |
openpyxl==3.1.3
|
5 |
torch==2.5.1 --index-url https://download.pytorch.org/whl/nightly/cu121
|
6 |
spacy==3.8.0
|
requirements_keyword_only.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
pandas==2.2.3
|
2 |
polars==0.20.3
|
3 |
-
pyarrow==
|
4 |
openpyxl==3.1.3
|
5 |
#torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu
|
6 |
spacy==3.8.0
|
|
|
1 |
pandas==2.2.3
|
2 |
polars==0.20.3
|
3 |
+
pyarrow==18.1.0
|
4 |
openpyxl==3.1.3
|
5 |
#torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu
|
6 |
spacy==3.8.0
|
requirements_no_semantic.txt
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
pandas==2.2.2
|
2 |
-
polars==0.20.3
|
3 |
-
pyarrow==14.0.2
|
4 |
-
openpyxl==3.1.2
|
5 |
-
spacy
|
6 |
-
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz
|
7 |
-
gradio
|
8 |
-
lxml==5.1.0
|
9 |
-
boto3==1.34.103
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
search_funcs/spacy_search_funcs.py
CHANGED
@@ -131,7 +131,7 @@ def spacy_fuzzy_search(string_query:str, tokenised_data: List[List[str]], origin
|
|
131 |
# Out file
|
132 |
query_str_file = "_".join(tokenised_query).replace(" ", "_") # Replace spaces with underscores
|
133 |
query_str_file = re.sub(r'[<>:"/\\|?*]', '', query_str_file) # Remove invalid characters
|
134 |
-
query_str_file = query_str_file[:
|
135 |
|
136 |
results_df_name = output_folder + "fuzzy_keyword_search_result_" + today_rev + "_" + query_str_file + ".xlsx"
|
137 |
|
@@ -140,8 +140,8 @@ def spacy_fuzzy_search(string_query:str, tokenised_data: List[List[str]], origin
|
|
140 |
|
141 |
#results_df_out.to_excel(results_df_name, index= None)
|
142 |
|
143 |
-
print("string_query:", string_query)
|
144 |
-
print(results_df_out)
|
145 |
|
146 |
# Highlight found text and save to file
|
147 |
results_df_out_wb = create_highlighted_excel_wb(results_df_out, string_query, "search_text")
|
|
|
131 |
# Out file
|
132 |
query_str_file = "_".join(tokenised_query).replace(" ", "_") # Replace spaces with underscores
|
133 |
query_str_file = re.sub(r'[<>:"/\\|?*]', '', query_str_file) # Remove invalid characters
|
134 |
+
query_str_file = query_str_file[:100] # Limit to 100 characters
|
135 |
|
136 |
results_df_name = output_folder + "fuzzy_keyword_search_result_" + today_rev + "_" + query_str_file + ".xlsx"
|
137 |
|
|
|
140 |
|
141 |
#results_df_out.to_excel(results_df_name, index= None)
|
142 |
|
143 |
+
#print("string_query:", string_query)
|
144 |
+
#print(results_df_out)
|
145 |
|
146 |
# Highlight found text and save to file
|
147 |
results_df_out_wb = create_highlighted_excel_wb(results_df_out, string_query, "search_text")
|