Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +46 -95
src/streamlit_app.py
CHANGED
@@ -1,20 +1,16 @@
|
|
1 |
import streamlit as st
|
2 |
from cryptography.fernet import Fernet
|
3 |
import time
|
4 |
-
|
5 |
import io
|
6 |
from transformers import pipeline
|
7 |
from streamlit_extras.stylable_container import stylable_container
|
8 |
import json
|
9 |
|
10 |
-
import nltk
|
11 |
|
12 |
-
|
13 |
from PyPDF2 import PdfReader
|
14 |
import docx
|
15 |
-
import zipfile
|
16 |
-
|
17 |
-
from gliner import GLiNER
|
18 |
|
19 |
|
20 |
|
@@ -97,9 +93,9 @@ if 'file_upload_attempts' not in st.session_state:
|
|
97 |
max_attempts = 5
|
98 |
|
99 |
# upload file
|
100 |
-
|
101 |
text = None
|
102 |
-
|
103 |
|
104 |
if upload_file is not None:
|
105 |
|
@@ -128,96 +124,51 @@ if upload_file is not None:
|
|
128 |
|
129 |
|
130 |
|
131 |
-
# generate and validate Fernet token for the current file
|
132 |
-
if 'fernet_token' not in st.session_state:
|
133 |
-
if text is not None:
|
134 |
-
st.session_state.fernet_token = generate_fernet_token(key, text)
|
135 |
-
else:
|
136 |
-
st.stop()
|
137 |
-
|
138 |
-
decrypted_data_streamlit, error_streamlit = validate_fernet_token(key, st.session_state.fernet_token, ttl_seconds=3600)
|
139 |
-
|
140 |
-
if error_streamlit:
|
141 |
-
if text is not None:
|
142 |
-
st.warning("Please press Request Authorization.")
|
143 |
-
if st.button("Request Authorization"):
|
144 |
-
st.session_state.fernet_token = generate_fernet_token(key, text)
|
145 |
-
st.success("Authorization granted")
|
146 |
-
decrypted_data_streamlit, error_streamlit = validate_fernet_token(key, st.session_state.fernet_token, ttl_seconds=3600)
|
147 |
-
if error_streamlit:
|
148 |
-
st.error(f"Your authorization has expired: {error_streamlit}")
|
149 |
-
st.stop()
|
150 |
-
|
151 |
-
|
152 |
-
st.divider()
|
153 |
-
|
154 |
|
155 |
-
|
156 |
-
|
157 |
-
if st.button("Results"):
|
158 |
-
if st.session_state['file_upload_attempts'] >= max_attempts:
|
159 |
st.error(f"You have requested results {max_attempts} times. You have reached your daily request limit.")
|
160 |
st.stop()
|
161 |
-
st.session_state['
|
162 |
-
if
|
163 |
-
st.
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
|
173 |
-
df_styled = df.style.set_properties(**properties)
|
174 |
-
st.dataframe(df_styled)
|
175 |
-
if df is not None:
|
176 |
-
value_counts1 = df['label'].value_counts()
|
177 |
-
|
178 |
-
df1 = pd.DataFrame(value_counts1)
|
179 |
-
|
180 |
-
final_df = df1.reset_index().rename(columns={"index": "label"})
|
181 |
-
|
182 |
-
col1, col2 = st.columns(2)
|
183 |
-
with col1:
|
184 |
-
fig1 = px.pie(final_df, values='count', names='label', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted labels')
|
185 |
-
fig1.update_traces(textposition='inside', textinfo='percent+label')
|
186 |
-
st.plotly_chart(fig1)
|
187 |
-
with col2:
|
188 |
-
fig2 = px.bar(final_df, x="count", y="label", color="label", text_auto=True, title='Occurrences of predicted labels')
|
189 |
-
st.plotly_chart(fig2)
|
190 |
-
|
191 |
-
dfa = pd.DataFrame(
|
192 |
-
data={
|
193 |
-
|
194 |
-
'text': ['entity extracted from file'], 'score': ['accuracy score'], 'label': ['label assigned to the extracted entity'],
|
195 |
-
'start': ['index of the start of the corresponding entity'],
|
196 |
-
'end': ['index of the end of the corresponding entity'],
|
197 |
-
})
|
198 |
-
buf = io.BytesIO()
|
199 |
-
with zipfile.ZipFile(buf, "w") as myzip:
|
200 |
-
myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
|
201 |
-
myzip.writestr("Glossary of labels.csv", dfa.to_csv(index=False))
|
202 |
-
|
203 |
-
with stylable_container(
|
204 |
-
key="download_button",
|
205 |
-
css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
|
206 |
-
):
|
207 |
st.download_button(
|
208 |
-
label="Download
|
209 |
-
data=
|
210 |
-
file_name="
|
211 |
-
mime="
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
|
222 |
st.divider()
|
223 |
-
st.write(f"Number of times you requested results: {st.session_state['
|
|
|
|
1 |
import streamlit as st
|
2 |
from cryptography.fernet import Fernet
|
3 |
import time
|
4 |
+
|
5 |
import io
|
6 |
from transformers import pipeline
|
7 |
from streamlit_extras.stylable_container import stylable_container
|
8 |
import json
|
9 |
|
|
|
10 |
|
11 |
+
|
12 |
from PyPDF2 import PdfReader
|
13 |
import docx
|
|
|
|
|
|
|
14 |
|
15 |
|
16 |
|
|
|
93 |
max_attempts = 5
|
94 |
|
95 |
# upload file
|
96 |
+
upload_file_ner = st.file_uploader("Upload your file. Accepted file formats include: .pdf, .docx", type=['pdf', 'docx'])
|
97 |
text = None
|
98 |
+
|
99 |
|
100 |
if upload_file is not None:
|
101 |
|
|
|
124 |
|
125 |
|
126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
+
if st.button("Results", key="results_ner"):
|
129 |
+
if st.session_state['file_upload_attempts_ner'] >= max_attempts:
|
|
|
|
|
130 |
st.error(f"You have requested results {max_attempts} times. You have reached your daily request limit.")
|
131 |
st.stop()
|
132 |
+
st.session_state['file_upload_attempts_ner'] += 1
|
133 |
+
if upload_file_ner:
|
134 |
+
with st.spinner('Translating...'): # More informative spinner text
|
135 |
+
try:
|
136 |
+
pipe_ner = get_translation_pipeline_en_el()
|
137 |
+
text_ner = pipe_ner(upload_file_ner)
|
138 |
+
if text_ner:
|
139 |
+
translated_text_ner = text_ner[0]['translation_text']
|
140 |
+
st.session_state.fernet_token_ner = generate_fernet_token(key_ner, translated_text_ner)
|
141 |
+
st.write("**Translated text**: ", translated_text_ner)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
st.download_button(
|
143 |
+
label="Download text",
|
144 |
+
data=translated_text_ner,
|
145 |
+
file_name="translation.txt",
|
146 |
+
mime="text/plain", # Specify the mime type
|
147 |
+
on_click=None,
|
148 |
+
type="primary",
|
149 |
+
use_container_width=True, # Makes the button wider
|
150 |
+
disabled=not translated_text_ner # Disable if no translation
|
151 |
+
)
|
152 |
+
except IndexError:
|
153 |
+
st.error("The input text is too long. Please try a shorter text (up to 250 words).")
|
154 |
+
except Exception as e:
|
155 |
+
st.error(f"An unexpected error occurred during translation: {e}")
|
156 |
+
elif 'fernet_token_ner' in st.session_state:
|
157 |
+
del st.session_state['fernet_token_ner']
|
158 |
+
|
159 |
+
decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner if 'fernet_token_ner' in st.session_state else None, ttl_seconds=3600)
|
160 |
+
if error_streamlit_ner:
|
161 |
+
if 'translated_text_ner' in locals():
|
162 |
+
st.warning("Please press Request Authorization.")
|
163 |
+
if st.button("Request Authorization", key="request_auth_ner"):
|
164 |
+
st.session_state.fernet_token_ner = generate_fernet_token(key_ner, translated_text_ner)
|
165 |
+
st.success("Authorization granted")
|
166 |
+
# Re-validate the token that was just re-issued; the time-to-live keyword is `ttl_seconds`, matching the earlier validate_fernet_token call in this script.
decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner, ttl_seconds=3600)
|
167 |
+
if error_streamlit_ner:
|
168 |
+
st.error(f"Your authorization has expired: {error_streamlit_ner}")
|
169 |
+
st.stop()
|
170 |
+
st.divider()
|
171 |
|
172 |
st.divider()
|
173 |
+
st.write(f"Number of times you requested results: {st.session_state['file_upload_attempts_ner']}/{max_attempts}")
|
174 |
+
st.divider()
|