nlpblogs committed on
Commit
ac15b3a
·
verified ·
1 Parent(s): b67779c

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +46 -95
src/streamlit_app.py CHANGED
@@ -1,20 +1,16 @@
1
  import streamlit as st
2
  from cryptography.fernet import Fernet
3
  import time
4
- import pandas as pd
5
  import io
6
  from transformers import pipeline
7
  from streamlit_extras.stylable_container import stylable_container
8
  import json
9
 
10
- import nltk
11
 
12
- import plotly.express as px
13
  from PyPDF2 import PdfReader
14
  import docx
15
- import zipfile
16
-
17
- from gliner import GLiNER
18
 
19
 
20
 
@@ -97,9 +93,9 @@ if 'file_upload_attempts' not in st.session_state:
97
  max_attempts = 5
98
 
99
  # upload file
100
- upload_file = st.file_uploader("Upload your file. Accepted file formats include: .pdf, .docx", type=['pdf', 'docx'])
101
  text = None
102
- df = None
103
 
104
  if upload_file is not None:
105
 
@@ -128,96 +124,51 @@ if upload_file is not None:
128
 
129
 
130
 
131
- # generate and validate Fernet token for the current file
132
- if 'fernet_token' not in st.session_state:
133
- if text is not None:
134
- st.session_state.fernet_token = generate_fernet_token(key, text)
135
- else:
136
- st.stop()
137
-
138
- decrypted_data_streamlit, error_streamlit = validate_fernet_token(key, st.session_state.fernet_token, ttl_seconds=3600)
139
-
140
- if error_streamlit:
141
- if text is not None:
142
- st.warning("Please press Request Authorization.")
143
- if st.button("Request Authorization"):
144
- st.session_state.fernet_token = generate_fernet_token(key, text)
145
- st.success("Authorization granted")
146
- decrypted_data_streamlit, error_streamlit = validate_fernet_token(key, st.session_state.fernet_token, ttl_seconds=3600)
147
- if error_streamlit:
148
- st.error(f"Your authorization has expired: {error_streamlit}")
149
- st.stop()
150
-
151
-
152
- st.divider()
153
-
154
 
155
-
156
- #retrieve answer
157
- if st.button("Results"):
158
- if st.session_state['file_upload_attempts'] >= max_attempts:
159
  st.error(f"You have requested results {max_attempts} times. You have reached your daily request limit.")
160
  st.stop()
161
- st.session_state['file_upload_attempts'] += 1
162
- if error_streamlit:
163
- st.warning("Please upload a file before retrieving the results.")
164
- else:
165
- with st.spinner('Wait for it...'):
166
- time.sleep(2)
167
- model = GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")
168
- labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
169
- entities = model.predict_entities(text, labels)
170
- df = pd.DataFrame(entities)
171
-
172
- properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
173
- df_styled = df.style.set_properties(**properties)
174
- st.dataframe(df_styled)
175
- if df is not None:
176
- value_counts1 = df['label'].value_counts()
177
-
178
- df1 = pd.DataFrame(value_counts1)
179
-
180
- final_df = df1.reset_index().rename(columns={"index": "label"})
181
-
182
- col1, col2 = st.columns(2)
183
- with col1:
184
- fig1 = px.pie(final_df, values='count', names='label', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted labels')
185
- fig1.update_traces(textposition='inside', textinfo='percent+label')
186
- st.plotly_chart(fig1)
187
- with col2:
188
- fig2 = px.bar(final_df, x="count", y="label", color="label", text_auto=True, title='Occurrences of predicted labels')
189
- st.plotly_chart(fig2)
190
-
191
- dfa = pd.DataFrame(
192
- data={
193
-
194
- 'text': ['entity extracted from file'], 'score': ['accuracy score'], 'label': ['label assigned to the extracted entity'],
195
- 'start': ['index of the start of the corresponding entity'],
196
- 'end': ['index of the end of the corresponding entity'],
197
- })
198
- buf = io.BytesIO()
199
- with zipfile.ZipFile(buf, "w") as myzip:
200
- myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
201
- myzip.writestr("Glossary of labels.csv", dfa.to_csv(index=False))
202
-
203
- with stylable_container(
204
- key="download_button",
205
- css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
206
- ):
207
  st.download_button(
208
- label="Download zip file",
209
- data=buf.getvalue(),
210
- file_name="zip file.zip",
211
- mime="application/zip",
212
- )
213
-
214
-
215
-
216
-
217
-
218
-
219
-
220
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
  st.divider()
223
- st.write(f"Number of times you requested results: {st.session_state['file_upload_attempts']}/{max_attempts}")
 
 
1
  import streamlit as st
2
  from cryptography.fernet import Fernet
3
  import time
4
+
5
  import io
6
  from transformers import pipeline
7
  from streamlit_extras.stylable_container import stylable_container
8
  import json
9
 
 
10
 
11
+
12
  from PyPDF2 import PdfReader
13
  import docx
 
 
 
14
 
15
 
16
 
 
93
  max_attempts = 5
94
 
95
  # upload file
96
+ upload_file_ner = st.file_uploader("Upload your file. Accepted file formats include: .pdf, .docx", type=['pdf', 'docx'])
97
  text = None
98
+
99
 
100
  if upload_file is not None:
101
 
 
124
 
125
 
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
+ if st.button("Results", key="results_ner"):
129
+ if st.session_state['file_upload_attempts_ner'] >= max_attempts:
 
 
130
  st.error(f"You have requested results {max_attempts} times. You have reached your daily request limit.")
131
  st.stop()
132
+ st.session_state['file_upload_attempts_ner'] += 1
133
+ if upload_file_ner:
134
+ with st.spinner('Translating...'): # More informative spinner text
135
+ try:
136
+ pipe_ner = get_translation_pipeline_en_el()
137
+ text_ner = pipe_ner(upload_file_ner)
138
+ if text_ner:
139
+ translated_text_ner = text_ner[0]['translation_text']
140
+ st.session_state.fernet_token_ner = generate_fernet_token(key_ner, translated_text_ner)
141
+ st.write("**Translated text**: ", translated_text_ner)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  st.download_button(
143
+ label="Download text",
144
+ data=translated_text_ner,
145
+ file_name="translation.txt",
146
+ mime="text/plain", # Specify the mime type
147
+ on_click=None,
148
+ type="primary",
149
+ use_container_width=True, # Makes the button wider
150
+ disabled=not translated_text_ner # Disable if no translation
151
+ )
152
+ except IndexError:
153
+ st.error("The input text is too long. Please try a shorter text (up to 250 words).")
154
+ except Exception as e:
155
+ st.error(f"An unexpected error occurred during translation: {e}")
156
+ elif 'fernet_token_ner' in st.session_state:
157
+ del st.session_state['fernet_token_ner']
158
+
159
+ decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner if 'fernet_token_ner' in st.session_state else None, ttl_seconds=3600)
160
+ if error_streamlit_ner:
161
+ if 'translated_text_ner' in locals():
162
+ st.warning("Please press Request Authorization.")
163
+ if st.button("Request Authorization", key="request_auth_ner"):
164
+ st.session_state.fernet_token_ner = generate_fernet_token(key_ner, translated_text_ner)
165
+ st.success("Authorization granted")
166
+ decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner, ttl=3600)
167
+ if error_streamlit_ner:
168
+ st.error(f"Your authorization has expired: {error_streamlit_ner}")
169
+ st.stop()
170
+ st.divider()
171
 
172
  st.divider()
173
+ st.write(f"Number of times you requested results: {st.session_state['file_upload_attempts_ner']}/{max_attempts}")
174
+ st.divider()