Spaces:
Running
Running
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +72 -89
src/streamlit_app.py
CHANGED
@@ -1,28 +1,20 @@
|
|
1 |
import streamlit as st
|
2 |
from cryptography.fernet import Fernet
|
3 |
import time
|
4 |
-
|
5 |
import io
|
6 |
from transformers import pipeline
|
7 |
from streamlit_extras.stylable_container import stylable_container
|
8 |
import json
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
from PyPDF2 import PdfReader
|
13 |
import docx
|
14 |
|
15 |
-
|
16 |
-
|
17 |
st.subheader("Named Entity Recognition (NER)", divider="red")
|
18 |
|
19 |
# generate Fernet key
|
20 |
if 'fernet_key' not in st.session_state:
|
21 |
st.session_state.fernet_key = Fernet.generate_key()
|
22 |
-
|
23 |
key = st.session_state.fernet_key
|
24 |
|
25 |
-
|
26 |
# function for generating and validating fernet key
|
27 |
def generate_fernet_token(key, data):
|
28 |
fernet = Fernet(key)
|
@@ -30,7 +22,6 @@ def generate_fernet_token(key, data):
|
|
30 |
return token
|
31 |
|
32 |
def validate_fernet_token(key, token, ttl_seconds):
|
33 |
-
|
34 |
fernet = Fernet(key)
|
35 |
try:
|
36 |
decrypted_data = fernet.decrypt(token, ttl=ttl_seconds).decode()
|
@@ -38,125 +29,117 @@ def validate_fernet_token(key, token, ttl_seconds):
|
|
38 |
except Exception as e:
|
39 |
return None, f"Expired token: {e}"
|
40 |
|
41 |
-
|
42 |
-
# sidebar
|
43 |
with st.sidebar:
|
44 |
st.button("DEMO APP")
|
45 |
-
|
46 |
-
|
47 |
expander = st.expander("**Important notes on the Demo Named Entity Recognition (NER) App**")
|
48 |
expander.write('''
|
49 |
-
|
50 |
-
**Supported File Formats**
|
51 |
This app accepts files in .pdf and .docx formats.
|
52 |
-
|
53 |
-
|
54 |
Upload your file first. Then, click the 'Results' button.
|
55 |
-
|
56 |
-
|
57 |
-
You can request results up to 5 times.
|
58 |
-
|
59 |
-
|
60 |
This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own Named Entity Recognition (NER) Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app within five business days. If you wish to delete your Account with us, please contact us at [email protected]
|
61 |
-
|
62 |
-
|
63 |
-
For security purposes, your authorization access expires hourly. To restore access, click the "Request Authorization" button.
|
64 |
-
|
65 |
-
|
66 |
To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
|
67 |
-
|
68 |
-
|
69 |
The app may display an error message if your file is corrupt, or has other errors.
|
70 |
-
|
71 |
-
|
72 |
For any errors or inquiries, please contact us at [email protected]
|
73 |
-
|
74 |
-
''')
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
# count attempts based on file upload
|
79 |
-
if 'file_upload_attempts' not in st.session_state:
|
80 |
-
st.session_state['file_upload_attempts'] = 0
|
81 |
|
|
|
|
|
|
|
82 |
max_attempts = 5
|
83 |
|
84 |
# upload file
|
85 |
upload_file_ner = st.file_uploader("Upload your file. Accepted file formats include: .pdf, .docx", type=['pdf', 'docx'])
|
86 |
text = None
|
87 |
-
|
88 |
-
|
89 |
if upload_file_ner is not None:
|
90 |
-
|
91 |
-
file_extension = upload_file.name.split('.')[-1].lower()
|
92 |
if file_extension == 'pdf':
|
93 |
try:
|
94 |
-
pdf_reader = PdfReader(
|
95 |
text = ""
|
96 |
for page in pdf_reader.pages:
|
97 |
text += page.extract_text()
|
|
|
98 |
st.write(text)
|
99 |
except Exception as e:
|
100 |
st.error(f"An error occurred while reading PDF: {e}")
|
101 |
elif file_extension == 'docx':
|
102 |
try:
|
103 |
-
doc = docx.Document(
|
104 |
text = "\n".join([para.text for para in doc.paragraphs])
|
|
|
105 |
st.write(text)
|
106 |
except Exception as e:
|
107 |
st.error(f"An error occurred while reading docx: {e}")
|
108 |
else:
|
109 |
st.warning("Unsupported file type.")
|
110 |
-
|
111 |
st.stop()
|
112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
if st.button("Results", key="results_ner"):
|
118 |
-
if st.session_state['file_upload_attempts_ner'] >= max_attempts:
|
119 |
-
st.error(f"You have requested results {max_attempts} times. You have reached your daily request limit.")
|
120 |
-
st.stop()
|
121 |
-
st.session_state['file_upload_attempts_ner'] += 1
|
122 |
-
if upload_file_ner:
|
123 |
-
with st.spinner('Translating...'): # More informative spinner text
|
124 |
-
try:
|
125 |
-
pipe_ner = get_translation_pipeline_en_el()
|
126 |
-
text_ner = pipe_ner(upload_file_ner)
|
127 |
-
if text_ner:
|
128 |
-
translated_text_ner = text_ner[0]['translation_text']
|
129 |
-
st.session_state.fernet_token_ner = generate_fernet_token(key_ner, translated_text_ner)
|
130 |
-
st.write("**Translated text**: ", translated_text_ner)
|
131 |
-
st.download_button(
|
132 |
-
label="Download text",
|
133 |
-
data=translated_text_ner,
|
134 |
-
file_name="translation.txt",
|
135 |
-
mime="text/plain", # Specify the mime type
|
136 |
-
on_click=None,
|
137 |
-
type="primary",
|
138 |
-
use_container_width=True, # Makes the button wider
|
139 |
-
disabled=not translated_text_ner # Disable if no translation
|
140 |
-
)
|
141 |
-
except IndexError:
|
142 |
-
st.error("The input text is too long. Please try a shorter text (up to 250 words).")
|
143 |
-
except Exception as e:
|
144 |
-
st.error(f"An unexpected error occurred during translation: {e}")
|
145 |
elif 'fernet_token_ner' in st.session_state:
|
146 |
del st.session_state['fernet_token_ner']
|
147 |
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
160 |
|
161 |
st.divider()
|
162 |
st.write(f"Number of times you requested results: {st.session_state['file_upload_attempts_ner']}/{max_attempts}")
|
|
|
1 |
import streamlit as st
|
2 |
from cryptography.fernet import Fernet
|
3 |
import time
|
|
|
4 |
import io
|
5 |
from transformers import pipeline
|
6 |
from streamlit_extras.stylable_container import stylable_container
|
7 |
import json
|
|
|
|
|
|
|
8 |
from PyPDF2 import PdfReader
|
9 |
import docx
|
10 |
|
|
|
|
|
11 |
st.subheader("Named Entity Recognition (NER)", divider="red")
|
12 |
|
13 |
# Fernet key: Streamlit reruns this script on every interaction, so the key is
# kept in session state and generated only on the first run of a session.
if 'fernet_key' in st.session_state:
    key = st.session_state['fernet_key']
else:
    key = Fernet.generate_key()
    st.session_state['fernet_key'] = key
|
17 |
|
|
|
18 |
# helper functions for generating and validating Fernet tokens
|
19 |
def generate_fernet_token(key, data):
|
20 |
fernet = Fernet(key)
|
|
|
22 |
return token
|
23 |
|
24 |
def validate_fernet_token(key, token, ttl_seconds):
|
|
|
25 |
fernet = Fernet(key)
|
26 |
try:
|
27 |
decrypted_data = fernet.decrypt(token, ttl=ttl_seconds).decode()
|
|
|
29 |
except Exception as e:
|
30 |
return None, f"Expired token: {e}"
|
31 |
|
32 |
+
# sidebar: a demo badge button plus a collapsible panel of usage notes
with st.sidebar:
    st.button("DEMO APP")
    # The notes are written as Markdown; bold lines act as section headings.
    st.expander("**Important notes on the Demo Named Entity Recognition (NER) App**").write('''
**Supported File Formats**
This app accepts files in .pdf and .docx formats.

**How to Use**
Upload your file first. Then, click the 'Results' button.

**Usage Limits**
You can request results up to 5 times.

**Subscription Management**
This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own Named Entity Recognition (NER) Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app within five business days. If you wish to delete your Account with us, please contact us at [email protected]

**Authorization**
For security purposes, your authorization access expires hourly. To restore access, click the "Request Authorization" button.

**Customization**
To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.

**File Handling and Errors**
The app may display an error message if your file is corrupt, or has other errors.

For any errors or inquiries, please contact us at [email protected]
''')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
+
# Per-session quota: the counter lives in session state so it survives reruns,
# and is initialised only the first time the script runs for a session.
max_attempts = 5
if 'file_upload_attempts_ner' not in st.session_state:
    st.session_state.file_upload_attempts_ner = 0
|
65 |
|
66 |
# upload file
upload_file_ner = st.file_uploader(
    "Upload your file. Accepted file formats include: .pdf, .docx",
    type=['pdf', 'docx'],
)

# `text` stays None unless extraction completes; later code uses its
# truthiness to decide whether there is anything to analyse.
text = None

if upload_file_ner is not None:
    # The (lower-cased) filename extension selects the parser.
    file_extension = upload_file_ner.name.rsplit('.', 1)[-1].lower()

    if file_extension == 'pdf':
        try:
            pdf_reader = PdfReader(upload_file_ner)
            # Guard against a page yielding None instead of a string (e.g. a
            # page with no text layer), which would break concatenation.
            # `text` is assigned only after every page has been read, so a
            # failure part-way through cannot leave partial text behind.
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
            st.write("Extracted Text:")
            st.write(text)
        except Exception as e:
            text = None
            st.error(f"An error occurred while reading PDF: {e}")
    elif file_extension == 'docx':
        try:
            doc = docx.Document(upload_file_ner)
            # Only body paragraphs are collected, one per line.
            text = "\n".join(para.text for para in doc.paragraphs)
            st.write("Extracted Text:")
            st.write(text)
        except Exception as e:
            text = None
            st.error(f"An error occurred while reading docx: {e}")
    else:
        # The uploader already restricts extensions, so this is a safety net
        # for unexpected filenames; halt the run instead of continuing.
        st.warning("Unsupported file type.")
        st.stop()
|
92 |
|
93 |
+
@st.cache_resource(show_spinner=False)
def _load_ner_pipeline():
    """Build the NER pipeline once and reuse it across script reruns.

    Streamlit re-executes the script on every interaction, so constructing
    the pipeline inline would re-instantiate the model on each "Results"
    click. Caching it as a resource avoids that repeated work.
    """
    return pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")


if st.button("Results", key="results_ner"):
    # Enforce the per-session quota before doing any work.
    if st.session_state['file_upload_attempts_ner'] >= max_attempts:
        st.error(f"You have requested results {max_attempts} times. You have reached your daily request limit.")
        st.stop()

    if upload_file_ner is None:
        st.warning("Please upload a file first.")
        # Nothing to process: drop any token left over from an earlier result.
        if 'fernet_token_ner' in st.session_state:
            del st.session_state['fernet_token_ner']
    elif not text:
        st.warning("Could not extract text from the uploaded file.")
    else:
        # Charge an attempt only when there is real text to analyse, so a
        # click without a usable file does not consume the user's quota.
        st.session_state['file_upload_attempts_ner'] += 1
        with st.spinner('Processing for Named Entities...'):
            try:
                pipe_ner = _load_ner_pipeline()
                ner_results = pipe_ner(text)
                st.write("**Named Entity Recognition Results**: ")
                st.write(ner_results)
                # You can further process and display the results in a more user-friendly way

                # NOTE(review): if the example below is enabled, the result
                # scores may be NumPy floats that json.dumps rejects - confirm
                # and convert them (e.g. float(score)) first.
                # Example of generating a token (you might want to tokenize the NER results)
                # st.session_state.fernet_token_ner = generate_fernet_token(key, json.dumps(ner_results))
                # st.download_button(
                #     label="Download NER Results (JSON)",
                #     data=json.dumps(ner_results),
                #     file_name="ner_results.json",
                #     mime="application/json",
                #     on_click=None,
                #     type="primary",
                #     use_container_width=True,
                #     disabled=not ner_results
                # )
            except Exception as e:
                st.error(f"An unexpected error occurred during NER processing: {e}")
|
128 |
|
129 |
+
# NOTE: The token-validation flow below was carried over from a translation feature and references names that are not defined in this file (`key_ner`, `translated_text_ner`; the earlier code also called `get_translation_pipeline_en_el`).
# It is kept commented out so the NER app runs without a NameError. To re-enable it, define those names first (or adapt it to use `key` and the NER results).
|
131 |
+
# decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner if 'fernet_token_ner' in st.session_state else None, ttl_seconds=3600)
|
132 |
+
# if error_streamlit_ner:
|
133 |
+
# if 'translated_text_ner' in locals():
|
134 |
+
# st.warning("Please press Request Authorization.")
|
135 |
+
# if st.button("Request Authorization", key="request_auth_ner"):
|
136 |
+
# st.session_state.fernet_token_ner = generate_fernet_token(key_ner, translated_text_ner)
|
137 |
+
# st.success("Authorization granted")
|
138 |
+
# decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner, ttl_seconds=3600)
|
139 |
+
# if error_streamlit_ner:
|
140 |
+
# st.error(f"Your authorization has expired: {error_streamlit_ner}")
|
141 |
+
# st.stop()
|
142 |
+
# st.divider()
|
143 |
|
144 |
# Footer: show how much of the per-session quota has been used so far.
st.divider()
attempts_used = st.session_state['file_upload_attempts_ner']
st.write(f"Number of times you requested results: {attempts_used}/{max_attempts}")
|