mtyrrell's picture
init new space
6829fd5
raw
history blame
6.54 kB
import torch
# Startup diagnostic: report which compute device torch can see.
# Any failure here is only logged; the app carries on regardless.
try:
    print(f"Is CUDA available: {torch.cuda.is_available()}")
    if not torch.cuda.is_available():
        print("No CUDA device available - using CPU")
    else:
        # Looking up the device name can fail independently of the
        # availability probe, so it gets its own handler.
        try:
            device_index = torch.cuda.current_device()
            device_name = torch.cuda.get_device_name(device_index)
            print(f"CUDA device: {device_name}")
        except Exception as name_err:
            print(f"Error getting CUDA device name: {name_err}")
except Exception as cuda_err:
    print(f"Error checking CUDA availability: {cuda_err}")
    print("Continuing with CPU...")
import streamlit as st
import os
from huggingface_hub import login
from datetime import datetime
from modules.auth import validate_login, check_password
from modules.utils import create_excel, clean_text, extract_predicted_labels, predict_category, process_data
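# Local development only: uncomment the two lines below to load environment
# variables from a .env file (presumably HF_TOKEN, which main() reads from
# os.environ).
# from dotenv import load_dotenv
# load_dotenv()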
# Main app logic
def main():
    """Render the MAF pre-filtering page and process an uploaded file.

    Steps, in order:
      1. Mark the session as authenticated (the login form is currently
         disabled -- see the NOTE below).
      2. Log in to the Hugging Face Hub with the ``HF_TOKEN`` environment
         variable.
      3. Draw the sidebar: instructions, Excel template download and the
         sensitivity-level selector.
      4. When a file is uploaded, run ``process_data`` on it and offer the
         resulting DataFrame as a CSV download.

    Session state keys used:
      * ``authenticated``  -- bool, gate for the whole page.
      * ``data_processed`` -- bool, True once ``df`` holds a result.
      * ``df``             -- the value returned by ``process_data``.
      * ``processed_key``  -- (file name, file size, sensitivity) the cached
        ``df`` was computed for, so a new upload or a changed sensitivity
        level triggers reprocessing instead of returning stale results.
    """
    # NOTE(review): authentication is bypassed -- every new session is marked
    # as authenticated. This was flagged as temporary ("for testing"); restore
    # the login form at the bottom of this function before exposing the app.
    if 'authenticated' not in st.session_state:
        st.session_state['authenticated'] = True

    if not st.session_state['authenticated']:
        return

    # Fail with a readable message instead of a raw KeyError traceback when
    # the token has not been configured.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        st.error("The HF_TOKEN environment variable is not set; cannot log in to the Hugging Face Hub.")
        return
    login(token=hf_token, add_to_git_credential=True)

    # Initialize session state variables
    if 'data_processed' not in st.session_state:
        st.session_state['data_processed'] = False
        st.session_state['df'] = None

    # Main Streamlit app
    st.title('MAF Application Pre-Filtering Tool')

    # Sidebar (filters)
    with st.sidebar:
        with st.expander("ℹ️ - Instructions", expanded=False):
            # The markdown text is kept flush-left on purpose: leading
            # whitespace inside the literal is part of the rendered string.
            st.markdown(
                """
1. **Download the Excel Template file (below).**
2. **[OPTIONAL]: Select the desired filtering sensitivity level (below).**
3. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
4. **Upload the template file in the area to the right (or click browse files).**
The tool will immediately start processing the uploaded application data. This can take considerable time
depending on the number of applications and the length of text in each. For example, a file with 500 applications
could be expected to take approximately 20 minutes.
***NOTE (1)** - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
***NOTE (2)** - as of April 2024 this app running as a **test version**, NOT on a GPU. So the process can take up to 30 minutes for 20 applications.*
"""
            )

        # Excel file download
        st.download_button(
            label="Download Excel Template",
            data=create_excel(),
            file_name="MAF_upload_template.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )

        # get sensitivity level for use in review / reject (ref. process_data function)
        sens_options = {
            "Low": 4,
            "Medium": 5,
            "High": 7,
        }
        sens_input = st.sidebar.radio(
            label='Select the Sensitivity Level [OPTIONAL]',
            help=(
                'Increasing the level of sensitivity results in more '
                'applications being filtered out. At the same time, this also '
                'increases the probability of false negatives (FNs). The rate of '
                'FNs at the lowest setting is approximately 6 percent, and '
                'approaches 13 percent at the highest setting. '
            ),
            options=list(sens_options.keys()),
            horizontal=False,
        )
        sens_level = sens_options[sens_input]

    with st.expander("ℹ️ - About this app", expanded=False):
        st.write(
            """
This tool provides an interface for running an automated preliminary assessment of applications to the MAF call for applications.
The tool functions by running selected text fields from the application through a series of 8 LLMs fine-tuned for text classification (ref. diagram below).
The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).
""")
        st.image('images/pipeline.png')

    uploaded_file = st.file_uploader("Select a file containing MAF application pre-filtering data (see instructions in the sidebar)")

    if uploaded_file is not None:
        try:
            # Identify the current request so the cached result is reused only
            # for the same file and the same sensitivity level. Streamlit
            # reruns the script on every interaction (e.g. clicking the
            # download button), so caching is still needed to avoid
            # reprocessing an unchanged upload.
            upload_key = (
                getattr(uploaded_file, 'name', None),
                getattr(uploaded_file, 'size', None),
                sens_level,
            )
            cache_is_stale = (
                not st.session_state['data_processed']
                or st.session_state.get('processed_key') != upload_key
            )
            if cache_is_stale:
                st.session_state['data_processed'] = False
                st.session_state['df'] = process_data(uploaded_file, sens_level)
                st.session_state['processed_key'] = upload_key
                st.session_state['data_processed'] = True

            df = st.session_state['df']

            # Timestamp the download name so repeated exports do not collide.
            current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
            output_filename = 'processed_applications_' + current_datetime + '.csv'

            # Serialize in memory: avoids an unclosed file handle and avoids
            # concurrent sessions overwriting one shared file on disk.
            csv_bytes = df.to_csv(index=False).encode('utf-8')
            st.download_button(
                label="Download data as CSV",
                data=csv_bytes,
                file_name=output_filename,
                mime='text/csv',
            )
        except Exception as err:
            # Catch Exception (not a bare except) so interpreter/framework
            # control-flow signals derived from BaseException still propagate,
            # and log the real cause for whoever reads the server logs.
            print(f"Error processing uploaded file: {err!r}")
            st.error("Failed to process the file. Please ensure your column names match the template file.")

    # Login form, disabled while authentication is bypassed (see NOTE above).
    # else:
    #     username = st.text_input("Username")
    #     password = st.text_input("Password", type="password")
    #     if st.button("Login"):
    #         if validate_login(username, password):
    #             st.session_state['authenticated'] = True
    #             st.experimental_rerun()
    #         else:
    #             st.error("Incorrect username or password")
# Run the main function.
# Called unconditionally at module level, so the page is rendered every time
# this script is executed (and also if the module is imported).
main()