# Page banner captured together with this source (not program code):
# Spaces: Running
import torch

# Start-up diagnostic: report which compute device is visible to torch.
# Nothing here selects a device; it only prints, and any failure is reported
# and then ignored so the app can carry on.
try:
    _cuda_ready = torch.cuda.is_available()
    print(f"Is CUDA available: {_cuda_ready}")
    if not _cuda_ready:
        print("No CUDA device available - using CPU")
    else:
        # Looking up the device name can fail independently of the
        # availability check, so it gets its own handler and message.
        try:
            _device_index = torch.cuda.current_device()
            _device_name = torch.cuda.get_device_name(_device_index)
            print(f"CUDA device: {_device_name}")
        except Exception as name_error:
            print(f"Error getting CUDA device name: {name_error!s}")
except Exception as probe_error:
    print(f"Error checking CUDA availability: {probe_error!s}")
    print("Continuing with CPU...")
import logging
import os
from datetime import datetime

import streamlit as st
from huggingface_hub import login

from modules.auth import validate_login, check_password
from modules.utils import create_excel, clean_text, extract_predicted_labels, predict_category, process_data
# Local development only: uncomment the two lines below to load environment
# variables from a .env file.
# from dotenv import load_dotenv
# load_dotenv()

# Main app logic
_LOGGER = logging.getLogger(__name__)

# Sidebar instructions, rendered with st.markdown.
_INSTRUCTIONS_MD = """
1. **Download the Excel Template file (below).**
2. **[OPTIONAL]: Select the desired filtering sensitivity level (below).**
3. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
4. **Upload the template file in the area to the right (or click browse files).**
The tool will immediately start processing the uploaded application data. This can take considerable time
depending on the number of applications and the length of text in each. For example, a file with 500 applications
could be expected to take approximately 20 minutes.
***NOTE (1)** - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
***NOTE (2)** - as of April 2024 this app running as a **test version**, NOT on a GPU. So the process can take up to 30 minutes for 20 applications.*
"""

# Text of the "About this app" expander, rendered with st.write.
_ABOUT_TEXT = """
This tool provides an interface for running an automated preliminary assessment of applications to the MAF call for applications.
The tool functions by running selected text fields from the application through a series of 8 LLMs fine-tuned for text classification (ref. diagram below).
The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).
"""

# Radio label -> numeric sensitivity level handed to process_data.
_SENSITIVITY_LEVELS = {
    "Low": 4,
    "Medium": 5,
    "High": 7,
}

# Tooltip for the sensitivity radio (trailing space kept from the original).
_SENSITIVITY_HELP = (
    'Increasing the level of sensitivity results in more '
    'applications being filtered out. At the same time, this also '
    'increases the probability of false negatives (FNs). The rate of '
    'FNs at the lowest setting is approximately 6 percent, and '
    'approaches 13 percent at the highest setting. '
)


def _render_sidebar():
    """Draw the sidebar and return the selected numeric sensitivity level.

    The sidebar holds the instructions expander, the Excel template download
    button and the sensitivity radio, in that order.
    """
    with st.sidebar:
        with st.expander("ℹ️ - Instructions", expanded=False):
            st.markdown(_INSTRUCTIONS_MD)

        # Excel file download
        st.download_button(
            label="Download Excel Template",
            data=create_excel(),
            file_name="MAF_upload_template.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )

        # Sensitivity level used for review / reject (ref. process_data).
        sens_input = st.sidebar.radio(
            label='Select the Sensitivity Level [OPTIONAL]',
            help=_SENSITIVITY_HELP,
            options=list(_SENSITIVITY_LEVELS.keys()),
            horizontal=False,
        )
    return _SENSITIVITY_LEVELS[sens_input]


def _render_about():
    """Draw the collapsed "About this app" expander with the pipeline image."""
    with st.expander("ℹ️ - About this app", expanded=False):
        st.write(_ABOUT_TEXT)
        st.image('images/pipeline.png')


def _clear_processed_state():
    """Forget any cached result so the next upload is processed from scratch."""
    st.session_state['data_processed'] = False
    st.session_state['df'] = None
    st.session_state['processed_key'] = None


def _offer_processed_download(uploaded_file, sens_level):
    """Process the upload (once per file/sensitivity) and offer it as CSV.

    The result is cached in ``st.session_state['df']`` so Streamlit reruns
    (for example after clicking the download button) do not repeat the slow
    ``process_data`` call. The cache is keyed on the upload and the
    sensitivity level, so a different file or a changed sensitivity setting
    triggers reprocessing rather than serving the previous result.
    """
    # getattr with defaults: these attributes are expected on Streamlit's
    # uploaded-file object, but a missing one only weakens the key.
    upload_key = (
        getattr(uploaded_file, 'file_id', None),
        getattr(uploaded_file, 'name', None),
        getattr(uploaded_file, 'size', None),
        sens_level,
    )

    try:
        cache_is_stale = (
            not st.session_state['data_processed']
            or st.session_state.get('processed_key') != upload_key
        )
        if cache_is_stale:
            st.session_state['data_processed'] = False
            st.session_state['df'] = process_data(uploaded_file, sens_level)
            st.session_state['data_processed'] = True
            st.session_state['processed_key'] = upload_key

        # Serialise in memory: no file shared between sessions on disk and
        # no file handle left open.
        csv_bytes = st.session_state['df'].to_csv(index=False).encode('utf-8')
    except Exception:
        # `except Exception` (not a bare `except`) lets BaseException-derived
        # signals propagate; the traceback goes to the server log.
        _LOGGER.exception("Failed to process uploaded file %r", upload_key[1])
        st.error("Failed to process the file. Please ensure your column names match the template file.")
        return

    # Timestamp the suggested download name.
    current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
    st.download_button(
        label="Download data as CSV",
        data=csv_bytes,
        file_name=f'processed_applications_{current_datetime}.csv',
        mime='text/csv',
    )


def main():
    """Render the MAF application pre-filtering page.

    Logs in to the Hugging Face Hub with the ``HF_TOKEN`` environment
    variable, draws the title, sidebar and "About" expander, and, once a file
    is uploaded, runs ``process_data`` on it and offers the result as a CSV
    download. If ``HF_TOKEN`` is not set an error is shown and nothing else
    is rendered.
    """
    # NOTE(review): authentication is hard-wired to True "for testing", so the
    # login form below is disabled and every visitor is treated as logged in.
    if 'authenticated' not in st.session_state:
        st.session_state['authenticated'] = True

    if st.session_state['authenticated']:
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            st.error("The HF_TOKEN environment variable is not set; cannot log in to the Hugging Face Hub.")
            return
        login(token=hf_token, add_to_git_credential=True)

        # Initialize session state variables
        if 'data_processed' not in st.session_state:
            st.session_state['data_processed'] = False
            st.session_state['df'] = None

        # Main Streamlit app
        st.title('MAF Application Pre-Filtering Tool')
        sens_level = _render_sidebar()
        _render_about()

        uploaded_file = st.file_uploader("Select a file containing MAF application pre-filtering data (see instructions in the sidebar)")

        if uploaded_file is None:
            # Uploader is empty (or was cleared): drop any earlier result so
            # it is not served for the next upload.
            _clear_processed_state()
        else:
            _offer_processed_download(uploaded_file, sens_level)

    # TODO: restore the login form once testing is finished.
    # else:
    #     username = st.text_input("Username")
    #     password = st.text_input("Password", type="password")
    #     if st.button("Login"):
    #         if validate_login(username, password):
    #             st.session_state['authenticated'] = True
    #             st.experimental_rerun()
    #         else:
    #             st.error("Incorrect username or password")
# Run the app only when this file is executed as a script. `streamlit run`
# is expected to execute it under the name "__main__"; importing the module
# (e.g. from a test) no longer renders the page as a side effect.
if __name__ == "__main__":
    main()