Spaces:

mtyrrell
/

maf_prefilter_app

Running

App Files Files Community

maf_prefilter_app / app.py

mtyrrell

init new space

6829fd5 4 months ago

raw

history blame

6.54 kB

	import torch
	# Startup diagnostic: report whether a CUDA device is visible to torch.
	# Any failure here is printed and otherwise ignored so the app can still
	# start on a CPU-only host.
	try:
	    cuda_available = torch.cuda.is_available()
	    print(f"Is CUDA available: {cuda_available}")
	    if not cuda_available:
	        print("No CUDA device available - using CPU")
	    else:
	        # Looking up the device name is reported separately from the
	        # availability probe so its failure gets its own message.
	        try:
	            device_index = torch.cuda.current_device()
	            device_name = torch.cuda.get_device_name(device_index)
	            print(f"CUDA device: {device_name}")
	        except Exception as name_err:
	            print(f"Error getting CUDA device name: {name_err!s}")
	except Exception as probe_err:
	    print(f"Error checking CUDA availability: {probe_err!s}")
	    print("Continuing with CPU...")

	import streamlit as st
	import os
	from huggingface_hub import login
	from datetime import datetime
	from modules.auth import validate_login, check_password
	from modules.utils import create_excel, clean_text, extract_predicted_labels, predict_category, process_data

	# Local
	# from dotenv import load_dotenv
	# load_dotenv()


	# Main app logic
	def main():
	    """Render the MAF pre-filtering Streamlit page and handle uploads.

	    Steps, in script order:

	    1. Mark a new session as authenticated (the login form is currently
	       bypassed for testing) and log in to the Hugging Face Hub using the
	       ``HF_TOKEN`` environment variable.
	    2. Draw the sidebar (instructions, Excel template download, sensitivity
	       selector) and the "About this app" expander.
	    3. When a file is uploaded, run ``process_data`` on it, cache the
	       result in ``st.session_state`` and offer it as a CSV download.

	    Session-state keys used: ``authenticated``, ``data_processed``, ``df``
	    and ``processed_key``.
	    """
	    # NOTE(review): the nesting of the widgets below was reconstructed from
	    # a copy of this file whose indentation had been lost - confirm that
	    # the template download sits in the sidebar and that the "About"
	    # expander and pipeline image sit in the main page area.

	    # Temporarily set authentication to True for testing.
	    # NOTE(review): this bypasses the login form entirely; restore the
	    # login flow before exposing the app to untrusted users.
	    if 'authenticated' not in st.session_state:
	        st.session_state['authenticated'] = True

	    if st.session_state['authenticated']:
	        # A missing token is reported in the UI instead of surfacing as a
	        # raw KeyError traceback.
	        hf_token = os.environ.get("HF_TOKEN")
	        if not hf_token:
	            st.error("The HF_TOKEN environment variable is not set; cannot log in to the Hugging Face Hub.")
	            return
	        login(token=hf_token, add_to_git_credential=True)

	        # Initialize session state variables
	        if 'data_processed' not in st.session_state:
	            st.session_state['data_processed'] = False
	            st.session_state['df'] = None

	        # Main Streamlit app
	        st.title('MAF Application Pre-Filtering Tool')

	        sens_level = _render_sidebar()
	        _render_about()

	        uploaded_file = st.file_uploader("Select a file containing MAF application pre-filtering data (see instructions in the sidebar)")

	        if uploaded_file is not None:
	            _process_upload(uploaded_file, sens_level)


	def _render_sidebar():
	    """Draw the sidebar widgets and return the selected sensitivity value.

	    Returns:
	        The integer mapped to the chosen sensitivity label (4, 5 or 7),
	        which is later passed to ``process_data``.
	    """
	    with st.sidebar:
	        with st.expander("ℹ️ - Instructions", expanded=False):
	            st.markdown(
	                """
	1. Download the Excel Template file (below).
	2. [OPTIONAL]: Select the desired filtering sensitivity level (below).
	3. Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.
	4. Upload the template file in the area to the right (or click browse files).

	The tool will immediately start processing the uploaded application data. This can take considerable time
	depending on the number of applications and the length of text in each. For example, a file with 500 applications
	could be expected to take approximately 20 minutes.

	*NOTE (1) - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*

	*NOTE (2) - as of April 2024 this app running as a test version, NOT on a GPU. So the process can take up to 30 minutes for 20 applications.*
	"""
	            )
	        # Excel file download
	        st.download_button(
	            label="Download Excel Template",
	            data=create_excel(),
	            file_name="MAF_upload_template.xlsx",
	            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	        )

	        # get sensitivity level for use in review / reject (ref. process_data function)
	        sens_options = {
	            "Low": 4,
	            "Medium": 5,
	            "High": 7,
	        }

	        sens_input = st.sidebar.radio(
	            label='Select the Sensitivity Level [OPTIONAL]',
	            help='Increasing the level of sensitivity results in more \
	applications being filtered out. At the same time, this also \
	increases the probability of false negatives (FNs). The rate of \
	FNs at the lowest setting is approximately 6 percent, and \
	approaches 13 percent at the highest setting. ',
	            options=list(sens_options),
	            horizontal=False,
	        )

	    return sens_options[sens_input]


	def _render_about():
	    """Draw the "About this app" expander with the pipeline diagram."""
	    with st.expander("ℹ️ - About this app", expanded=False):
	        st.write(
	            """
	This tool provides an interface for running an automated preliminary assessment of applications to the MAF call for applications.

	The tool functions by running selected text fields from the application through a series of 8 LLMs fine-tuned for text classification (ref. diagram below).
	The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
	human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).

	""")
	        st.image('images/pipeline.png')


	def _process_upload(uploaded_file, sens_level):
	    """Process an uploaded file if needed and offer the result as CSV.

	    Args:
	        uploaded_file: The object returned by ``st.file_uploader``.
	        sens_level: Sensitivity value forwarded to ``process_data``.

	    The processed result is cached in ``st.session_state['df']`` together
	    with a key built from the file name, file size and sensitivity, so a
	    rerun triggered by e.g. the download button does not reprocess, while
	    a different file or sensitivity does.
	    """
	    try:
	        upload_key = (uploaded_file.name, uploaded_file.size, sens_level)
	        cache_is_current = (
	            st.session_state['data_processed']
	            and st.session_state.get('processed_key') == upload_key
	        )
	        if not cache_is_current:
	            # The same upload object is handed back on every rerun; rewind
	            # it in case an earlier pass already consumed the stream.
	            uploaded_file.seek(0)
	            st.session_state['df'] = process_data(uploaded_file, sens_level)
	            st.session_state['processed_key'] = upload_key
	            st.session_state['data_processed'] = True

	        # presumably a pandas DataFrame - only .to_csv() is relied on here
	        df = st.session_state['df']

	        # Timestamp the download name so repeated exports do not collide.
	        current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
	        output_filename = 'processed_applications_' + current_datetime + '.csv'

	        # Build the CSV in memory: no shared on-disk file that concurrent
	        # sessions could overwrite, and no file handle left open.
	        csv_bytes = df.to_csv(index=False).encode('utf-8')
	        st.download_button(
	            label="Download data as CSV",
	            data=csv_bytes,
	            file_name=output_filename,
	            mime='text/csv',
	        )
	    except Exception as exc:
	        # `except Exception` rather than a bare `except`, which would also
	        # trap KeyboardInterrupt/SystemExit and other BaseException-based
	        # control-flow signals. Keep the cause visible in the server log.
	        print(f"Error processing uploaded file: {exc!r}")
	        st.error("Failed to process the file. Please ensure your column names match the template file.")


	# Comment out or remove the else block containing login form
	# else:
	# username = st.text_input("Username")
	# password = st.text_input("Password", type="password")
	# if st.button("Login"):
	# if validate_login(username, password):
	# st.session_state['authenticated'] = True
	# st.experimental_rerun()
	# else:
	# st.error("Incorrect username or password")


	# Run the main function only when this file is executed as a script, so
	# importing the module does not start the app as a side effect.
	if __name__ == "__main__":
	    main()