Spaces:

maxspad
/

nlp-qual-space

Running

App Files Files Community

nlp-qual-space / app.py

maxspad

no qual score summing. truly random samples

e99bd97 almost 2 years ago

raw

history blame

3.55 kB

	import streamlit as st
	import transformers as tf
	import pandas as pd
	from datetime import datetime
	from plotly import graph_objects as go

	from overview import NQDOverview

	import torch

	# Log once, at script start, whether a CUDA device is visible to torch
	# and, if so, the name of the currently selected device.
	cuda_available = torch.cuda.is_available()
	print(f"Is CUDA available: {cuda_available}")
	if cuda_available:
	    _device_index = torch.cuda.current_device()
	    print(f"CUDA device: {torch.cuda.get_device_name(_device_index)}")

	# Build a text-classification pipeline; the Streamlit singleton decorator
	# caches the returned object.
	@st.experimental_singleton(show_spinner=False)
	def load_model(username, prefix, model_name):
	    """Create the text-classification pipeline for one model.

	    The model identifier passed to ``tf.pipeline`` is
	    ``'<username>/<prefix>-<model_name>'``.

	    Args:
	        username: Account/namespace part of the model identifier.
	        prefix: Leading part of the model name.
	        model_name: Trailing part of the model name.

	    Returns:
	        The pipeline object, configured with ``return_all_scores=True``.
	    """
	    repo_id = f'{username}/{prefix}-{model_name}'
	    return tf.pipeline('text-classification', repo_id, return_all_scores=True)

	@st.experimental_singleton(show_spinner=False)
	def load_pickle(f):
	    """Read a pickled pandas object from ``f`` and return it.

	    The result is cached by the Streamlit singleton decorator.

	    NOTE(review): ``pd.read_pickle`` unpickles its input, so ``f`` should
	    only ever point at a trusted file.
	    """
	    unpickled = pd.read_pickle(f)
	    return unpickled

	def get_results(model, c):
	    """Run one classifier on a comment and summarise its prediction.

	    Args:
	        model: Callable invoked as ``model(c)``. The first element of its
	            return value is read as a sequence of dicts that each carry a
	            ``'score'`` entry.
	        c: The comment text handed to ``model``.

	    Returns:
	        A dict with ``'scores'`` (the scores in the order the model
	        returned them) and ``'label'`` (the position of the highest
	        score; the earliest position wins ties).
	    """
	    per_class = model(c)[0]
	    scores = [entry['score'] for entry in per_class]
	    best_index, _ = max(enumerate(scores), key=lambda pair: pair[1])
	    return {'label': best_index, 'scores': scores}

	def run_models(model_names, models, c):
	    """Apply each named model to the comment ``c``.

	    Args:
	        model_names: Iterable of keys to look up in ``models``.
	        models: Mapping from model name to a model accepted by
	            ``get_results``.
	        c: The comment text to score.

	    Returns:
	        A dict mapping every name in ``model_names`` to the result of
	        ``get_results`` for that model.
	    """
	    return {name: get_results(models[name], c) for name in model_names}


	# Page header: the app title followed by a caption that introduces the
	# QuAL score and invites the user to try the tool.
	st.title('Assess the QuALity of your feedback')
	st.caption(
	"""Medical education requires high-quality written feedback,
	but evaluating these supervisor narrative comments is time-consuming.
	The QuAL score has validity evidence for measuring the quality of short
	comments in this context. We developed a NLP/ML-powered tool to
	assess written comment quality via the QuAL score with high accuracy.

	Try it for yourself!
	""")

	### Load models
	# Each entry in models_to_load is combined with USERNAME and PREFIX by
	# load_model to pick the model to fetch.
	USERNAME = 'maxspad'
	PREFIX = 'nlp-qual'
	models_to_load = ['qual', 'q1', 'q2i', 'q3i']
	n_models = float(len(models_to_load))
	models = {}
	# The caption and progress bar live inside a placeholder so that both can
	# be cleared in one call once every model is available.
	lc_placeholder = st.empty()
	loader_container = lc_placeholder.container()
	loader_container.caption('Loading models... please wait...')
	pbar = loader_container.progress(0.0)
	for position, name in enumerate(models_to_load, start=1):
	    # The bar is advanced before this model's load call is made.
	    pbar.progress(position / n_models)
	    models[name] = load_model(USERNAME, PREFIX, name)
	lc_placeholder.empty()

	### Load example data
	examples = load_pickle('test.pkl')

	### Process input
	def _clean_example(raw):
	    """Turn one sampled example value into displayable comment text.

	    Non-string values (for instance a missing value in the data) and a
	    comment that is exactly ``'nan'`` become ``'blank'``. The
	    ``'_x000D_'`` marker is removed and surrounding whitespace is trimmed.
	    Only a whole-value ``'nan'`` is replaced, so words that merely contain
	    those letters are left intact.
	    """
	    if not isinstance(raw, str):
	        return 'blank'
	    cleaned = raw.replace('_x000D_', '').strip()
	    return 'blank' if cleaned == 'nan' else cleaned


	# A new example is drawn on every script run; the sampler is seeded from the
	# current clock time (whole seconds).
	ex = _clean_example(
	    examples['comment']
	    .sample(1, random_state=int(datetime.now().timestamp()))
	    .tolist()[0]
	)

	# First run of the session: start with the sampled example in the text box.
	if 'comment' not in st.session_state:
	    st.session_state['comment'] = ex

	with st.form('comment_form'):
	    comment = st.text_area('Try a comment:', value=st.session_state['comment'])
	    left_col, right_col = st.columns([1,9], gap='medium')
	    submitted = left_col.form_submit_button('Submit')
	    trying_example = right_col.form_submit_button('Try an example!')

	# Store which button was pressed and the comment to score, then rerun the
	# script so the text box and the results both reflect the stored comment.
	if submitted:
	    st.session_state['button_clicked'] = 'submit'
	    st.session_state['comment'] = comment
	    st.experimental_rerun()
	elif trying_example:
	    st.session_state['button_clicked'] = 'example'
	    st.session_state['comment'] = ex
	    st.experimental_rerun()

	# Score the comment currently held in session state with every loaded model.
	current_comment = st.session_state['comment']
	results = run_models(models_to_load, models, current_comment)
	# NOTE: the results are passed on unmodified. An earlier post-processing
	# step (forcing q3i to 1 when q2i is 1, and recomputing the 'qual' label as
	# q1 + (not q2i) + (not q3i)) is deliberately not applied.

	# Hand the per-model results to the overview component for rendering.
	overview = NQDOverview(st, results)
	overview.draw()