Spaces:

amroa
/

bioclock

Sleeping

App Files Files Community

bioclock / app.py

amroa

init commit

04e56bf 8 months ago

raw

history blame

3.94 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import joblib

	# Per-clock coefficient tables, read once at import time from local CSV
	# files. The three paths are consumed in order, so the unpacking below binds
	# Horvath, PhenoAge and Dunedin tables respectively.
	horvath_coefficients, pheno_coefficients, dunedin_coeffs = map(
	    pd.read_csv,
	    (
	        "./coefficients/horvath_clock.csv",  # Horvath clock
	        "./coefficients/pheno_clock.csv",  # PhenoAge clock
	        "./coefficients/dunedin_clock.csv",  # Dunedin clock
	    ),
	)

	def compute_horvath_score(uploaded_file, clock_selection, coefficients, intercept):
	    """Compute a linear methylation-clock score from an uploaded CSV.

	    The score is ``sum(beta * Coefficient) + intercept`` taken over the
	    probes listed in ``coefficients``.

	    Args:
	        uploaded_file: Either a path string, or an object whose ``name``
	            attribute is the path of a CSV with 'probes' and 'beta' columns.
	        clock_selection: Display name of the clock; only used in the
	            returned text.
	        coefficients: DataFrame with 'Marker' and 'Coefficient' columns.
	        intercept: Constant added to the weighted sum.

	    Returns:
	        A string: "<clock_selection> score: <value>" on success, or a message
	        starting with "Error:" when the upload lacks the required columns or
	        does not cover every marker in ``coefficients``.
	    """
	    # Accept both upload wrappers exposing ``.name`` and plain path strings.
	    csv_path = getattr(uploaded_file, "name", uploaded_file)
	    user_data = pd.read_csv(csv_path)

	    # the user data must have the required columns
	    if 'probes' not in user_data.columns or 'beta' not in user_data.columns:
	        return "Error: Uploaded CSV must have columns 'probes' and 'beta'."

	    # Keep only the two columns we need, keyed like the coefficient table, so
	    # extra columns in the upload cannot collide with names used in the merge.
	    user_betas = pd.DataFrame({"Marker": user_data["probes"], "beta": user_data["beta"]})

	    # every marker of the clock must be present in the upload
	    required_markers = set(coefficients['Marker'])
	    if not required_markers.issubset(set(user_betas['Marker'])):
	        # provided file does not have all the beta values required for computation
	        return f"Error: uploaded methylation file does not contain all required coefficients for {clock_selection}"

	    merged_data = user_betas.merge(coefficients[['Marker', 'Coefficient']], on='Marker', how='inner')
	    # the score is the dot product of 'beta' and 'Coefficient', plus the intercept
	    score = (merged_data['beta'] * merged_data['Coefficient']).sum() + intercept

	    return f"{clock_selection} score: {score}"

	def beta_to_m(x):
	    """Convert a methylation beta value to the log-odds ``log(x / (1 - x))``.

	    The natural logarithm is used. At the two boundary values the plain
	    formula is undefined (log of 0 at ``x == 0``, division by zero at
	    ``x == 1``), so both numerator and denominator are offset by a small
	    epsilon there, which yields a large but finite result.

	    Args:
	        x: Beta value; the formula is real-valued for values in [0, 1].

	    Returns:
	        The log-odds of ``x`` as a NumPy float.
	    """
	    eps = 0.0000001
	    # Guard both ends: x == 0 would give -inf and x == 1 would divide by zero.
	    if x == 0 or x == 1:
	        return np.log((x + eps) / (1 - x + eps))
	    return np.log(x / (1 - x))

	def custom_clock_computation(uploaded_file):
	    """Predict an age from an uploaded methylation CSV with the custom model.

	    The upload is transposed so that its first row (the site names) becomes
	    the column header, restricted to the sites stored in ``cols.npy``,
	    converted with ``beta_to_m``, then passed through the saved scaler, PCA
	    and SVR predictor.

	    Args:
	        uploaded_file: Object whose ``name`` attribute is the CSV path.

	    Returns:
	        "Custom clock: <age>" for the first sample, or an "Error: ..." string
	        when a required site is missing from the upload.
	    """
	    # NOTE(review): joblib.load and np.load(allow_pickle=True) unpickle data;
	    # these artefacts are assumed to ship with the app and be trusted.
	    fitted_scaler = joblib.load('scaler_custom.pkl')
	    fitted_pca = joblib.load('pca_custom.pkl')

	    # After transposing, row 0 carries the site names: promote it to the
	    # header and keep the remaining rows as samples.
	    transposed = pd.read_csv(uploaded_file.name).T
	    transposed.columns = transposed.iloc[0]
	    samples = transposed[1:].reset_index(drop=True)

	    # Bail out unless every site the model was built on is available.
	    required_sites = np.load("cols.npy", allow_pickle=True)
	    if not set(required_sites) <= set(samples.columns.values):
	        return "Error: missing methylation sites for custom SVR age computation"

	    # Select the model's sites and convert each value element-wise.
	    m_values = samples[required_sites].applymap(beta_to_m)

	    # Scale, project with PCA, then predict with the saved regressor.
	    reduced = fitted_pca.transform(fitted_scaler.transform(m_values))
	    svr_model = joblib.load("svr_model.pkl")

	    predicted_age = svr_model.predict(reduced)[0]
	    return f"Custom clock: {predicted_age}"


	def process_file(file, clock_selection):
	    """Route an uploaded file to the computation for the selected clock.

	    Args:
	        file: The uploaded file handed over by the UI, or ``None`` when
	            nothing has been uploaded yet.
	        clock_selection: One of "Horvath", "PhenoAge", "DunedinPace" or
	            "Custom SVR".

	    Returns:
	        The result string produced by the selected computation, or a message
	        starting with "Error:" when no file was uploaded or the selection is
	        not one of the supported clocks.
	    """
	    # Without this guard a missing upload fails later on attribute access.
	    if file is None:
	        return "Error: please upload a CSV file before computing a score."

	    if clock_selection == "Horvath":
	        return compute_horvath_score(file, clock_selection, horvath_coefficients, 0.695507258)
	    elif clock_selection == "PhenoAge":
	        # The PhenoAge intercept is read from the first row of the table's
	        # 'Intercept' column.
	        pheno_intercept = pheno_coefficients['Intercept'].iloc[0]
	        return compute_horvath_score(file, clock_selection, pheno_coefficients, pheno_intercept)
	    elif clock_selection == "DunedinPace":
	        return compute_horvath_score(file, clock_selection, dunedin_coeffs, 0)
	    elif clock_selection == "Custom SVR":
	        return custom_clock_computation(file)
	    else:
	        # Also reached when no clock has been selected (clock_selection is None).
	        return "Error: unsupported clock selection. Choose one of: Horvath, PhenoAge, DunedinPace, Custom SVR."

	# Gradio UI: a file upload, a clock selector, an output box and a button that
	# runs process_file on the current upload and selection.
	with gr.Blocks() as demo:
	    gr.Markdown("# Biological Clock Age Estimator")

	    # file uploader; the label names the columns that the scoring code
	    # actually checks for ('probes' and 'beta')
	    file_input = gr.File(label="Upload your CSV file with 'probes' and 'beta' columns")

	    # dropdown for clock selection and button to trigger computation
	    clock_selector = gr.Dropdown(choices=["Horvath", "PhenoAge", "DunedinPace", "Custom SVR"], label="Select Biological Clock")
	    output = gr.Textbox(label="Output")
	    run_button = gr.Button("Compute Score")

	    # process_file function is called when button is clicked
	    run_button.click(process_file, inputs=[file_input, clock_selector], outputs=output)

	# Launch the Gradio interface
	demo.launch()