bioclock / app.py
amroa's picture
init commit
04e56bf
raw
history blame
3.94 kB
import gradio as gr
import pandas as pd
import numpy as np
import joblib
# Coefficient tables for the linear clocks, read once at import time from
# local CSV files. The unpacking order matches the path order below:
# Horvath, PhenoAge, Dunedin.
# The scoring code elsewhere in this file reads the 'Marker' and 'Coefficient'
# columns of each table, plus an 'Intercept' column of the PhenoAge table.
horvath_coefficients, pheno_coefficients, dunedin_coeffs = map(
    pd.read_csv,
    (
        "./coefficients/horvath_clock.csv",
        "./coefficients/pheno_clock.csv",
        "./coefficients/dunedin_clock.csv",
    ),
)
def compute_horvath_score(uploaded_file, clock_selection, coefficients, intercept):
    """Compute a linear methylation-clock score from an uploaded CSV.

    The score is ``sum(beta * Coefficient) + intercept`` taken over every
    marker listed in ``coefficients``.

    Args:
        uploaded_file: Object whose ``name`` attribute is the path of a CSV
            file containing at least the columns 'probes' and 'beta'.
        clock_selection: Clock label, used only in the returned messages.
        coefficients: DataFrame with 'Marker' and 'Coefficient' columns.
        intercept: Constant added to the weighted sum.

    Returns:
        str: ``"<clock> score: <value>"`` on success, otherwise a message
        starting with ``"Error:"`` describing why no score was computed.
    """
    user_data = pd.read_csv(uploaded_file.name)

    # The user data must have the required columns. The message names the
    # columns that are actually checked ('probes', not 'cpg_sites').
    if 'probes' not in user_data.columns or 'beta' not in user_data.columns:
        return "Error: Uploaded CSV must have columns 'probes' and 'beta'."

    # Align the probe column name with the coefficient table's key.
    user_data = user_data.rename(columns={"probes": "Marker"})

    # Every marker of the clock must be present in the upload.
    required_markers = coefficients['Marker'].values
    common_markers = len(np.intersect1d(user_data['Marker'].values, required_markers))
    if common_markers != len(required_markers):
        # provided file does not have all the beta values required for computation
        return f"Error: uploaded methylation file does not contain all required coefficients for {clock_selection}"

    merged_data = pd.merge(user_data, coefficients, on='Marker', how='inner')

    # A probe listed twice in the upload would be matched twice by the merge
    # and counted twice in the sum, silently inflating the score.
    if merged_data['Marker'].duplicated().any():
        return f"Error: uploaded methylation file contains duplicate probes required for {clock_selection}"

    # Missing betas would be skipped by .sum() and non-numeric ones would
    # raise; report both as an input error instead.
    betas = pd.to_numeric(merged_data['beta'], errors='coerce')
    if betas.isna().any():
        return f"Error: uploaded methylation file has missing or non-numeric beta values for {clock_selection}"

    # Score = dot product of betas and coefficients, plus the intercept.
    score = (betas * merged_data['Coefficient']).sum() + intercept
    return f"{clock_selection} score: {score}"
def beta_to_m(x):
    """Convert a single beta value to its logit, ``log(x / (1 - x))``.

    The logit is infinite at both ends of the [0, 1] range, so for ``x == 0``
    and ``x == 1`` a small offset is added to numerator and denominator to
    keep the result finite. (Without this, ``x == 1`` divides by zero.)

    Args:
        x: Scalar beta value.

    Returns:
        The natural-log logit of ``x`` as returned by ``np.log``.
    """
    eps = 0.0000001
    if x == 0 or x == 1:
        # Boundary values: use the offset form so the result stays finite.
        return np.log((x + eps) / (1 - x + eps))
    return np.log(x / (1 - x))
def custom_clock_computation(uploaded_file):
    """Predict an age from an uploaded CSV using the custom clock pipeline.

    The upload is transposed, filtered to the sites stored in ``cols.npy``,
    converted element-wise with ``beta_to_m``, then passed through the
    scaler, the PCA and the predictor loaded from the local ``.pkl`` files.

    Args:
        uploaded_file: Object whose ``name`` attribute is the CSV path.

    Returns:
        str: ``"Custom clock: <age>"``, or an error message when a required
        site is absent from the upload.
    """
    # Preprocessing objects are read from disk on every call.
    # NOTE(review): joblib pickles execute code on load; these files must
    # come from a trusted source.
    fitted_scaler = joblib.load('scaler_custom.pkl')
    fitted_pca = joblib.load('pca_custom.pkl')

    # After the transpose, the first row holds the site names; promote it to
    # the column header and keep the remaining row(s) as data.
    transposed = pd.read_csv(uploaded_file.name).T
    transposed.columns = transposed.iloc[0]
    samples = transposed[1:].reset_index(drop=True)

    # Every site listed in cols.npy must be present in the upload.
    required_sites = np.load("cols.npy", allow_pickle=True)
    available_sites = set(samples.columns.values)
    if not set(required_sites).issubset(available_sites):
        return "Error: missing methylation sites for custom SVR age computation"

    # Select the required columns and convert each value.
    m_values = samples[required_sites].applymap(beta_to_m)

    # Scale, then project with the PCA.
    components = fitted_pca.transform(fitted_scaler.transform(m_values))

    # The predictor is loaded last and applied to the first (only) sample.
    predictor = joblib.load("svr_model.pkl")
    return f"Custom clock: {predictor.predict(components)[0]}"
def process_file(file, clock_selection):
    """Dispatch an uploaded file to the computation for the selected clock.

    Args:
        file: Uploaded file object (exposes the path as ``.name``), or
            ``None`` when nothing has been uploaded.
        clock_selection: One of "Horvath", "PhenoAge", "DunedinPace" or
            "Custom SVR".

    Returns:
        str: The result message of the selected clock, or an ``"Error: ..."``
        message when no file was uploaded or the selection is not supported.
    """
    # Without an upload every branch below would fail on ``file.name``.
    if file is None:
        return "Error: please upload a CSV file before computing a score."

    if clock_selection == "Horvath":
        return compute_horvath_score(file, clock_selection, horvath_coefficients, 0.695507258)
    if clock_selection == "PhenoAge":
        # The PhenoAge intercept is read from the coefficient table itself.
        pheno_intercept = pheno_coefficients[['Intercept']].values[0][0]
        return compute_horvath_score(file, clock_selection, pheno_coefficients, pheno_intercept)
    if clock_selection == "DunedinPace":
        return compute_horvath_score(file, clock_selection, dunedin_coeffs, 0)
    if clock_selection == "Custom SVR":
        return custom_clock_computation(file)

    # Reached when no clock is selected or the value is unknown.
    return (
        "Error: unsupported clock selection. "
        "Choose one of: Horvath, PhenoAge, DunedinPace, Custom SVR."
    )
# Gradio UI: a file upload, a clock selector, an output box and a button
# that runs process_file on the current inputs.
with gr.Blocks() as demo:
    gr.Markdown("# Biological Clock Age Estimator")
    # File uploader. The label names the columns the scoring code checks for
    # ('probes' and 'beta').
    file_input = gr.File(label="Upload your CSV file with 'probes' and 'beta' columns")
    # Dropdown for clock selection and button to trigger computation
    clock_selector = gr.Dropdown(choices=["Horvath", "PhenoAge", "DunedinPace", "Custom SVR"], label="Select Biological Clock")
    output = gr.Textbox(label="Output")
    run_button = gr.Button("Compute Score")
    # process_file is called when the button is clicked; its returned string
    # is shown in the output textbox.
    run_button.click(process_file, inputs=[file_input, clock_selector], outputs=output)
# Launch the Gradio interface
demo.launch()