|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
import joblib |
|
|
|
|
|
# Coefficient tables for the Horvath, PhenoAge and DunedinPace clocks, read
# once at import time. The paths are relative to the working directory.
horvath_coefficients, pheno_coefficients, dunedin_coeffs = map(
    pd.read_csv,
    (
        "./coefficients/horvath_clock.csv",
        "./coefficients/pheno_clock.csv",
        "./coefficients/dunedin_clock.csv",
    ),
)
|
|
|
def compute_horvath_score(uploaded_file, clock_selection, coefficients, intercept):
    """Compute a linear methylation-clock score from an uploaded CSV.

    The score is ``sum(beta * Coefficient) + intercept`` over the markers
    listed in ``coefficients``.

    Args:
        uploaded_file: Path to the CSV, or an object whose ``name`` attribute
            is that path. The CSV needs a marker-ID column named ``probes``
            (``cpg_sites`` is accepted as an alias) and a ``beta`` column.
        clock_selection: Clock name; only used in the returned text.
        coefficients: DataFrame with ``Marker`` and ``Coefficient`` columns.
        intercept: Constant added to the weighted sum.

    Returns:
        ``"<clock_selection> score: <value>"`` on success, otherwise a
        message starting with ``"Error:"``.
    """
    csv_path = getattr(uploaded_file, "name", uploaded_file)
    user_data = pd.read_csv(csv_path)

    # Accept 'cpg_sites' as an alias of 'probes' for the marker-ID column;
    # 'probes' wins when both are present.
    marker_column = next(
        (name for name in ("probes", "cpg_sites") if name in user_data.columns),
        None,
    )
    if marker_column is None or "beta" not in user_data.columns:
        return "Error: Uploaded CSV must have columns 'probes' (or 'cpg_sites') and 'beta'."

    # Keep only the two columns the score needs so that unrelated columns in
    # the upload (e.g. one already called 'Marker' or 'Coefficient') cannot
    # collide with the coefficient table during the merge.
    user_data = user_data[[marker_column, "beta"]].rename(
        columns={marker_column: "Marker"}
    )
    weights = coefficients[["Marker", "Coefficient"]]

    # Every marker of the clock must be present; an inner merge would
    # otherwise silently drop the missing terms from the sum.
    if not weights["Marker"].isin(user_data["Marker"]).all():
        return f"Error: uploaded methylation file does not contain all required coefficients for {clock_selection}"

    merged_data = pd.merge(user_data, weights, on="Marker", how="inner")
    score = (merged_data["beta"] * merged_data["Coefficient"]).sum() + intercept
    return f"{clock_selection} score: {score}"
|
|
|
def beta_to_m(x):
    """Return the logit of a beta value, ``ln(x / (1 - x))``.

    At the boundaries ``x == 0`` and ``x == 1`` the plain formula is not
    finite (and ``x == 1`` divides by zero for a Python float), so a small
    offset is added to numerator and denominator there instead.

    Args:
        x: A scalar beta value.

    Returns:
        The natural-log logit of ``x``; finite for every ``x`` in ``[0, 1]``.
    """
    epsilon = 0.0000001
    # Check the boundaries *before* dividing: 1 - x is exactly zero at x == 1.
    if x == 0 or x == 1:
        return np.log((x + epsilon) / (1 - x + epsilon))
    return np.log(x / (1 - x))
|
|
|
def custom_clock_computation(uploaded_file):
    """Predict an age from an uploaded methylation CSV with the custom SVR model.

    Steps: transpose the CSV, promote the first transposed row to column
    labels, select the sites listed in ``cols.npy``, convert each value with
    ``beta_to_m``, apply ``scaler_custom.pkl`` and ``pca_custom.pkl``, then
    predict with ``svr_model.pkl``.

    Args:
        uploaded_file: Path to the CSV, or an object whose ``name`` attribute
            is that path. Presumably the first CSV column holds the site IDs
            and the remaining columns hold samples -- confirm against the
            expected upload format.

    Returns:
        ``"Custom clock: <age>"`` using the first prediction, or a message
        starting with ``"Error:"`` when required sites are missing.
    """
    csv_path = getattr(uploaded_file, "name", uploaded_file)
    user_data = pd.read_csv(csv_path).T

    # After the transpose, row 0 carries the values of the CSV's first
    # column; use them as column labels and drop that row from the data.
    user_data.columns = user_data.iloc[0]
    user_data = user_data[1:].reset_index(drop=True)

    # NOTE(review): np.load(allow_pickle=True) and joblib.load unpickle their
    # input; this is only safe while these artefact files are trusted.
    cpgs = np.load("cols.npy", allow_pickle=True)
    if not set(cpgs).issubset(user_data.columns):
        return "Error: missing methylation sites for custom SVR age computation"

    # Series.map per column is the element-wise equivalent of
    # DataFrame.applymap, which pandas has deprecated.
    user_filtered = user_data[cpgs].apply(lambda column: column.map(beta_to_m))

    # Load the fitted artefacts only once the input is known to be usable.
    scaler = joblib.load('scaler_custom.pkl')
    pca = joblib.load('pca_custom.pkl')
    age_predictor = joblib.load("svr_model.pkl")

    user_pca = pca.transform(scaler.transform(user_filtered))
    age = age_predictor.predict(user_pca)[0]
    return f"Custom clock: {age}"
|
|
|
|
|
def process_file(file, clock_selection):
    """Route an uploaded file to the scorer for the selected clock.

    Args:
        file: The uploaded file, or ``None`` when nothing was uploaded.
        clock_selection: One of ``"Horvath"``, ``"PhenoAge"``,
            ``"DunedinPace"`` or ``"Custom SVR"``.

    Returns:
        The result text of the selected scorer, or a message starting with
        ``"Error:"`` when no file was given or the clock is not recognised.
    """
    if file is None:
        return "Error: please upload a CSV file before computing a score."

    if clock_selection == "Custom SVR":
        return custom_clock_computation(file)

    # The three linear clocks share one scorer and differ only in their
    # coefficient table and intercept.
    if clock_selection == "Horvath":
        coefficients, intercept = horvath_coefficients, 0.695507258
    elif clock_selection == "PhenoAge":
        coefficients = pheno_coefficients
        # The PhenoAge intercept is read from the first row of the table.
        intercept = pheno_coefficients["Intercept"].iloc[0]
    elif clock_selection == "DunedinPace":
        coefficients, intercept = dunedin_coeffs, 0
    else:
        return (
            "Error: please select one of the supported clocks: "
            "Horvath, PhenoAge, DunedinPace, Custom SVR."
        )

    return compute_horvath_score(file, clock_selection, coefficients, intercept)
|
|
|
# Gradio UI: one file upload, a clock selector, and a button that sends both
# to process_file and shows the returned text.
with gr.Blocks() as demo:
    gr.Markdown("# Biological Clock Age Estimator")

    # The label names the columns the linear-clock scorer checks for.
    file_input = gr.File(label="Upload your CSV file with 'probes' and 'beta' columns")

    clock_selector = gr.Dropdown(
        choices=["Horvath", "PhenoAge", "DunedinPace", "Custom SVR"],
        label="Select Biological Clock",
    )
    output = gr.Textbox(label="Output")
    run_button = gr.Button("Compute Score")

    run_button.click(process_file, inputs=[file_input, clock_selector], outputs=output)

demo.launch()
|
|