"""Gradio app that computes epigenetic clock scores from an uploaded CSV.

The app offers three linear clocks (Horvath, PhenoAge, DunedinPace), each
scored as a weighted sum of methylation beta values plus an intercept, and a
custom clock that feeds scaled, PCA-reduced M-values into a stored regressor.
"""

import gradio as gr
import joblib
import numpy as np
import pandas as pd

# Horvath coefficients from a local CSV file
horvath_coefficients = pd.read_csv("./coefficients/horvath_clock.csv")
# PhenoAge coefficients (this table also carries an 'Intercept' column)
pheno_coefficients = pd.read_csv("./coefficients/pheno_clock.csv")
# DunedinPace coefficients
dunedin_coeffs = pd.read_csv("./coefficients/dunedin_clock.csv")

# Offset used to keep the logit finite when a beta value is exactly 0 or 1.
_M_VALUE_EPS = 0.0000001


def _uploaded_path(uploaded_file):
    """Return the filesystem path of an upload.

    Accepts either an object exposing a ``name`` attribute (what the original
    code relied on) or a plain path string, so both upload representations
    work.
    """
    return getattr(uploaded_file, "name", uploaded_file)


def compute_horvath_score(uploaded_file, clock_selection, coefficients, intercept):
    """Score a linear clock: sum(beta * Coefficient) + intercept.

    Args:
        uploaded_file: Uploaded CSV (object with ``.name`` or a path string)
            containing 'probes' and 'beta' columns.
        clock_selection: Clock name, used only in the returned messages.
        coefficients: DataFrame with 'Marker' and 'Coefficient' columns.
        intercept: Constant added to the weighted sum.

    Returns:
        A human-readable string with the score, or an "Error: ..." string
        when the upload lacks required columns or required markers.
    """
    user_data = pd.read_csv(_uploaded_path(uploaded_file))

    # the user data must have the required columns
    if 'probes' not in user_data.columns or 'beta' not in user_data.columns:
        return "Error: Uploaded CSV must have columns 'probes' and 'beta'."

    # Keep only the two columns we need so extra user columns cannot collide
    # with coefficient-table column names during the merge, and drop repeated
    # probes so a duplicated row is not counted twice in the weighted sum.
    user_data = (
        user_data[['probes', 'beta']]
        .rename(columns={"probes": "Marker"})
        .drop_duplicates(subset='Marker')
    )

    required_markers = coefficients['Marker'].values
    common_markers = len(np.intersect1d(user_data['Marker'].values, required_markers))
    if common_markers != len(required_markers):
        # provided file does not have all the beta values required for computation
        return f"Error: uploaded methylation file does not contain all required coefficients for {clock_selection}"

    merged_data = pd.merge(user_data, coefficients, on='Marker', how='inner')

    # NOTE(review): the value returned is the raw linear predictor; confirm
    # whether any clock-specific transform is expected downstream.
    score = (merged_data['beta'] * merged_data['Coefficient']).sum() + intercept
    return f"{clock_selection} score: {score}"


def beta_to_m(x):
    """Convert a methylation beta value to an M-value, log(x / (1 - x)).

    At the boundaries (x == 0 or x == 1) the plain logit is infinite or
    divides by zero, so a small offset is added to numerator and denominator.
    Previously only x == 0 was guarded; x == 1 is now handled the same way.
    """
    if x == 0 or x == 1:
        return np.log((x + _M_VALUE_EPS) / (1 - x + _M_VALUE_EPS))
    return np.log(x / (1 - x))


def custom_clock_computation(uploaded_file):
    """Predict age with the stored scaler -> PCA -> regressor pipeline.

    Args:
        uploaded_file: Uploaded CSV (object with ``.name`` or a path string).
            The file is transposed, and its first column supplies the site
            names.

    Returns:
        A string with the predicted value for the first sample, or an
        "Error: ..." string when required methylation sites are missing.
    """
    user_data = pd.read_csv(_uploaded_path(uploaded_file)).T

    # Set the first row containing site names as the column names
    user_data.columns = user_data.iloc[0]
    user_data = user_data[1:].reset_index(drop=True)

    # Check if all required sites are there.
    # allow_pickle=True is needed for the object array of site names; this is
    # a local project file, never user-supplied data.
    cpgs = np.load("cols.npy", allow_pickle=True)
    if not set(cpgs).issubset(set(user_data.columns.values)):
        return "Error: missing methylation sites for custom SVR age computation"

    # Load the fitted preprocessing objects only once the input is known to
    # be usable, so invalid uploads fail fast.
    scaler = joblib.load('scaler_custom.pkl')
    pca = joblib.load('pca_custom.pkl')

    # Select the required columns and convert betas to M-values element-wise.
    # Column-wise Series.map is used instead of DataFrame.applymap, which is
    # deprecated in recent pandas releases.
    user_filtered = user_data[cpgs]
    user_filtered = user_filtered.apply(lambda column: column.map(beta_to_m))

    # Compute scaling and PCA
    user_pca = pca.transform(scaler.transform(user_filtered))

    # Finally feed into predictor
    age_predictor = joblib.load("svr_model.pkl")
    age = age_predictor.predict(user_pca)[0]
    return f"Custom svr clock: {age}"


def process_file(file, clock_selection):
    """Dispatch the uploaded file to the computation for the chosen clock.

    Args:
        file: The upload from the file component, or None when nothing was
            uploaded.
        clock_selection: One of "Horvath", "PhenoAge", "DunedinPace",
            "Custom SVR".

    Returns:
        The result string of the selected computation, or an "Error: ..."
        string when no file was uploaded or the selection is not recognised.
    """
    if file is None:
        # Clicking the button without an upload used to raise AttributeError.
        return "Error: please upload a CSV file before computing a score."

    if clock_selection == "Horvath":
        return compute_horvath_score(file, clock_selection, horvath_coefficients, 0.695507258)
    elif clock_selection == "PhenoAge":
        # The PhenoAge intercept is stored in the coefficient table itself.
        pheno_intercept = pheno_coefficients[['Intercept']].values[0][0]
        return compute_horvath_score(file, clock_selection, pheno_coefficients, pheno_intercept)
    elif clock_selection == "DunedinPace":
        return compute_horvath_score(file, clock_selection, dunedin_coeffs, 0)
    elif clock_selection == "Custom SVR":
        return custom_clock_computation(file)
    else:
        # Reached when nothing (or an unknown value) is selected.
        return "Error: please select one of the supported clocks (Horvath, PhenoAge, DunedinPace, Custom SVR)."


with gr.Blocks() as demo:
    gr.Markdown("# Biological Clock Age Estimator")

    # file uploader
    file_input = gr.File(label="Upload your CSV file with 'probes' and 'beta' columns")

    # dropdown for clock selection and button to trigger computation
    clock_selector = gr.Dropdown(
        choices=["Horvath", "PhenoAge", "DunedinPace", "Custom SVR"],
        label="Select Biological Clock",
    )
    output = gr.Textbox(label="Output")
    run_button = gr.Button("Compute Score")

    # process_file function is called when button is clicked
    run_button.click(process_file, inputs=[file_input, clock_selector], outputs=output)

# Launch the Gradio interface
demo.launch()