import gradio as gr
import pandas as pd
import numpy as np
import joblib

# Coefficient tables for the three linear clocks, read from local CSV files
# when the module is loaded (order: Horvath, Pheno, Dunedin).
horvath_coefficients, pheno_coefficients, dunedin_coeffs = map(
    pd.read_csv,
    (
        "./coefficients/horvath_clock.csv",
        "./coefficients/pheno_clock.csv",
        "./coefficients/dunedin_clock.csv",
    ),
)

def compute_horvath_score(uploaded_file, clock_selection, coefficients, intercept):
    """Compute a linear methylation-clock score from an uploaded CSV.

    The score is ``sum(beta * Coefficient) + intercept`` taken over the
    probes listed in ``coefficients``.

    Args:
        uploaded_file: Upload handle exposing the CSV path as ``.name``; a
            plain path is accepted as well. ``None`` (nothing uploaded)
            produces an error message instead of an exception.
        clock_selection: Clock label, used only in the returned messages.
        coefficients: DataFrame with 'Marker' and 'Coefficient' columns.
        intercept: Constant added to the weighted sum.

    Returns:
        ``"<clock_selection> score: <value>"`` on success, otherwise a
        message starting with ``"Error:"``.
    """
    if uploaded_file is None:
        return "Error: no file uploaded."

    # Accept either an object carrying the path in ``.name`` or the path itself.
    csv_path = getattr(uploaded_file, "name", uploaded_file)
    user_data = pd.read_csv(csv_path)

    # the user data must have the required columns
    if 'probes' not in user_data.columns or 'beta' not in user_data.columns:
        return "Error: Uploaded CSV must have columns 'probes' and 'beta'."

    # Keep only the columns that are used, so extra columns in either table
    # (e.g. an uploaded 'Coefficient' column) cannot collide in the merge.
    user_data = user_data[['probes', 'beta']].rename(columns={"probes": "Marker"})
    weights = coefficients[['Marker', 'Coefficient']]

    required_markers = set(weights['Marker'])
    if not required_markers.issubset(set(user_data['Marker'])):
        # provided file does not have all the beta values required for computation
        return f"Error: uploaded methylation file does not contain all required coefficients for {clock_selection}"

    # A probe listed twice would be counted twice after the merge.
    relevant = user_data[user_data['Marker'].isin(required_markers)]
    if relevant['Marker'].duplicated().any():
        return f"Error: uploaded methylation file contains duplicate probes required for {clock_selection}"

    merged_data = pd.merge(relevant, weights, on='Marker', how='inner')

    # ``Series.sum`` skips NaN, so a missing or non-numeric beta would
    # silently drop its term from the score; report it instead.
    betas = pd.to_numeric(merged_data['beta'], errors='coerce')
    if betas.isna().any():
        return f"Error: uploaded methylation file has missing or non-numeric beta values required for {clock_selection}"

    score = (betas * merged_data['Coefficient']).sum() + intercept

    return f"{clock_selection} score: {score}"

def beta_to_m(x):
    """Convert a beta value to its logit, ``log(x / (1 - x))`` (natural log).

    At the boundaries ``x == 0`` and ``x == 1`` the plain logit is infinite
    (and ``x == 1`` divides by zero), so a small offset is added to both the
    numerator and the denominator to keep the result finite.

    Args:
        x: Scalar beta value.

    Returns:
        The logit of ``x``; ``nan`` for inputs outside ``[0, 1]`` or ``nan``.
    """
    offset = 0.0000001
    if x == 0 or x == 1:
        return np.log((x + offset) / (1 - x + offset))
    return np.log(x / (1 - x))

def custom_clock_computation(uploaded_file):
    """Predict age with the custom SVR clock from an uploaded methylation CSV.

    The CSV is read and transposed. After transposition the first row
    supplies the column (site) names and the remaining rows hold the values.
    The sites listed in ``cols.npy`` are selected, converted with
    ``beta_to_m``, passed through the saved scaler and PCA, and fed to the
    saved SVR model.

    Args:
        uploaded_file: Upload handle exposing the CSV path as ``.name``; a
            plain path is accepted as well. ``None`` (nothing uploaded)
            produces an error message instead of an exception.

    Returns:
        ``"Custom svr clock: <age>"`` with the prediction for the first
        remaining row, otherwise a message starting with ``"Error:"``.
    """
    if uploaded_file is None:
        return "Error: no file uploaded."

    # Accept either an object carrying the path in ``.name`` or the path itself.
    csv_path = getattr(uploaded_file, "name", uploaded_file)
    user_data = pd.read_csv(csv_path).T

    # Set the first row containing site names as the column names
    user_data.columns = user_data.iloc[0]
    user_data = user_data[1:].reset_index(drop=True)

    # Check if all required sites are there.
    # NOTE: allow_pickle=True here and joblib.load below both unpickle data;
    # that is only acceptable because these are local artefacts, never
    # user-supplied files.
    cpgs = np.load("cols.npy", allow_pickle=True)
    if not set(cpgs).issubset(set(user_data.columns.values)):
        return "Error: missing methylation sites for custom SVR age computation"

    if user_data.empty:
        # Site names are present but there is no row of values to predict on.
        return "Error: uploaded file contains no samples for custom SVR age computation"

    # Load the required columns and convert them element by element.
    # Series.map per column replaces DataFrame.applymap, which is deprecated
    # since pandas 2.1.
    user_filtered = user_data[cpgs]
    user_filtered = user_filtered.apply(lambda column: column.map(beta_to_m))

    # Model artefacts are loaded only after the input has been validated.
    scaler = joblib.load('scaler_custom.pkl')
    pca = joblib.load('pca_custom.pkl')
    age_predictor = joblib.load("svr_model.pkl")

    # Compute scaling and PCA, then feed into the predictor
    user_pca = pca.transform(scaler.transform(user_filtered))
    age = age_predictor.predict(user_pca)[0]
    return f"Custom svr clock: {age}"


def process_file(file, clock_selection):
    """Route the uploaded file to the computation for the selected clock.

    Args:
        file: Uploaded file handle as delivered by the file input, or
            ``None`` when nothing has been uploaded.
        clock_selection: One of "Horvath", "PhenoAge", "DunedinPace" or
            "Custom SVR".

    Returns:
        The result string of the selected computation, or a message starting
        with ``"Error:"`` when no file was uploaded or the selection is not
        one of the supported clocks.
    """
    if file is None:
        return "Error: no file uploaded."

    if clock_selection == "Horvath":
        # The Horvath intercept is hard-coded here rather than read from the table.
        return compute_horvath_score(file, clock_selection, horvath_coefficients, 0.695507258)
    if clock_selection == "PhenoAge":
        # The PhenoAge intercept is the first value of the table's 'Intercept' column.
        pheno_intercept = pheno_coefficients['Intercept'].iloc[0]
        return compute_horvath_score(file, clock_selection, pheno_coefficients, pheno_intercept)
    if clock_selection == "DunedinPace":
        return compute_horvath_score(file, clock_selection, dunedin_coeffs, 0)
    if clock_selection == "Custom SVR":
        return custom_clock_computation(file)

    # Reached when nothing (or an unknown value) is selected.
    return "Error: unsupported clock selection. Choose one of: Horvath, PhenoAge, DunedinPace, Custom SVR."

# Build the UI: a file upload, a clock dropdown, an output textbox and a
# button that runs process_file on the current upload and selection.
with gr.Blocks() as demo:
    gr.Markdown("# Biological Clock Age Estimator")
    
    # file uploader
    # NOTE(review): the label mentions 'probes' and 'beta' columns, but the
    # "Custom SVR" path reads the file differently (it transposes the CSV and
    # uses the first row as site names) - confirm the expected format.
    file_input = gr.File(label="Upload your CSV file with 'probes' and 'beta' columns")
    
    # dropdown for clock selection and button to trigger computation;
    # the choices are the strings process_file compares against
    clock_selector = gr.Dropdown(choices=["Horvath", "PhenoAge", "DunedinPace", "Custom SVR"], label="Select Biological Clock")
    output = gr.Textbox(label="Output")
    run_button = gr.Button("Compute Score")
    
    # process_file function is called when button is clicked; its returned
    # string is written to the output textbox
    run_button.click(process_file, inputs=[file_input, clock_selector], outputs=output)

# Launch the Gradio interface
# NOTE(review): this runs at import time as well as when executed as a
# script - confirm whether an `if __name__ == "__main__":` guard is wanted.
demo.launch()