from huggingface_hub import hf_hub_download
import gradio as gr
import joblib
import json
import lightgbm as lgb
from sentence_transformers import SentenceTransformer
import numpy as np

# Hugging Face Hub repository that holds the trained artifacts.
repo_id = "Nawal20/Essay"

# Fetch each artifact from the repo; hf_hub_download returns the local file path.
ridge_path = hf_hub_download(repo_id=repo_id, filename="ridge_model.pkl")
lgb_path = hf_hub_download(repo_id=repo_id, filename="lightgbm_model.pkl")
encoder_path = hf_hub_download(repo_id=repo_id, filename="scaler_encoder.pkl")
metadata_path = hf_hub_download(repo_id=repo_id, filename="metadata_columns.json")

# NOTE(security): joblib.load unpickles its input, and unpickling can execute
# arbitrary code. Only keep pointing repo_id at a repository you trust.
ridge = joblib.load(ridge_path)
lgb_model = joblib.load(lgb_path)
encoder = joblib.load(encoder_path)

# Read the JSON with an explicit UTF-8 encoding so the result does not depend
# on the platform's locale default.
# The loaded value is iterated in predict_score to order the metadata fields.
with open(metadata_path, "r", encoding="utf-8") as f:
    metadata_columns = json.load(f)

# Sentence-BERT model used to embed the essay text.
sbert = SentenceTransformer("sentence-transformers/paraphrase-mpnet-base-v2")

def predict_score(essay_text, gender, race_ethnicity, assignment, prompt_name, disadvantaged, disability, ell_status):
    """Predict an essay score from the essay text and student metadata.

    The essay is embedded with the module-level ``sbert`` model, the metadata
    fields are transformed with the module-level ``encoder``, and the two
    feature blocks are concatenated and fed to both ``ridge`` and
    ``lgb_model``. The result is the mean of the two predictions.

    Args:
        essay_text: Raw essay text to score.
        gender: Value for the "gender" metadata field.
        race_ethnicity: Value for the "race_ethnicity" metadata field.
        assignment: Value for the "assignment" metadata field.
        prompt_name: Value for the "prompt_name" metadata field.
        disadvantaged: Value for the "economically_disadvantaged" field.
        disability: Value for the "student_disability_status" field.
        ell_status: Value for the "ell_status" metadata field.

    Returns:
        The averaged prediction as a plain Python float rounded to two
        decimal places.

    Raises:
        KeyError: If ``metadata_columns`` names a field that is not one of
            the seven metadata keys built below.
    """
    # Encode the essay as a single-item batch.
    essay_embedding = sbert.encode([essay_text])

    # Map the UI inputs onto the metadata field names.
    metadata_input = {
        "gender": gender,
        "race_ethnicity": race_ethnicity,
        "assignment": assignment,
        "prompt_name": prompt_name,
        "economically_disadvantaged": disadvantaged,
        "student_disability_status": disability,
        "ell_status": ell_status,
    }

    # Order the values as listed in metadata_columns
    # (presumably the column order the encoder was fitted with -- TODO confirm).
    metadata_values = [metadata_input[col] for col in metadata_columns]
    metadata_array = encoder.transform([metadata_values])

    # The encoder may return either a scipy sparse matrix or a dense array
    # depending on how it was configured; only densify when needed so a
    # dense result does not fail on a missing ``toarray`` attribute.
    if hasattr(metadata_array, "toarray"):
        metadata_array = metadata_array.toarray()

    # Combine essay embedding + encoded metadata into one feature row.
    full_input = np.hstack([essay_embedding.reshape(1, -1), np.asarray(metadata_array)])

    # Predict with both models and average them.
    ridge_score = ridge.predict(full_input)[0]
    lgb_score = lgb_model.predict(full_input)[0]

    # Convert to a built-in float so the caller gets a plain Python number
    # rather than a NumPy scalar.
    final_score = round(float((ridge_score + lgb_score) / 2), 2)

    return final_score

# Gradio UI
# Input widgets, listed in the same order as predict_score's parameters.
_input_widgets = [
    gr.Textbox(
        label="Essay Text",
        lines=10,
        placeholder="Paste your essay here...",
    ),
    gr.Dropdown(choices=["Male", "Female", "Other"], label="Gender"),
    gr.Dropdown(
        choices=["Asian", "Black", "Hispanic", "White", "Other"],
        label="Race/Ethnicity",
    ),
    gr.Dropdown(
        choices=["Informative", "Argumentative", "Narrative"],
        label="Assignment",
    ),
    gr.Dropdown(
        choices=["Education Benefits", "Technology Impact", "Climate Change"],
        label="Prompt Name",
    ),
    gr.Dropdown(choices=["Yes", "No"], label="Economically Disadvantaged"),
    gr.Dropdown(
        choices=["None", "Learning", "Physical", "Other"],
        label="Student has Disability",
    ),
    gr.Dropdown(choices=["Yes", "No"], label="ELL Status"),
]

# Single numeric output that shows the value returned by predict_score.
_score_output = gr.Number(label="Predicted Essay Score")

iface = gr.Interface(
    fn=predict_score,
    inputs=_input_widgets,
    outputs=_score_output,
    title="📘 Automated Essay Scoring App",
)

# Start the web app as soon as the script is executed.
iface.launch()