File size: 2,788 Bytes
ecd6d40
01817f2
 
1a80667
01817f2
1a80667
 
 
802dcc0
d2415b3
01817f2
d2415b3
84833c2
d2415b3
 
 
 
 
001b0af
d2415b3
 
 
1a80667
01817f2
802dcc0
1a80667
01817f2
802dcc0
 
 
 
 
1a80667
802dcc0
 
 
 
 
 
 
1a80667
 
802dcc0
1a80667
802dcc0
1a80667
802dcc0
 
1a80667
802dcc0
1a80667
 
802dcc0
 
1a80667
 
 
 
01817f2
1a80667
 
 
 
802dcc0
 
1a80667
802dcc0
dcc3b55
1a80667
 
802dcc0
1a80667
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from huggingface_hub import hf_hub_download
import gradio as gr
import joblib
import json
import lightgbm as lgb
from sentence_transformers import SentenceTransformer
import numpy as np

# --- Model artifacts -------------------------------------------------------
# Fetch every artifact from the Hugging Face model repository once at startup.
repo_id = "Nawal20/Essay"

_artifact_paths = {
    filename: hf_hub_download(repo_id=repo_id, filename=filename)
    for filename in (
        "ridge_model.pkl",
        "lightgbm_model.pkl",
        "scaler_encoder.pkl",
        "metadata_columns.json",
    )
}

# Deserialize the two regressors and the fitted categorical encoder.
ridge = joblib.load(_artifact_paths["ridge_model.pkl"])
lgb_model = joblib.load(_artifact_paths["lightgbm_model.pkl"])
encoder = joblib.load(_artifact_paths["scaler_encoder.pkl"])

# Column order used to assemble the metadata feature row at prediction time.
with open(_artifact_paths["metadata_columns.json"], "r") as f:
    metadata_columns = json.load(f)

# Sentence-BERT model that embeds an essay into a fixed-size vector.
sbert = SentenceTransformer("sentence-transformers/paraphrase-mpnet-base-v2")

def predict_score(essay_text, gender, race_ethnicity, assignment, prompt_name, disadvantaged, disability, ell_status):
    """Score an essay by averaging a Ridge and a LightGBM regressor.

    Parameters mirror the Gradio inputs: the raw essay text plus seven
    categorical metadata fields (strings chosen from the UI dropdowns).

    Returns the mean of the two model predictions, rounded to two
    decimal places, as a plain Python float.
    """
    # Embed the essay with SBERT; shape (1, embedding_dim).
    essay_embedding = sbert.encode([essay_text])

    # Map the UI inputs onto the column names the encoder was fitted with.
    metadata_input = {
        "gender": gender,
        "race_ethnicity": race_ethnicity,
        "assignment": assignment,
        "prompt_name": prompt_name,
        "economically_disadvantaged": disadvantaged,
        "student_disability_status": disability,
        "ell_status": ell_status
    }

    # Order the values exactly as metadata_columns dictates, then encode.
    # A KeyError here means the UI fields and metadata_columns disagree.
    metadata_values = [metadata_input[col] for col in metadata_columns]
    metadata_array = encoder.transform([metadata_values])  # sparse, shape (1, N)

    # Concatenate embedding + encoded metadata into one feature row.
    full_input = np.hstack([essay_embedding.reshape(1, -1), metadata_array.toarray()])

    # Average the two regressors. Cast to a built-in float: round() on a
    # numpy scalar returns a numpy scalar, which can trip up Gradio/JSON
    # serialization.
    ridge_score = ridge.predict(full_input)[0]
    lgb_score = lgb_model.predict(full_input)[0]
    return float(round((ridge_score + lgb_score) / 2, 2))

# --- Gradio UI -------------------------------------------------------------
# Input order must match the predict_score parameter order exactly.
iface = gr.Interface(
    fn=predict_score,
    inputs=[
        gr.Textbox(label="Essay Text", lines=10, placeholder="Paste your essay here..."),
        gr.Dropdown(["Male", "Female", "Other"], label="Gender"),
        gr.Dropdown(["Asian", "Black", "Hispanic", "White", "Other"], label="Race/Ethnicity"),
        gr.Dropdown(["Informative", "Argumentative", "Narrative"], label="Assignment"),
        gr.Dropdown(["Education Benefits", "Technology Impact", "Climate Change"], label="Prompt Name"),
        gr.Dropdown(["Yes", "No"], label="Economically Disadvantaged"),
        gr.Dropdown(["None", "Learning", "Physical", "Other"], label="Student has Disability"),
        gr.Dropdown(["Yes", "No"], label="ELL Status"),
    ],
    outputs=gr.Number(label="Predicted Essay Score"),
    title="📘 Automated Essay Scoring App"
)

# Launch only when executed as a script, so importing this module (e.g. from
# tests or another app) does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()