# Origin: commit 9ac8247 by Circhastic — "added classifier implementation and associated files"
import gradio as gr
import numpy as np
import tensorflow as tf
from tokenizers import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Artifacts produced at training time; both paths are resolved relative to
# the current working directory.
TOKENIZER_PATH = "cr_tokenizer.json"
MODEL_PATH = "crv3.keras"

# Loaded once at import so every request reuses the same tokenizer and model.
tokenizer = Tokenizer.from_file(TOKENIZER_PATH)
model = tf.keras.models.load_model(MODEL_PATH)
# Tokenization function
def tokenize_java_code(code: str, max_length=100):
    """Encode Java source into a fixed-length batch of token ids.

    Args:
        code: Java source text to encode with the module-level ``tokenizer``.
        max_length: Length every sequence is padded (or cut) to.

    Returns:
        A 2-D NumPy array holding a single row of token ids, i.e. a batch of
        one sequence ready to be passed to the model.
    """
    token_ids = tokenizer.encode(code).ids
    # Padding is appended at the end ("post"). Truncation is left at the
    # pad_sequences default, which removes tokens from the *front* of
    # sequences longer than max_length.
    batch = pad_sequences([token_ids], maxlen=max_length, padding="post")
    # Keep an explicit (1, n) batch shape for model.predict.
    return np.array(batch).reshape(1, -1)
# Prediction function
def _read_uploaded_code(input_file):
    """Return the contents of an uploaded file as text.

    Args:
        input_file: Either the raw ``bytes`` of the upload (what a Gradio
            ``File`` component configured with ``type="binary"`` delivers)
            or a file-like object exposing ``read()``.

    Returns:
        The decoded file contents as ``str``.
    """
    if isinstance(input_file, (bytes, bytearray)):
        raw = bytes(input_file)
    else:
        # Fallback for file-like objects.
        raw = input_file.read()
    if isinstance(raw, str):
        return raw
    # Undecodable bytes become U+FFFD instead of aborting the request.
    return raw.decode("utf-8", errors="replace")


def classify_code(input_text, input_file):
    """Classify Java code as "Readable" or "Unreadable".

    An uploaded file takes precedence over the pasted snippet.

    Args:
        input_text: Java source pasted into the text box; may be empty or
            ``None``.
        input_file: Uploaded file contents (``bytes`` or a file-like
            object), or ``None`` when nothing was uploaded.

    Returns:
        ``"Readable"`` when the model score is above 0.5, ``"Unreadable"``
        otherwise, or a prompt asking for input when no code was supplied.
    """
    if input_file is not None:
        code = _read_uploaded_code(input_file)
    else:
        code = input_text

    # Guard against both a missing value and whitespace-only input before
    # touching the tokenizer/model.
    if code is None or not code.strip():
        return "Please provide a Java code snippet."

    tokenized_code = tokenize_java_code(code)
    # predict returns a batch of outputs; take the single score of the
    # single sample.
    prediction = model.predict(tokenized_code)[0][0]

    return "Readable" if prediction > 0.5 else "Unreadable"
# Assemble the UI: a text box and a file upload feed classify_code, whose
# verdict is shown in a single text output.
code_box = gr.Textbox(lines=10, placeholder="Paste Java code here...", label="Java Code Snippet")
file_upload = gr.File(type="binary", label="Upload Java File (.java)")
verdict_box = gr.Text(label="Readability Prediction")

demo = gr.Interface(
    fn=classify_code,
    inputs=[code_box, file_upload],
    outputs=verdict_box,
    title="Java Code Readability Classifier",
    description="Upload a Java file or paste a Java code snippet to check if it's readable or unreadable.",
    allow_flagging="never",
)

# Start the web server.
demo.launch()