Commit
·
98ba9be
1
Parent(s):
9ac8247
updated app to replace comments
Browse files
app.py
CHANGED
@@ -3,19 +3,24 @@ import numpy as np
|
|
3 |
import tensorflow as tf
|
4 |
from tokenizers import Tokenizer
|
5 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
|
|
6 |
|
7 |
# Load trained tokenizer and model
|
8 |
tokenizer = Tokenizer.from_file("cr_tokenizer.json")
|
9 |
model = tf.keras.models.load_model("crv3.keras")
|
10 |
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
12 |
def tokenize_java_code(code: str, max_length=100):
    """Encode Java source into a single-row batch of token ids.

    The text is encoded with the module-level ``tokenizer``; the id list is
    then passed through ``pad_sequences`` with ``maxlen=max_length`` and
    ``padding="post"``.

    Args:
        code: Java source text to encode.
        max_length: Target sequence length handed to ``pad_sequences``.

    Returns:
        A NumPy array reshaped to one row (``reshape(1, -1)``).
    """
    token_ids = tokenizer.encode(code).ids
    batch = pad_sequences([token_ids], maxlen=max_length, padding="post")
    first_row = batch[0]
    # Put the leading batch axis back so the model receives a 2-D input.
    return np.array(first_row).reshape(1, -1)
|
17 |
|
18 |
-
# Prediction function
|
19 |
def classify_code(input_text, input_file):
|
20 |
"""Classifies Java code readability based on user input."""
|
21 |
# Load Java file if provided
|
@@ -27,21 +32,23 @@ def classify_code(input_text, input_file):
|
|
27 |
if not code.strip(): # Ensure input is not empty
|
28 |
return "Please provide a Java code snippet."
|
29 |
|
|
|
|
|
|
|
30 |
# Tokenize and predict
|
31 |
-
tokenized_code = tokenize_java_code(
|
32 |
prediction = model.predict(tokenized_code)[0][0]
|
33 |
|
34 |
# Convert to readable/unreadable
|
35 |
return "Readable" if prediction > 0.5 else "Unreadable"
|
36 |
|
37 |
-
# Create Gradio interface
|
38 |
gr.Interface(
|
39 |
fn=classify_code,
|
40 |
inputs=[
|
41 |
gr.Textbox(lines=10, placeholder="Paste Java code here...", label="Java Code Snippet"),
|
42 |
gr.File(type="binary", label="Upload Java File (.java)")
|
43 |
],
|
44 |
-
outputs=gr.Text(label="Readability
|
45 |
title="Java Code Readability Classifier",
|
46 |
description="Upload a Java file or paste a Java code snippet to check if it's readable or unreadable.",
|
47 |
allow_flagging="never"
|
|
|
3 |
import tensorflow as tf
|
4 |
from tokenizers import Tokenizer
|
5 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
6 |
+
import re
|
7 |
|
8 |
# Load trained tokenizer and model
|
9 |
tokenizer = Tokenizer.from_file("cr_tokenizer.json")
|
10 |
model = tf.keras.models.load_model("crv3.keras")
|
11 |
|
12 |
+
def replace_java_comments(code: str) -> str:
    """Replace Java comments in *code* with placeholder tokens.

    Every ``// ...`` comment becomes `` SINGLE_LINE_COMMENT `` and every
    ``/* ... */`` comment becomes `` MULTI_LINE_COMMENT ``.

    The source is scanned once, left to right, so each comment is replaced
    exactly once and by the right placeholder:

    * ``//`` inside a block comment (for example a URL in Javadoc) no longer
      swallows the closing ``*/``.
    * ``//`` or ``/*`` inside string, char or text-block literals is left
      untouched, because literals are matched (and kept verbatim) before the
      comment alternatives get a chance to fire.

    Args:
        code: Java source text.

    Returns:
        The source with comments replaced, with leading and trailing
        whitespace of the whole text removed (inner indentation is kept).

    NOTE(review): the placeholders must match whatever preprocessing was
    applied to the model's training data -- confirm before deploying.
    """
    token_pattern = re.compile(
        r'"""[\s\S]*?"""'                # text block literal, kept as is
        r'|"(?:\\.|[^"\\\n])*"'          # string literal, kept as is
        r"|'(?:\\.|[^'\\\n])*'"          # char literal, kept as is
        r"|(?P<line>//.*)"               # single-line comment
        r"|(?P<block>/\*[\s\S]*?\*/)"    # multi-line comment
    )

    def _substitute(match):
        # Only the two comment groups are rewritten; literals pass through.
        if match.group("line") is not None:
            return " SINGLE_LINE_COMMENT "
        if match.group("block") is not None:
            return " MULTI_LINE_COMMENT "
        return match.group(0)

    # strip() trims only the outer edges of the whole text.
    return token_pattern.sub(_substitute, code).strip()
|
17 |
+
|
18 |
def tokenize_java_code(code: str, max_length=100):
    """Encode Java source into a single-row batch of token ids.

    The text is encoded with the module-level ``tokenizer``; the id list is
    then passed through ``pad_sequences`` with ``maxlen=max_length`` and
    ``padding="post"``.

    Args:
        code: Java source text to encode.
        max_length: Target sequence length handed to ``pad_sequences``.

    Returns:
        A NumPy array reshaped to one row (``reshape(1, -1)``).
    """
    token_ids = tokenizer.encode(code).ids
    batch = pad_sequences([token_ids], maxlen=max_length, padding="post")
    first_row = batch[0]
    # Put the leading batch axis back so the model receives a 2-D input.
    return np.array(first_row).reshape(1, -1)
|
23 |
|
|
|
24 |
def classify_code(input_text, input_file):
|
25 |
"""Classifies Java code readability based on user input."""
|
26 |
# Load Java file if provided
|
|
|
32 |
if not code.strip(): # Ensure input is not empty
|
33 |
return "Please provide a Java code snippet."
|
34 |
|
35 |
+
# Replace comments before tokenization
|
36 |
+
cleaned_code = replace_java_comments(code)
|
37 |
+
|
38 |
# Tokenize and predict
|
39 |
+
tokenized_code = tokenize_java_code(cleaned_code)
|
40 |
prediction = model.predict(tokenized_code)[0][0]
|
41 |
|
42 |
# Convert to readable/unreadable
|
43 |
return "Readable" if prediction > 0.5 else "Unreadable"
|
44 |
|
|
|
45 |
gr.Interface(
|
46 |
fn=classify_code,
|
47 |
inputs=[
|
48 |
gr.Textbox(lines=10, placeholder="Paste Java code here...", label="Java Code Snippet"),
|
49 |
gr.File(type="binary", label="Upload Java File (.java)")
|
50 |
],
|
51 |
+
outputs=gr.Text(label="Readability Classification"),
|
52 |
title="Java Code Readability Classifier",
|
53 |
description="Upload a Java file or paste a Java code snippet to check if it's readable or unreadable.",
|
54 |
allow_flagging="never"
|