Commit
·
4258e9c
1
Parent(s):
6ffc9b2
replaced tokenizer json
Browse files
app.py
CHANGED
@@ -7,11 +7,13 @@ import tree_sitter_languages # Pre-built parsers for multiple languages
|
|
7 |
from tokenizers import Tokenizer
|
8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
9 |
|
|
|
10 |
tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
|
11 |
model = tf.keras.models.load_model("crv3.keras") # CNN model
|
12 |
|
13 |
parser = Parser()
|
14 |
-
|
|
|
15 |
|
16 |
def syntax_aware_tokenize(code):
|
17 |
"""Tokenizes Java code using Tree-Sitter (AST-based)."""
|
|
|
7 |
from tokenizers import Tokenizer
|
8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
9 |
|
10 |
+
|
11 |
tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
|
12 |
model = tf.keras.models.load_model("crv3.keras") # CNN model
|
13 |
|
14 |
parser = Parser()
|
15 |
+
java_lang = tree_sitter_languages.get_language("java") # Get Java language object
|
16 |
+
parser.set_language(java_lang) # Set it for the parser
|
17 |
|
18 |
def syntax_aware_tokenize(code):
|
19 |
"""Tokenizes Java code using Tree-Sitter (AST-based)."""
|