Commit
·
9fd42b6
1
Parent(s):
fe0e2ec
aaaa
Browse files
- app.py +3 -2
- requirements.txt +1 -1
app.py
CHANGED
@@ -3,17 +3,18 @@ import numpy as np
|
|
3 |
import tensorflow as tf
|
4 |
import re
|
5 |
from tree_sitter import Language, Parser
|
6 |
-
import tree_sitter_languages # Pre-built parsers for multiple languages
|
7 |
from tokenizers import Tokenizer
|
8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
9 |
|
10 |
-
|
11 |
tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
|
12 |
model = tf.keras.models.load_model("crv3.keras") # CNN model
|
13 |
|
14 |
parser = Parser()
|
15 |
parser.set_language(tree_sitter_languages.get_language("java"))
|
16 |
|
|
|
|
|
17 |
def syntax_aware_tokenize(code):
|
18 |
"""Tokenizes Java code using Tree-Sitter (AST-based)."""
|
19 |
tree = parser.parse(bytes(code, "utf8"))
|
|
|
3 |
import tensorflow as tf
|
4 |
import re
|
5 |
from tree_sitter import Language, Parser
|
6 |
+
# import tree_sitter_languages # Pre-built parsers for multiple languages
|
7 |
from tokenizers import Tokenizer
|
8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
9 |
|
|
|
10 |
tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
|
11 |
model = tf.keras.models.load_model("crv3.keras") # CNN model
|
12 |
|
13 |
parser = Parser()
|
14 |
parser.set_language(tree_sitter_languages.get_language("java"))
|
15 |
|
16 |
+
PY_LANGUAGE = Language(tspython.language())
|
17 |
+
|
18 |
def syntax_aware_tokenize(code):
|
19 |
"""Tokenizes Java code using Tree-Sitter (AST-based)."""
|
20 |
tree = parser.parse(bytes(code, "utf8"))
|
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
gradio
|
2 |
tensorflow
|
3 |
tokenizers
|
4 |
-
tree_sitter
|
5 |
tree_sitter_languages==1.10.2
|
|
|
1 |
gradio
|
2 |
tensorflow
|
3 |
tokenizers
|
4 |
+
tree_sitter==0.21.3
|
5 |
tree_sitter_languages==1.10.2
|