Circhastic committed on
Commit
4258e9c
·
1 Parent(s): 6ffc9b2

replaced tokenizer json

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -7,11 +7,13 @@ import tree_sitter_languages # Pre-built parsers for multiple languages
7
  from tokenizers import Tokenizer
8
  from tensorflow.keras.preprocessing.sequence import pad_sequences
9
 
 
10
  tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
11
  model = tf.keras.models.load_model("crv3.keras") # CNN model
12
 
13
  parser = Parser()
14
- parser.set_language(tree_sitter_languages.get_language("java"))
 
15
 
16
  def syntax_aware_tokenize(code):
17
  """Tokenizes Java code using Tree-Sitter (AST-based)."""
 
7
  from tokenizers import Tokenizer
8
  from tensorflow.keras.preprocessing.sequence import pad_sequences
9
 
10
+
11
  tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
12
  model = tf.keras.models.load_model("crv3.keras") # CNN model
13
 
14
  parser = Parser()
15
+ java_lang = tree_sitter_languages.get_language("java") # Get Java language object
16
+ parser.set_language(java_lang) # Set it for the parser
17
 
18
  def syntax_aware_tokenize(code):
19
  """Tokenizes Java code using Tree-Sitter (AST-based)."""