Circhastic committed on
Commit
9fd42b6
·
1 Parent(s): fe0e2ec
Files changed (2) hide show
  1. app.py +3 -2
  2. requirements.txt +1 -1
app.py CHANGED
@@ -3,17 +3,18 @@ import numpy as np
3
  import tensorflow as tf
4
  import re
5
  from tree_sitter import Language, Parser
6
- import tree_sitter_languages # Pre-built parsers for multiple languages
7
  from tokenizers import Tokenizer
8
  from tensorflow.keras.preprocessing.sequence import pad_sequences
9
 
10
-
11
  tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
12
  model = tf.keras.models.load_model("crv3.keras") # CNN model
13
 
14
  parser = Parser()
15
  parser.set_language(tree_sitter_languages.get_language("java"))
16
 
 
 
17
  def syntax_aware_tokenize(code):
18
  """Tokenizes Java code using Tree-Sitter (AST-based)."""
19
  tree = parser.parse(bytes(code, "utf8"))
 
3
  import tensorflow as tf
4
  import re
5
  from tree_sitter import Language, Parser
6
+ # import tree_sitter_languages # Pre-built parsers for multiple languages
7
  from tokenizers import Tokenizer
8
  from tensorflow.keras.preprocessing.sequence import pad_sequences
9
 
 
10
  tokenizer = Tokenizer.from_file("syntax_bpe_tokenizer.json") # New BPE tokenizer
11
  model = tf.keras.models.load_model("crv3.keras") # CNN model
12
 
13
  parser = Parser()
14
  parser.set_language(tree_sitter_languages.get_language("java"))
15
 
16
+ PY_LANGUAGE = Language(tspython.language())
17
+
18
  def syntax_aware_tokenize(code):
19
  """Tokenizes Java code using Tree-Sitter (AST-based)."""
20
  tree = parser.parse(bytes(code, "utf8"))
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  gradio
2
  tensorflow
3
  tokenizers
4
- tree_sitter
5
  tree_sitter_languages==1.10.2
 
1
  gradio
2
  tensorflow
3
  tokenizers
4
+ tree_sitter==0.21.3
5
  tree_sitter_languages==1.10.2