idkash1 committed on
Commit
e946844
·
verified ·
1 Parent(s): 02fde74

Update human_text_detect.py

Browse files
Files changed (1) hide show
  1. human_text_detect.py +7 -2
human_text_detect.py CHANGED
@@ -94,7 +94,6 @@ def detect_human_text(model_name, topic, text):
94
  max_tokens_per_sentence = 100
95
 
96
  cache_dir = f"/tmp/cacheHuggingface/{model_name}"
97
- os.environ["TRANSFORMERS_CACHE"] = cache_dir
98
 
99
  print('Create dir')
100
  # Use a writable directory inside the Hugging Face Space
@@ -104,7 +103,13 @@ def detect_human_text(model_name, topic, text):
104
  # Init model
105
  print('Init tokenizer')
106
  lm_name = 'gpt2-xl' if model_name == 'GPT2XL' else 'microsoft/phi-2'
107
- tokenizer = AutoTokenizer.from_pretrained(cache_dir) # lm_name, cache_dir=cache_dir
 
 
 
 
 
 
108
 
109
  print('Init model')
110
  model = AutoModelForCausalLM.from_pretrained(lm_name) #, cache_dir=cache_dir
 
94
  max_tokens_per_sentence = 100
95
 
96
  cache_dir = f"/tmp/cacheHuggingface/{model_name}"
 
97
 
98
  print('Create dir')
99
  # Use a writable directory inside the Hugging Face Space
 
103
  # Init model
104
  print('Init tokenizer')
105
  lm_name = 'gpt2-xl' if model_name == 'GPT2XL' else 'microsoft/phi-2'
106
+ tokenizer = AutoTokenizer.from_pretrained(lm_name, cache_dir=cache_dir)
107
+
108
+ print("Save tokenizer")
109
+ tokenizer.save_pretrained(cache_dir)
110
+
111
+ print("Checking saved tokenizer files in:", cache_dir)
112
+ print(os.listdir(cache_dir))
113
 
114
  print('Init model')
115
  model = AutoModelForCausalLM.from_pretrained(lm_name) #, cache_dir=cache_dir