user committed on
Commit
7ebdd2b
·
1 Parent(s): 97426bb

Fix tokenizer error

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -66,12 +66,15 @@ def load_and_process_text(file_path):
66
  return []
67
 
68
  @st.cache_data
69
- def create_embeddings(chunks, _embedding_model):
 
 
 
70
  embeddings = []
71
  for chunk in chunks:
72
  inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
73
  with torch.no_grad():
74
- outputs = _embedding_model(**inputs)
75
  embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
76
  return np.array(embeddings)
77
 
 
66
  return []
67
 
68
@st.cache_data
def create_embeddings(chunks, embedding_model):
    """Embed each text chunk with a Hugging Face transformer model.

    Loads the tokenizer and model named by ``embedding_model``, runs every
    chunk through the model without gradients, and mean-pools the last
    hidden state into a single vector per chunk.

    Args:
        chunks: iterable of text strings to embed.
        embedding_model: Hugging Face model identifier handed to
            ``from_pretrained`` for both tokenizer and model.

    Returns:
        ``np.ndarray`` with one pooled embedding row per chunk.
    """
    tokenizer = AutoTokenizer.from_pretrained(embedding_model)
    model = AutoModel.from_pretrained(embedding_model)

    vectors = []
    for text in chunks:
        encoded = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        # Inference only -- disable autograd bookkeeping.
        with torch.no_grad():
            result = model(**encoded)
        # Mean-pool the token embeddings into one vector for this chunk.
        vectors.append(result.last_hidden_state.mean(dim=1).squeeze().numpy())
    return np.array(vectors)
80