Spaces:
Sleeping
Sleeping
user
committed on
Commit
·
7ebdd2b
1
Parent(s):
97426bb
Fix tokenizer error
Browse files
app.py
CHANGED
@@ -66,12 +66,15 @@ def load_and_process_text(file_path):
|
|
66 |
return []
|
67 |
|
68 |
@st.cache_data
|
69 |
-
def create_embeddings(chunks,
|
|
|
|
|
|
|
70 |
embeddings = []
|
71 |
for chunk in chunks:
|
72 |
inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
73 |
with torch.no_grad():
|
74 |
-
outputs =
|
75 |
embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
|
76 |
return np.array(embeddings)
|
77 |
|
|
|
66 |
return []
|
67 |
|
68 |
@st.cache_data
def create_embeddings(chunks, embedding_model):
    """Embed each text chunk with a Hugging Face transformer model.

    Args:
        chunks: iterable of strings to embed.
        embedding_model: model name or path passed to
            ``AutoTokenizer.from_pretrained`` / ``AutoModel.from_pretrained``.

    Returns:
        ``np.ndarray`` of shape ``(len(chunks), hidden_size)`` — one
        mean-pooled last-hidden-state vector per chunk.
    """
    # NOTE(review): the tokenizer/model are loaded inside the cached function;
    # Streamlit recommends `st.cache_resource` for unserializable model
    # objects — confirm against the app's caching strategy before changing.
    tokenizer = AutoTokenizer.from_pretrained(embedding_model)
    model = AutoModel.from_pretrained(embedding_model)

    embeddings = []
    for chunk in chunks:
        # Truncate to the model's 512-token window; each chunk is a batch of 1.
        inputs = tokenizer(
            chunk,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        # Inference only — disable autograd bookkeeping.
        with torch.no_grad():
            outputs = model(**inputs)
        # Mean-pool over the sequence dimension, then squeeze the batch dim.
        embeddings.append(
            outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
        )
    return np.array(embeddings)
|
80 |
|