kambris committed on
Commit
f62fb31
·
verified ·
1 Parent(s): e480aa0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -17
app.py CHANGED
@@ -239,24 +239,20 @@ def get_embedding_for_text(text, tokenizer, model):
239
  chunk_embeddings = []
240
 
241
  for chunk in chunks:
242
- try:
243
- inputs = tokenizer(
244
- chunk,
245
- return_tensors="pt",
246
- padding=True,
247
- truncation=True,
248
- max_length=512
249
- )
250
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
251
-
252
- with torch.no_grad():
253
- outputs = model(**inputs)
254
-
255
- embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
256
  chunk_embeddings.append(embedding[0])
257
- except Exception as e:
258
- st.warning(f"Error processing chunk: {str(e)}")
259
- continue
260
 
261
  if chunk_embeddings:
262
  weights = np.array([len(chunk.split()) for chunk in chunks])
 
239
  chunk_embeddings = []
240
 
241
  for chunk in chunks:
242
+ inputs = tokenizer(
243
+ chunk,
244
+ return_tensors="pt",
245
+ padding=True,
246
+ truncation=True,
247
+ max_length=512
248
+ )
249
+ inputs = {k: v.to(model.device) for k, v in inputs.items()}
250
+
251
+ with torch.no_grad():
252
+ # Access the first element of the tuple which contains the hidden states
253
+ outputs = model(**inputs)[0]
254
+ embedding = outputs[:, 0, :].cpu().numpy()
 
255
  chunk_embeddings.append(embedding[0])
 
 
 
256
 
257
  if chunk_embeddings:
258
  weights = np.array([len(chunk.split()) for chunk in chunks])