kambris committed on
Commit
a6cdac2
·
verified ·
1 Parent(s): c671da9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -3
app.py CHANGED
@@ -17,8 +17,8 @@ def generate_embeddings(texts):
17
  embeddings = []
18
 
19
  for text in texts:
20
- # Tokenize the text (do not truncate)
21
- tokens = bert_tokenizer.tokenizer.encode(text, truncation=False) # Get tokens without truncation
22
 
23
  # Split the tokens into chunks of size 512 (maximum length)
24
  chunked_texts = [tokens[i:i + 512] for i in range(0, len(tokens), 512)]
@@ -85,4 +85,3 @@ if uploaded_file is not None:
85
  st.write(result_df.head())
86
  except Exception as e:
87
  st.error(f"Error: {e}")
88
-
 
17
  embeddings = []
18
 
19
  for text in texts:
20
+ # Tokenize the text with truncation set to False
21
+ tokens = bert_tokenizer.tokenizer.encode(text, truncation=False) # Do not truncate here
22
 
23
  # Split the tokens into chunks of size 512 (maximum length)
24
  chunked_texts = [tokens[i:i + 512] for i in range(0, len(tokens), 512)]
 
85
  st.write(result_df.head())
86
  except Exception as e:
87
  st.error(f"Error: {e}")