Spaces:

kambris
/

SoLProject

Runtime error

kambris committed on Nov 23, 2024

Commit

a6cdac2

verified ·

1 Parent(s): c671da9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,8 +17,8 @@ def generate_embeddings(texts):
     embeddings = []
     for text in texts:
-        # Tokenize the text (do not truncate)
-        tokens = bert_tokenizer.tokenizer.encode(text, truncation=False)  # Get tokens without truncation
         # Split the tokens into chunks of size 512 (maximum length)
         chunked_texts = [tokens[i:i + 512] for i in range(0, len(tokens), 512)]
@@ -85,4 +85,3 @@ if uploaded_file is not None:
             st.write(result_df.head())
     except Exception as e:
         st.error(f"Error: {e}")

     embeddings = []
     for text in texts:
+        # Tokenize the text with truncation set to False
+        tokens = bert_tokenizer.tokenizer.encode(text, truncation=False)  # Do not truncate here
         # Split the tokens into chunks of size 512 (maximum length)
         chunked_texts = [tokens[i:i + 512] for i in range(0, len(tokens), 512)]
             st.write(result_df.head())
     except Exception as e:
         st.error(f"Error: {e}")