M17idd committed on
Commit
cc9574d
·
verified ·
1 Parent(s): 76016b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -9
app.py CHANGED
@@ -13,16 +13,13 @@ from langchain.chat_models import ChatOpenAI
13
  tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-base-uncased")
14
  model = AutoModel.from_pretrained("HooshvareLab/bert-fa-base-uncased")
15
 
 
16
  def get_embedding(text):
17
- sub_chunks = split_text_to_chunks(text)
18
- all_embeddings = []
19
- for chunk in sub_chunks:
20
- inputs = tokenizer(chunk, return_tensors="pt", truncation=True, padding=True, max_length=512)
21
- with torch.no_grad():
22
- outputs = model(**inputs)
23
- embedding = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
24
- all_embeddings.append(embedding)
25
- return np.mean(all_embeddings, axis=0)
26
 
27
  def cosine_similarity(vec1, vec2):
28
  return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
 
13
  tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-base-uncased")
14
  model = AutoModel.from_pretrained("HooshvareLab/bert-fa-base-uncased")
15
 
16
+ @st.cache
17
  def get_embedding(text):
18
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
19
+ with torch.no_grad():
20
+ outputs = model(**inputs)
21
+ embeddings = outputs.last_hidden_state.mean(dim=1)
22
+ return embeddings.squeeze().numpy()
 
 
 
 
23
 
24
  def cosine_similarity(vec1, vec2):
25
  return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))