M17idd committed on
Commit
76016b2
·
verified ·
1 Parent(s): 5a87c2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -13,13 +13,16 @@ from langchain.chat_models import ChatOpenAI
13
  tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-base-uncased")
14
  model = AutoModel.from_pretrained("HooshvareLab/bert-fa-base-uncased")
15
 
16
- @st.cache
17
  def get_embedding(text):
18
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
19
- with torch.no_grad():
20
- outputs = model(**inputs)
21
- embeddings = outputs.last_hidden_state.mean(dim=1)
22
- return embeddings.squeeze().numpy()
 
 
 
 
23
 
24
  def cosine_similarity(vec1, vec2):
25
  return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
 
13
  tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-base-uncased")
14
  model = AutoModel.from_pretrained("HooshvareLab/bert-fa-base-uncased")
15
 
 
16
  def get_embedding(text):
17
+ sub_chunks = split_text_to_chunks(text)
18
+ all_embeddings = []
19
+ for chunk in sub_chunks:
20
+ inputs = tokenizer(chunk, return_tensors="pt", truncation=True, padding=True, max_length=512)
21
+ with torch.no_grad():
22
+ outputs = model(**inputs)
23
+ embedding = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
24
+ all_embeddings.append(embedding)
25
+ return np.mean(all_embeddings, axis=0)
26
 
27
  def cosine_similarity(vec1, vec2):
28
  return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))