Spaces:

bardicreels
/

rag2

Sleeping

user committed on Sep 12, 2024

Commit

b3b4e83

1 Parent(s): 5412ab7

update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -67,8 +67,13 @@ def load_and_process_text(file_path):
 @st.cache_data
 def create_embeddings(chunks, _embedding_model):
-    tokenizer = AutoTokenizer.from_pretrained(_embedding_model)
-    model = AutoModel.from_pretrained(_embedding_model)
     embeddings = []
     for chunk in chunks:
@@ -76,7 +81,8 @@ def create_embeddings(chunks, _embedding_model):
         with torch.no_grad():
             outputs = model(**inputs)
         embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
-    return np.array(embeddings)
 @st.cache_resource
 def create_faiss_index(embeddings):

 @st.cache_data
 def create_embeddings(chunks, _embedding_model):
+    if isinstance(_embedding_model, str):
+        tokenizer = AutoTokenizer.from_pretrained(_embedding_model)
+        model = AutoModel.from_pretrained(_embedding_model)
+    else:
+        # Assume _embedding_model is already a model instance
+        model = _embedding_model
+        tokenizer = AutoTokenizer.from_pretrained(model.config._name_or_path)
     embeddings = []
     for chunk in chunks:
         with torch.no_grad():
             outputs = model(**inputs)
         embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
+    return np.vstack(embeddings)
 @st.cache_resource
 def create_faiss_index(embeddings):