Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -468,11 +468,29 @@ def split_text_into_chunks(text):
|
|
468 |
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
469 |
return splitter.create_documents([text])
|
470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
# --- Create Vector DB ---
|
472 |
def create_vectorstore(documents):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
473 |
model = SentenceTransformer("all-MiniLM-L6-v2", device='cpu')
|
474 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
475 |
-
|
|
|
|
|
|
|
|
|
476 |
db.persist()
|
477 |
return db
|
478 |
|
|
|
468 |
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
469 |
return splitter.create_documents([text])
|
470 |
|
471 |
+
import os
|
472 |
+
import tempfile
|
473 |
+
import shutil
|
474 |
+
from langchain.vectorstores import Chroma
|
475 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
476 |
+
from sentence_transformers import SentenceTransformer
|
477 |
+
|
478 |
# --- Create Vector DB ---
|
479 |
def create_vectorstore(documents):
    """Build a fresh Chroma vector store from ``documents`` and return it.

    The store is written to a ``chroma_db`` directory under the system
    temporary directory. Anything a previous call left in that directory
    is deleted first, so the returned store contains only ``documents``.

    Args:
        documents: Non-empty sequence of documents to embed and index
            (presumably the output of ``split_text_into_chunks`` -- confirm
            against the caller).

    Returns:
        The Chroma store created from ``documents``.

    Raises:
        ValueError: If ``documents`` is empty or ``None``.
    """
    # Reject empty input before touching the disk, so a bad call cannot wipe
    # the store that an earlier call built.
    if not documents:
        raise ValueError("create_vectorstore() requires at least one document")

    # Setup a writable directory for Chroma
    chroma_dir = os.path.join(tempfile.gettempdir(), "chroma_db")
    try:
        shutil.rmtree(chroma_dir)  # Clear any old data
    except FileNotFoundError:
        # Nothing left over from a previous run.
        pass
    os.makedirs(chroma_dir, exist_ok=True)

    # Only the embeddings wrapper is handed to Chroma. The separate
    # SentenceTransformer instance that used to be built here was never used,
    # so it only cost an extra model load.
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    # Create the Chroma database
    db = Chroma.from_documents(documents, embeddings, persist_directory=chroma_dir)

    # Persist the Chroma database
    db.persist()
    return db
|
496 |
|