pradeepsengarr committed on
Commit
5256379
·
verified ·
1 Parent(s): 4977945

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -1
app.py CHANGED
@@ -468,11 +468,29 @@ def split_text_into_chunks(text):
468
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
469
  return splitter.create_documents([text])
470
 
 
 
 
 
 
 
 
471
  # --- Create Vector DB ---
472
  def create_vectorstore(documents):
 
 
 
 
 
 
 
473
  model = SentenceTransformer("all-MiniLM-L6-v2", device='cpu')
474
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
475
- db = Chroma.from_documents(documents, embeddings, persist_directory=persist_directory)
 
 
 
 
476
  db.persist()
477
  return db
478
 
 
468
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
469
  return splitter.create_documents([text])
470
 
471
+ import os
472
+ import tempfile
473
+ import shutil
474
+ from langchain.vectorstores import Chroma
475
+ from langchain.embeddings import HuggingFaceEmbeddings
476
+ from sentence_transformers import SentenceTransformer
477
+
478
  # --- Create Vector DB ---
479
  def create_vectorstore(documents):
480
+ # Setup a writable directory for Chroma
481
+ chroma_dir = os.path.join(tempfile.gettempdir(), "chroma_db")
482
+ if os.path.exists(chroma_dir):
483
+ shutil.rmtree(chroma_dir) # Clear any old data
484
+ os.makedirs(chroma_dir, exist_ok=True)
485
+
486
+ # Initialize the model and embeddings
487
  model = SentenceTransformer("all-MiniLM-L6-v2", device='cpu')
488
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
489
+
490
+ # Create the Chroma database
491
+ db = Chroma.from_documents(documents, embeddings, persist_directory=chroma_dir)
492
+
493
+ # Persist the Chroma database
494
  db.persist()
495
  return db
496