Spaces:

raghuv-aditya
/

Course-Finder-AI-Large

Sleeping

raghuv-aditya committed on Nov 10, 2024

Commit

aace4d6

verified ·

1 Parent(s): 68a165d

Create embedding_storage.py

Files changed (1) hide show

embedding_storage.py ADDED Viewed

+from langchain_openai import OpenAIEmbeddings
+from langchain_chroma import Chroma
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.docstore.document import Document
+import os
+from config import PERSIST_DIRECTORY
+def process_safety_with_chroma(text):
+    """
+    Processes and stores the given text into ChromaDB.
+    Args:
+        text (str): Text to be embedded and stored.
+    Returns:
+        Chroma: The Chroma vector store object.
+    """
+    if os.path.exists(PERSIST_DIRECTORY):
+        vector_store = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=OpenAIEmbeddings())
+    else:
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
+        text_chunks = text_splitter.split_text(text)
+        documents = [Document(page_content=chunk, metadata={"source": f"chunk_{i}"}) for i, chunk in enumerate(text_chunks)]
+        embeddings = OpenAIEmbeddings()
+        vector_store = Chroma.from_documents(documents, embeddings, persist_directory=PERSIST_DIRECTORY)
+    return vector_store