raghuv-aditya committed on
Commit
aace4d6
·
verified ·
1 Parent(s): 68a165d

Create embedding_storage.py

Browse files
Files changed (1) hide show
  1. embedding_storage.py +29 -0
embedding_storage.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import OpenAIEmbeddings
2
+ from langchain_chroma import Chroma
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.docstore.document import Document
5
+ import os
6
+
7
+ from config import PERSIST_DIRECTORY
8
+
9
def process_safety_with_chroma(text: str) -> "Chroma":
    """Open or build the Chroma vector store at ``PERSIST_DIRECTORY``.

    If the persist directory already contains files, the existing store is
    opened and ``text`` is ignored (nothing new is embedded). Otherwise
    ``text`` is split into overlapping chunks, each chunk is wrapped in a
    ``Document`` tagged with a ``chunk_<i>`` source, and a new store is
    built from those documents at ``PERSIST_DIRECTORY``.

    Args:
        text (str): Text to be embedded and stored. Only used when no
            populated store exists yet.

    Returns:
        Chroma: The Chroma vector store object.
    """
    # Both paths need the same embedding function, so build it once.
    embeddings = OpenAIEmbeddings()

    # A directory that exists but is empty holds no persisted data; opening it
    # would yield an empty store and the text would never be embedded, so
    # only reuse the directory when it actually contains files.
    has_persisted_store = os.path.isdir(PERSIST_DIRECTORY) and bool(
        os.listdir(PERSIST_DIRECTORY)
    )
    if has_persisted_store:
        return Chroma(
            persist_directory=PERSIST_DIRECTORY,
            embedding_function=embeddings,
        )

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    documents = [
        Document(page_content=chunk, metadata={"source": f"chunk_{i}"})
        for i, chunk in enumerate(text_splitter.split_text(text))
    ]
    return Chroma.from_documents(
        documents,
        embeddings,
        persist_directory=PERSIST_DIRECTORY,
    )