Adding sentence transformers and some logs
Browse files
- indexer.py +4 -0
- requirements.txt +1 -0
indexer.py
CHANGED
@@ -11,12 +11,15 @@ loader = DirectoryLoader('.', glob="./source/*.html", loader_cls=BSHTMLLoader)
|
|
11 |
docs=loader.load()
|
12 |
|
13 |
#splitting the text into chunks, trying with 1000 size
|
|
|
14 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
15 |
texts = text_splitter.split_documents(docs)
|
16 |
|
17 |
#init db and embeddings
|
|
|
18 |
persist_directory="./index/chroma"
|
19 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
|
|
20 |
vectordb = Chroma.from_documents(documents=texts,
|
21 |
embedding=embeddings,
|
22 |
persist_directory=persist_directory)
|
@@ -29,6 +32,7 @@ meta=[{"n":1},{"z":2},{},{},{"n":3},{"n":4}]
|
|
29 |
|
30 |
#docsearch=FAISS.from_texts(sentences,embeddings,meta,id)
|
31 |
#m=docsearch.similarity_search_with_score(query2,filter={"n":2})
|
|
|
32 |
query="How to Increase Flexibility Without Losing Productivity"
|
33 |
docs = vectordb.similarity_search(query)
|
34 |
for i in range(0,len(docs)):
|
|
|
11 |
docs=loader.load()
|
12 |
|
13 |
#splitting the text into chunks, trying with 1000 size
|
14 |
+
print("splitting to chunks")
|
15 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
16 |
texts = text_splitter.split_documents(docs)
|
17 |
|
18 |
#init db and embeddings
|
19 |
+
print("Creating embeddings")
|
20 |
persist_directory="./index/chroma"
|
21 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
22 |
+
print("Storing in db")
|
23 |
vectordb = Chroma.from_documents(documents=texts,
|
24 |
embedding=embeddings,
|
25 |
persist_directory=persist_directory)
|
|
|
32 |
|
33 |
#docsearch=FAISS.from_texts(sentences,embeddings,meta,id)
|
34 |
#m=docsearch.similarity_search_with_score(query2,filter={"n":2})
|
35 |
+
print("Querying db")
|
36 |
query="How to Increase Flexibility Without Losing Productivity"
|
37 |
docs = vectordb.similarity_search(query)
|
38 |
for i in range(0,len(docs)):
|
requirements.txt
CHANGED
@@ -6,5 +6,6 @@ fastapi
|
|
6 |
loguru
|
7 |
chromadb
|
8 |
langchain
|
|
|
9 |
sse_starlette
|
10 |
dropbox
|
|
|
6 |
loguru
|
7 |
chromadb
|
8 |
langchain
|
9 |
+
sentence_transformers
|
10 |
sse_starlette
|
11 |
dropbox
|