anubhav77 commited on
Commit
3a51e33
·
1 Parent(s): be94be8

Add the sentence-transformers dependency and some progress log statements

Browse files
Files changed (2) hide show
  1. indexer.py +4 -0
  2. requirements.txt +1 -0
indexer.py CHANGED
@@ -11,12 +11,15 @@ loader = DirectoryLoader('.', glob="./source/*.html", loader_cls=BSHTMLLoader)
11
  docs=loader.load()
12
 
13
  #splitting the text into chunks, trying with 1000 size
 
14
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
15
  texts = text_splitter.split_documents(docs)
16
 
17
  #init db and embeddings
 
18
  persist_directory="./index/chroma"
19
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
20
  vectordb = Chroma.from_documents(documents=texts,
21
  embedding=embeddings,
22
  persist_directory=persist_directory)
@@ -29,6 +32,7 @@ meta=[{"n":1},{"z":2},{},{},{"n":3},{"n":4}]
29
 
30
  #docsearch=FAISS.from_texts(sentences,embeddings,meta,id)
31
  #m=docsearch.similarity_search_with_score(query2,filter={"n":2})
 
32
  query="How to Increase Flexibility Without Losing Productivity"
33
  docs = vectordb.similarity_search(query)
34
  for i in range(0,len(docs)):
 
11
  docs=loader.load()
12
 
13
  #splitting the text into chunks, trying with 1000 size
14
+ print("splitting to chunks")
15
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
16
  texts = text_splitter.split_documents(docs)
17
 
18
  #init db and embeddings
19
+ print("Creating embeddings")
20
  persist_directory="./index/chroma"
21
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
22
+ print("Storing in db")
23
  vectordb = Chroma.from_documents(documents=texts,
24
  embedding=embeddings,
25
  persist_directory=persist_directory)
 
32
 
33
  #docsearch=FAISS.from_texts(sentences,embeddings,meta,id)
34
  #m=docsearch.similarity_search_with_score(query2,filter={"n":2})
35
+ print("Querying db")
36
  query="How to Increase Flexibility Without Losing Productivity"
37
  docs = vectordb.similarity_search(query)
38
  for i in range(0,len(docs)):
requirements.txt CHANGED
@@ -6,5 +6,6 @@ fastapi
6
  loguru
7
  chromadb
8
  langchain
 
9
  sse_starlette
10
  dropbox
 
6
  loguru
7
  chromadb
8
  langchain
9
+ sentence_transformers
10
  sse_starlette
11
  dropbox