OxbridgeEconomics committed on
Commit
96825f6
·
unverified ·
1 Parent(s): 99b66e9

Update vectorizer.py

Browse files
Files changed (1) hide show
  1. controllers/vectorizer.py +7 -2
controllers/vectorizer.py CHANGED
@@ -1,6 +1,7 @@
1
  """Module to upsert data into AstraDB"""
2
  import os
3
  import logging
 
4
 
5
  import pandas as pd
6
  from langchain_astradb import AstraDBVectorStore
@@ -52,6 +53,10 @@ def vectorize(article):
52
  is_separator_regex=False,
53
  )
54
 
55
- docs = text_splitter.split_documents(documents)
56
- inserted_ids = vstore.add_documents(docs)
 
 
 
 
57
  logging.info(inserted_ids)
 
1
  """Module to upsert data into AstraDB"""
2
  import os
3
  import logging
4
+ import uuid
5
 
6
  import pandas as pd
7
  from langchain_astradb import AstraDBVectorStore
 
53
  is_separator_regex=False,
54
  )
55
 
56
+ chunks = text_splitter.split_documents(documents)
57
+ ids = []
58
+ for chunk in chunks:
59
+ id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"
60
+ ids.append(id)
61
+ inserted_ids = vstore.add_documents(chunks, ids=ids)
62
  logging.info(inserted_ids)