OxbridgeEconomics
committed on
Update vectorizer.py
Browse files
controllers/vectorizer.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
"""Module to upsert data into AstraDB"""
|
2 |
import os
|
3 |
import logging
|
|
|
4 |
|
5 |
import pandas as pd
|
6 |
from langchain_astradb import AstraDBVectorStore
|
@@ -52,6 +53,10 @@ def vectorize(article):
|
|
52 |
is_separator_regex=False,
|
53 |
)
|
54 |
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
57 |
logging.info(inserted_ids)
|
|
|
1 |
"""Module to upsert data into AstraDB"""
|
2 |
import os
|
3 |
import logging
|
4 |
+
import uuid
|
5 |
|
6 |
import pandas as pd
|
7 |
from langchain_astradb import AstraDBVectorStore
|
|
|
53 |
is_separator_regex=False,
|
54 |
)
|
55 |
|
56 |
+
chunks = text_splitter.split_documents(documents)
|
57 |
+
ids = []
|
58 |
+
for chunk in chunks:
|
59 |
+
id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"
|
60 |
+
ids.append(id)
|
61 |
+
inserted_ids = vstore.add_documents(chunks, ids=ids)
|
62 |
logging.info(inserted_ids)
|