Update collection name to "articles" and enable separator regex in vectorization logic
Browse files
controllers/vectorizer.py
CHANGED
@@ -43,7 +43,7 @@ vstore = AstraDBVectorStore(
|
|
43 |
},
|
44 |
),
|
45 |
namespace="default_keyspace",
|
46 |
-
collection_name="
|
47 |
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
|
48 |
api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"])
|
49 |
|
@@ -106,7 +106,7 @@ def vectorize(article):
|
|
106 |
chunk_size=1000,
|
107 |
chunk_overlap=200,
|
108 |
length_function=token_length,
|
109 |
-
is_separator_regex=False,
|
110 |
separators=["\n\n", "\n", "\t"] # Logical separators
|
111 |
)
|
112 |
chunks = text_splitter.split_documents(documents)
|
|
|
43 |
},
|
44 |
),
|
45 |
namespace="default_keyspace",
|
46 |
+
collection_name="articles",
|
47 |
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
|
48 |
api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"])
|
49 |
|
|
|
106 |
chunk_size=1000,
|
107 |
chunk_overlap=200,
|
108 |
length_function=token_length,
|
109 |
+
is_separator_regex=True,
|
110 |
separators=["\n\n", "\n", "\t"] # Logical separators
|
111 |
)
|
112 |
chunks = text_splitter.split_documents(documents)
|