gavinzli committed on
Commit
4f4a669
·
1 Parent(s): 7dcce70

Update collection name to "articles" and enable separator regex in vectorization logic

Browse files
Files changed (1) hide show
  1. controllers/vectorizer.py +2 -2
controllers/vectorizer.py CHANGED
@@ -43,7 +43,7 @@ vstore = AstraDBVectorStore(
43
  },
44
  ),
45
  namespace="default_keyspace",
46
- collection_name="article",
47
  token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
48
  api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"])
49
 
@@ -106,7 +106,7 @@ def vectorize(article):
106
  chunk_size=1000,
107
  chunk_overlap=200,
108
  length_function=token_length,
109
- is_separator_regex=False,
110
  separators=["\n\n", "\n", "\t"] # Logical separators
111
  )
112
  chunks = text_splitter.split_documents(documents)
 
43
  },
44
  ),
45
  namespace="default_keyspace",
46
+ collection_name="articles",
47
  token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
48
  api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"])
49
 
 
106
  chunk_size=1000,
107
  chunk_overlap=200,
108
  length_function=token_length,
109
+ is_separator_regex=True,
110
  separators=["\n\n", "\n", "\t"] # Logical separators
111
  )
112
  chunks = text_splitter.split_documents(documents)