Add debug print statements to the vectorize function to trace execution flow
Browse files
controllers/vectorizer.py
CHANGED
@@ -37,6 +37,7 @@ def vectorize(article):
|
|
37 |
Returns:
|
38 |
None
|
39 |
"""
|
|
|
40 |
article['id'] = str(article['id'])
|
41 |
if isinstance(article, dict):
|
42 |
article = [article] # Convert single dictionary to list of dictionaries
|
@@ -49,6 +50,7 @@ def vectorize(article):
|
|
49 |
# df['sentimentScore'] = df['sentimentScore'].astype(float)
|
50 |
df['publishDate'] = pd.to_datetime(df['publishDate'])
|
51 |
print(df.columns)
|
|
|
52 |
loader = DataFrameLoader(df, page_content_column="content")
|
53 |
documents = loader.load()
|
54 |
text_splitter = RecursiveCharacterTextSplitter(
|
@@ -65,6 +67,9 @@ def vectorize(article):
|
|
65 |
ids = []
|
66 |
for chunk in chunks:
|
67 |
_id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"
|
|
|
|
|
68 |
ids.append(_id)
|
69 |
inserted_ids = vstore.add_documents(chunks, ids=ids)
|
|
|
70 |
logging.info(inserted_ids)
|
|
|
37 |
Returns:
|
38 |
None
|
39 |
"""
|
40 |
+
print("&"*50)
|
41 |
article['id'] = str(article['id'])
|
42 |
if isinstance(article, dict):
|
43 |
article = [article] # Convert single dictionary to list of dictionaries
|
|
|
50 |
# df['sentimentScore'] = df['sentimentScore'].astype(float)
|
51 |
df['publishDate'] = pd.to_datetime(df['publishDate'])
|
52 |
print(df.columns)
|
53 |
+
print(df['content'].values[0])
|
54 |
loader = DataFrameLoader(df, page_content_column="content")
|
55 |
documents = loader.load()
|
56 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
67 |
ids = []
|
68 |
for chunk in chunks:
|
69 |
_id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"
|
70 |
+
print(_id)
|
71 |
+
print("-"*50)
|
72 |
ids.append(_id)
|
73 |
inserted_ids = vstore.add_documents(chunks, ids=ids)
|
74 |
+
print(inserted_ids)
|
75 |
logging.info(inserted_ids)
|