Update debug print statements in the vectorize function: print only the DataFrame's columns instead of the full DataFrame, and print each chunk after splitting (followed by a separator line) for easier debugging
Browse files
controllers/vectorizer.py
CHANGED
@@ -48,7 +48,7 @@ def vectorize(article):
|
|
48 |
# df['sentimentScore'] = df['sentimentScore'].round(2)
|
49 |
# df['sentimentScore'] = df['sentimentScore'].astype(float)
|
50 |
df['publishDate'] = pd.to_datetime(df['publishDate'])
|
51 |
-
print(df)
|
52 |
loader = DataFrameLoader(df, page_content_column="content")
|
53 |
documents = loader.load()
|
54 |
text_splitter = RecursiveCharacterTextSplitter(
|
@@ -59,6 +59,9 @@ def vectorize(article):
|
|
59 |
)
|
60 |
|
61 |
chunks = text_splitter.split_documents(documents)
|
|
|
|
|
|
|
62 |
ids = []
|
63 |
for chunk in chunks:
|
64 |
_id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"
|
|
|
48 |
# df['sentimentScore'] = df['sentimentScore'].round(2)
|
49 |
# df['sentimentScore'] = df['sentimentScore'].astype(float)
|
50 |
df['publishDate'] = pd.to_datetime(df['publishDate'])
|
51 |
+
print(df.columns)
|
52 |
loader = DataFrameLoader(df, page_content_column="content")
|
53 |
documents = loader.load()
|
54 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
59 |
)
|
60 |
|
61 |
chunks = text_splitter.split_documents(documents)
|
62 |
+
for chunk in chunks:
|
63 |
+
print(chunk)
|
64 |
+
print("*"*50)
|
65 |
ids = []
|
66 |
for chunk in chunks:
|
67 |
_id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"
|