gavinzli committed on
Commit
fdff7f3
·
1 Parent(s): 293d18b

Update print statements in vectorize function to display DataFrame columns and chunk content for improved debugging

Browse files
Files changed (1) hide show
  1. controllers/vectorizer.py +4 -1
controllers/vectorizer.py CHANGED
@@ -48,7 +48,7 @@ def vectorize(article):
48
  # df['sentimentScore'] = df['sentimentScore'].round(2)
49
  # df['sentimentScore'] = df['sentimentScore'].astype(float)
50
  df['publishDate'] = pd.to_datetime(df['publishDate'])
51
- print(df)
52
  loader = DataFrameLoader(df, page_content_column="content")
53
  documents = loader.load()
54
  text_splitter = RecursiveCharacterTextSplitter(
@@ -59,6 +59,9 @@ def vectorize(article):
59
  )
60
 
61
  chunks = text_splitter.split_documents(documents)
 
 
 
62
  ids = []
63
  for chunk in chunks:
64
  _id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"
 
48
  # df['sentimentScore'] = df['sentimentScore'].round(2)
49
  # df['sentimentScore'] = df['sentimentScore'].astype(float)
50
  df['publishDate'] = pd.to_datetime(df['publishDate'])
51
+ print(df.columns)
52
  loader = DataFrameLoader(df, page_content_column="content")
53
  documents = loader.load()
54
  text_splitter = RecursiveCharacterTextSplitter(
 
59
  )
60
 
61
  chunks = text_splitter.split_documents(documents)
62
+ for chunk in chunks:
63
+ print(chunk)
64
+ print("*"*50)
65
  ids = []
66
  for chunk in chunks:
67
  _id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"