gavinzli committed on
Commit
d20886e
·
1 Parent(s): e35c5d9

Add debug print statements in vectorize function for tracing execution flow

Browse files
Files changed (1) hide show
  1. controllers/vectorizer.py +5 -0
controllers/vectorizer.py CHANGED
@@ -37,6 +37,7 @@ def vectorize(article):
37
  Returns:
38
  None
39
  """
 
40
  article['id'] = str(article['id'])
41
  if isinstance(article, dict):
42
  article = [article] # Convert single dictionary to list of dictionaries
@@ -49,6 +50,7 @@ def vectorize(article):
49
  # df['sentimentScore'] = df['sentimentScore'].astype(float)
50
  df['publishDate'] = pd.to_datetime(df['publishDate'])
51
  print(df.columns)
 
52
  loader = DataFrameLoader(df, page_content_column="content")
53
  documents = loader.load()
54
  text_splitter = RecursiveCharacterTextSplitter(
@@ -65,6 +67,9 @@ def vectorize(article):
65
  ids = []
66
  for chunk in chunks:
67
  _id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"
 
 
68
  ids.append(_id)
69
  inserted_ids = vstore.add_documents(chunks, ids=ids)
 
70
  logging.info(inserted_ids)
 
37
  Returns:
38
  None
39
  """
40
+ print("&"*50)
41
  article['id'] = str(article['id'])
42
  if isinstance(article, dict):
43
  article = [article] # Convert single dictionary to list of dictionaries
 
50
  # df['sentimentScore'] = df['sentimentScore'].astype(float)
51
  df['publishDate'] = pd.to_datetime(df['publishDate'])
52
  print(df.columns)
53
+ print(df['content'].values[0])
54
  loader = DataFrameLoader(df, page_content_column="content")
55
  documents = loader.load()
56
  text_splitter = RecursiveCharacterTextSplitter(
 
67
  ids = []
68
  for chunk in chunks:
69
  _id = f"{chunk.metadata['id']}-{str(uuid.uuid5(uuid.NAMESPACE_OID,chunk.page_content))}"
70
+ print(_id)
71
+ print("-"*50)
72
  ids.append(_id)
73
  inserted_ids = vstore.add_documents(chunks, ids=ids)
74
+ print(inserted_ids)
75
  logging.info(inserted_ids)