gavinzli committed on
Commit
293d18b
·
1 Parent(s): ead6f2f

Add logging for DataFrame output in vectorize function and log content in _crawl function for better traceability

Browse files
Files changed (2) hide show
  1. controllers/vectorizer.py +1 -0
  2. source/eastmoney.py +1 -0
controllers/vectorizer.py CHANGED
@@ -48,6 +48,7 @@ def vectorize(article):
48
  # df['sentimentScore'] = df['sentimentScore'].round(2)
49
  # df['sentimentScore'] = df['sentimentScore'].astype(float)
50
  df['publishDate'] = pd.to_datetime(df['publishDate'])
 
51
  loader = DataFrameLoader(df, page_content_column="content")
52
  documents = loader.load()
53
  text_splitter = RecursiveCharacterTextSplitter(
 
48
  # df['sentimentScore'] = df['sentimentScore'].round(2)
49
  # df['sentimentScore'] = df['sentimentScore'].astype(float)
50
  df['publishDate'] = pd.to_datetime(df['publishDate'])
51
+ print(df)
52
  loader = DataFrameLoader(df, page_content_column="content")
53
  documents = loader.load()
54
  text_splitter = RecursiveCharacterTextSplitter(
source/eastmoney.py CHANGED
@@ -78,6 +78,7 @@ def _crawl(url, article, retries=3):
78
  contenteng = ''
79
  for element in contentcn.split("\n"):
80
  contenteng += translate(element) + '\n'
 
81
  article['content'] = repr(contenteng)[1:-1].strip()
82
  try:
83
  article['subtitle'] = summarize(article['content'])
 
78
  contenteng = ''
79
  for element in contentcn.split("\n"):
80
  contenteng += translate(element) + '\n'
81
+ logging.info(contenteng)
82
  article['content'] = repr(contenteng)[1:-1].strip()
83
  try:
84
  article['subtitle'] = summarize(article['content'])