Add a print of the DataFrame in the vectorize function and log the translated content in the _crawl function for better traceability
Browse files
- controllers/vectorizer.py +1 -0
- source/eastmoney.py +1 -0
controllers/vectorizer.py
CHANGED
@@ -48,6 +48,7 @@ def vectorize(article):
|
|
48 |
# df['sentimentScore'] = df['sentimentScore'].round(2)
|
49 |
# df['sentimentScore'] = df['sentimentScore'].astype(float)
|
50 |
df['publishDate'] = pd.to_datetime(df['publishDate'])
|
|
|
51 |
loader = DataFrameLoader(df, page_content_column="content")
|
52 |
documents = loader.load()
|
53 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
48 |
# df['sentimentScore'] = df['sentimentScore'].round(2)
|
49 |
# df['sentimentScore'] = df['sentimentScore'].astype(float)
|
50 |
df['publishDate'] = pd.to_datetime(df['publishDate'])
|
51 |
+ print(df)
|
52 |
loader = DataFrameLoader(df, page_content_column="content")
|
53 |
documents = loader.load()
|
54 |
text_splitter = RecursiveCharacterTextSplitter(
|
source/eastmoney.py
CHANGED
@@ -78,6 +78,7 @@ def _crawl(url, article, retries=3):
|
|
78 |
contenteng = ''
|
79 |
for element in contentcn.split("\n"):
|
80 |
contenteng += translate(element) + '\n'
|
|
|
81 |
article['content'] = repr(contenteng)[1:-1].strip()
|
82 |
try:
|
83 |
article['subtitle'] = summarize(article['content'])
|
|
|
78 |
contenteng = ''
|
79 |
for element in contentcn.split("\n"):
|
80 |
contenteng += translate(element) + '\n'
|
81 |
+ logging.info(contenteng)
|
82 |
article['content'] = repr(contenteng)[1:-1].strip()
|
83 |
try:
|
84 |
article['subtitle'] = summarize(article['content'])
|