File size: 1,269 Bytes
74475ac 23b7557 74475ac 23b7557 74475ac a13f43a 23b7557 74475ac 1c5f2e5 74475ac 82a33ed 5d719e2 74475ac 85f3efe 74475ac e35c5d9 a13f43a 23b7557 5d719e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
"""
This script is responsible for collecting data from various websites \
related to financial and policy information in China. It fetches data from various sources,\
extracts relevant information, translates it, and updates the content accordingly.
Collected data includes policy interpretations, financial news, macroeconomic research.
"""
import logging
import os
from dotenv import load_dotenv
from prefect import flow
from source import cbirc, csrc, eastmoney, gov, mofcom, ndrc, safe, stats, mof, sohu_ccef
# from glue import glue_job_run
load_dotenv()
logging.basicConfig(
format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s',
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO
)
delta = int(os.environ.get('DELTA') or '1')
logging.info("DELTA = %s", delta)
@flow(name="Data Collection - Daily", log_prints=True)
def main():
    """
    Orchestrate daily data collection by invoking each source's crawl
    function, in order, with the module-level `delta` look-back window.
    """
    # Sources are crawled strictly in this order.
    # cbirc is currently disabled: cbirc.crawl(delta)
    sources = (
        eastmoney,
        csrc,
        stats,
        gov,
        safe,
        mofcom,
        ndrc,
        mof,
        # glue_job_run() was invoked here when the glue step was enabled.
        sohu_ccef,
    )
    for source in sources:
        source.crawl(delta)
# Run the daily collection flow when executed as a script (no-op on import).
if __name__ == '__main__':
    main()
|