"""
This script is responsible for collecting data from various websites \
related to financial and policy information in China. It fetches data from various sources,\
extracts relevant information, translates it, and updates the content accordingly.
Collected data includes policy interpretations, financial news, macroeconomic research.
"""
import logging
import os

from dotenv import load_dotenv
from prefect import flow

from source import cbirc, csrc, eastmoney, gov, mofcom, ndrc, safe, stats, mof, sohu_ccef
# from glue import glue_job_run

load_dotenv()

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s',
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO
)

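# DELTA comes from the environment (e.g. the .env file loaded above) and
# presumably sets the look-back window, in days, passed to every crawler
# below; it defaults to 1 for the daily run.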
delta = int(os.environ.get('DELTA') or '1')
logging.info("DELTA = %s", delta)

@flow(name="Data Collection - Daily", log_prints=True)
def main():
    """
    Orchestrates the data collection process by calling the crawl functions of different sources.
    """
    eastmoney.crawl(delta)
    # cbirc.crawl(delta)
    csrc.crawl(delta)
    stats.crawl(delta)
    gov.crawl(delta)
    safe.crawl(delta)
    mofcom.crawl(delta)
    ndrc.crawl(delta)
    mof.crawl(delta)
    # glue_job_run()
    sohu_ccef.crawl(delta)

if __name__ == '__main__':
    main()
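
# ---------------------------------------------------------------------------
# Interface sketch (not executed by the flow): every module imported from
# `source` is assumed to expose a `crawl(delta)` entry point roughly like the
# hypothetical one below. The function name, listing URL, and JSON fields are
# illustrative only and do not reflect the real scrapers.
# ---------------------------------------------------------------------------
def _example_crawl(delta: int) -> None:
    """Fetch items published within the last `delta` days from one source."""
    from datetime import date, timedelta

    import requests

    cutoff = date.today() - timedelta(days=delta)
    # Hypothetical listing endpoint returning a JSON array of articles.
    resp = requests.get("https://example.org/api/articles", timeout=30)
    resp.raise_for_status()
    for item in resp.json():
        if date.fromisoformat(item["publish_date"]) < cutoff:
            continue
        # A real source module would extract the article body, translate it,
        # and persist the result at this point.
        logging.info("Would process article: %s", item.get("title"))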