File size: 1,227 Bytes
74475ac
 
 
 
 
 
 
 
 
 
 
01677a0
5d719e2
74475ac
 
 
 
 
 
 
 
 
 
82a33ed
5d719e2
 
 
 
74475ac
 
 
 
 
 
 
 
 
01677a0
5d719e2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""
This script is responsible for collecting data from various websites related to financial and policy information in China.
It fetches data from different sources, extracts relevant information, translates it, and updates the content accordingly.
The collected data includes policy interpretations, financial news, macroeconomic research, and more.
"""
import logging
import os

from dotenv import load_dotenv

from source import cbirc, csrc, eastmoney, gov, mofcom, ndrc, safe, stats, mof
from glue import glue_job_run
from prefect import flow

# Run python-dotenv's loader before anything below reads the environment.
load_dotenv()

# Configure the root logger: each record shows timestamp, level, the name of
# the emitting function, and the message. The level is set in a separate call
# after basicConfig so it is applied to the root logger unconditionally.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(funcName)s - %(message)s",
)
logging.getLogger().setLevel(logging.INFO)

# DELTA comes from the environment; a missing or empty value falls back to 1.
# The value is handed to every crawler in main().
# NOTE(review): presumably a look-back window for the crawlers; confirm the
# unit against the source modules.
delta = int(os.getenv("DELTA") or 1)
logging.info("DELTA = %s", delta)

@flow(name="Data Collection - Daily", log_prints=True)
def main():
    """
    Run every source crawler in sequence, then start the Glue job.

    Each module's ``crawl`` function is called with the module-level
    ``delta`` value, one after another in the order listed below.
    ``glue_job_run`` is invoked only after all crawlers have returned.
    """
    # Order matters only in that it mirrors the established run sequence.
    crawl_order = (
        eastmoney,
        cbirc,
        csrc,
        stats,
        gov,
        safe,
        mofcom,
        ndrc,
        mof,
    )
    for source_module in crawl_order:
        source_module.crawl(delta)

    glue_job_run()
    
# Start the flow only when this file is executed directly as a script;
# importing the module does not call main().
if __name__ == '__main__':
    main()