OxbridgeEconomics committed on
Commit
d05c91c
·
1 Parent(s): 061b1de
Files changed (1) hide show
  1. daily.py +9 -14
daily.py CHANGED
@@ -22,7 +22,7 @@ with open('xpath.json', 'r', encoding='UTF-8') as f:
22
  DELTA = int(os.environ.get('DELTA', '1'))
23
  print(f"DELTA = {DELTA}")
24
 
25
- # cbirc.gov.cn
26
  i = 1
27
  while i > -1:
28
  CATEGORY_URL = f"https://www.cbirc.gov.cn/cn/static/data/DocInfo/SelectDocByItemIdAndChild/data_itemId=917,pageIndex={i},pageSize=18.json"
@@ -59,7 +59,7 @@ while i > -1:
59
  except Exception as error:
60
  print(error)
61
 
62
- # csrc.gov.cn
63
  i = 1
64
  while i > -1:
65
  if i == 1:
@@ -126,7 +126,7 @@ while i > -1:
126
  except Exception as error:
127
  print(error)
128
 
129
- # data.eastmoney.com
130
  def crawl_eastmoney(url, article):
131
  domain = urlparse(url).netloc
132
  req = urllib.request.urlopen(url)
@@ -173,7 +173,6 @@ while i > -1:
173
  "qType": "3",
174
  }
175
  URL = URL + "?" + "&".join(f"{key}={value}" for key, value in params.items())
176
- print(URL)
177
  content = fetch_url(URL)
178
  if content:
179
  start_index = content.find("(")
@@ -191,12 +190,11 @@ while i > -1:
191
  except Exception as error:
192
  print(error)
193
  else:
194
- print(reportinfo)
195
  i = -1
196
  else:
197
  print("Failed to fetch URL:", url)
198
 
199
- # gov.cn
200
  i = 0
201
  while i > -1:
202
  if i == 0:
@@ -261,7 +259,7 @@ while i > -1:
261
  except Exception as error:
262
  print(error)
263
 
264
- # mof.gov.cn
265
  i = 0
266
  while i > -1:
267
  if i == 0:
@@ -269,7 +267,6 @@ while i > -1:
269
  else:
270
  CATEGORY_URL = f"https://www.mof.gov.cn/zhengwuxinxi/caizhengxinwen/index_{i}.htm"
271
  i = i + 1
272
- print(CATEGORY_URL)
273
  req = urllib.request.urlopen(CATEGORY_URL)
274
  text = req.read()
275
  html_text = text.decode("utf-8")
@@ -290,7 +287,6 @@ while i > -1:
290
  article = {}
291
  url = url.replace("../", "https://www.mof.gov.cn/zhengwuxinxi/")
292
  url = url.replace("./", "https://www.mof.gov.cn/zhengwuxinxi/caizhengxinwen/")
293
- print(url)
294
  article['category']= "Financial News"
295
  crawl(url, article)
296
  except Exception as error:
@@ -303,7 +299,6 @@ while i > -1:
303
  else:
304
  CATEGORY_URL = f"https://www.mof.gov.cn/zhengwuxinxi/zhengcejiedu/index_{i}.htm"
305
  i = i + 1
306
- print(CATEGORY_URL)
307
  req = urllib.request.urlopen(CATEGORY_URL)
308
  text = req.read()
309
  html_text = text.decode("utf-8")
@@ -329,7 +324,7 @@ while i > -1:
329
  except Exception as error:
330
  print(error)
331
 
332
- # mofcom.gov.cn
333
  categories = ['jdzhsw','jdgnmy','jddwmy','jdtzhz']
334
  for category in categories:
335
  i = 1
@@ -366,7 +361,7 @@ for category in categories:
366
  except Exception as error:
367
  print(error)
368
 
369
- # ndrc.gov.cn
370
  i = 0
371
  while i > -1:
372
  if i == 0:
@@ -405,7 +400,7 @@ while i > -1:
405
  except Exception as error:
406
  print(error)
407
 
408
- # safe.gov.cn
409
  i = 1
410
  while i > -1:
411
  if i == 1:
@@ -468,7 +463,7 @@ while i > -1:
468
  except Exception as error:
469
  print(error)
470
 
471
- # stats.gov.hk
472
  i = 0
473
  while i > -1:
474
  if i == 0:
 
22
  DELTA = int(os.environ.get('DELTA', '1'))
23
  print(f"DELTA = {DELTA}")
24
 
25
+ print("cbirc.gov.cn")
26
  i = 1
27
  while i > -1:
28
  CATEGORY_URL = f"https://www.cbirc.gov.cn/cn/static/data/DocInfo/SelectDocByItemIdAndChild/data_itemId=917,pageIndex={i},pageSize=18.json"
 
59
  except Exception as error:
60
  print(error)
61
 
62
+ print("csrc.gov.cn")
63
  i = 1
64
  while i > -1:
65
  if i == 1:
 
126
  except Exception as error:
127
  print(error)
128
 
129
+ print("data.eastmoney.com")
130
  def crawl_eastmoney(url, article):
131
  domain = urlparse(url).netloc
132
  req = urllib.request.urlopen(url)
 
173
  "qType": "3",
174
  }
175
  URL = URL + "?" + "&".join(f"{key}={value}" for key, value in params.items())
 
176
  content = fetch_url(URL)
177
  if content:
178
  start_index = content.find("(")
 
190
  except Exception as error:
191
  print(error)
192
  else:
 
193
  i = -1
194
  else:
195
  print("Failed to fetch URL:", url)
196
 
197
+ print("gov.cn")
198
  i = 0
199
  while i > -1:
200
  if i == 0:
 
259
  except Exception as error:
260
  print(error)
261
 
262
+ print("mof.gov.cn")
263
  i = 0
264
  while i > -1:
265
  if i == 0:
 
267
  else:
268
  CATEGORY_URL = f"https://www.mof.gov.cn/zhengwuxinxi/caizhengxinwen/index_{i}.htm"
269
  i = i + 1
 
270
  req = urllib.request.urlopen(CATEGORY_URL)
271
  text = req.read()
272
  html_text = text.decode("utf-8")
 
287
  article = {}
288
  url = url.replace("../", "https://www.mof.gov.cn/zhengwuxinxi/")
289
  url = url.replace("./", "https://www.mof.gov.cn/zhengwuxinxi/caizhengxinwen/")
 
290
  article['category']= "Financial News"
291
  crawl(url, article)
292
  except Exception as error:
 
299
  else:
300
  CATEGORY_URL = f"https://www.mof.gov.cn/zhengwuxinxi/zhengcejiedu/index_{i}.htm"
301
  i = i + 1
 
302
  req = urllib.request.urlopen(CATEGORY_URL)
303
  text = req.read()
304
  html_text = text.decode("utf-8")
 
324
  except Exception as error:
325
  print(error)
326
 
327
+ print("mofcom.gov.cn")
328
  categories = ['jdzhsw','jdgnmy','jddwmy','jdtzhz']
329
  for category in categories:
330
  i = 1
 
361
  except Exception as error:
362
  print(error)
363
 
364
+ print("ndrc.gov.cn")
365
  i = 0
366
  while i > -1:
367
  if i == 0:
 
400
  except Exception as error:
401
  print(error)
402
 
403
+ print("safe.gov.cn")
404
  i = 1
405
  while i > -1:
406
  if i == 1:
 
463
  except Exception as error:
464
  print(error)
465
 
466
+ print("stats.gov.hk")
467
  i = 0
468
  while i > -1:
469
  if i == 0: