Muhammad Abdur Rahman Saad committed on
Commit
122b0c4
·
1 Parent(s): e262e38

Update csrc.py

Browse files
Files changed (1) hide show
  1. source/csrc.py +6 -2
source/csrc.py CHANGED
@@ -44,8 +44,12 @@ def crawl(delta):
44
  i = i + 1
45
  logger.info(f"Fetching from URL: {category_url}")
46
  logger.info(category_url)
47
- req = urllib.request.urlopen(category_url)
48
- text = req.read()
 
 
 
 
49
  html_text = text.decode("utf-8")
50
  page = etree.HTML(html_text)
51
  articlelist = page.xpath(
 
44
  i = i + 1
45
  logger.info(f"Fetching from URL: {category_url}")
46
  logger.info(category_url)
47
+ req = urllib.request.Request(
48
+ category_url,
49
+ headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
50
+ )
51
+ response = urllib.request.urlopen(req)
52
+ text = response.read()
53
  html_text = text.decode("utf-8")
54
  page = etree.HTML(html_text)
55
  articlelist = page.xpath(