Muhammad Abdur Rahman Saad
committed on
Commit
·
122b0c4
1
Parent(s):
e262e38
Update csrc.py
Browse files
- source/csrc.py +6 -2
source/csrc.py
CHANGED
@@ -44,8 +44,12 @@ def crawl(delta):
|
|
44 |
i = i + 1
|
45 |
logger.info(f"Fetching from URL: {category_url}")
|
46 |
logger.info(category_url)
|
47 |
-
req = urllib.request.
|
48 |
-
|
|
|
|
|
|
|
|
|
49 |
html_text = text.decode("utf-8")
|
50 |
page = etree.HTML(html_text)
|
51 |
articlelist = page.xpath(
|
|
|
44 |
i = i + 1
|
45 |
logger.info(f"Fetching from URL: {category_url}")
|
46 |
logger.info(category_url)
|
47 |
+
req = urllib.request.Request(
|
48 |
+
category_url,
|
49 |
+
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
|
50 |
+
)
|
51 |
+
response = urllib.request.urlopen(req)
|
52 |
+
text = response.read()
|
53 |
html_text = text.decode("utf-8")
|
54 |
page = etree.HTML(html_text)
|
55 |
articlelist = page.xpath(
|