Muhammad Abdur Rahman Saad
committed on
Commit
·
4c3d51c
1
Parent(s):
0b8b219
Update daily.py
Browse files
daily.py
CHANGED
@@ -10,7 +10,6 @@ import urllib.request
|
|
10 |
import uuid
|
11 |
from datetime import datetime, timedelta
|
12 |
from urllib.parse import urlparse
|
13 |
-
from prefect import flow, task
|
14 |
|
15 |
from lxml import etree
|
16 |
from glue import glue_job_run
|
@@ -18,7 +17,6 @@ from utils import (crawl, datemodifier, encode, encode_content,
|
|
18 |
extract_from_pdf, extract_reference, fetch_url,
|
19 |
sentiment_computation, translate, update_content)
|
20 |
|
21 |
-
@task(name = "crawl_eastmoney")
|
22 |
def crawl_eastmoney(url, article):
|
23 |
"""
|
24 |
Crawls the given URL and extracts information from the webpage.
|
@@ -66,7 +64,6 @@ def crawl_eastmoney(url, article):
|
|
66 |
extract_reference(article)
|
67 |
update_content(article)
|
68 |
|
69 |
-
@task(name = "data collection")
|
70 |
def daily():
|
71 |
with open('xpath.json', 'r', encoding='UTF-8') as f:
|
72 |
xpath_dict = json.load(f)
|
@@ -525,7 +522,7 @@ def daily():
|
|
525 |
except Exception as error:
|
526 |
print(error)
|
527 |
|
528 |
-
|
529 |
def data_collection():
|
530 |
daily()
|
531 |
glue_job_run()
|
|
|
10 |
import uuid
|
11 |
from datetime import datetime, timedelta
|
12 |
from urllib.parse import urlparse
|
|
|
13 |
|
14 |
from lxml import etree
|
15 |
from glue import glue_job_run
|
|
|
17 |
extract_from_pdf, extract_reference, fetch_url,
|
18 |
sentiment_computation, translate, update_content)
|
19 |
|
|
|
20 |
def crawl_eastmoney(url, article):
|
21 |
"""
|
22 |
Crawls the given URL and extracts information from the webpage.
|
|
|
64 |
extract_reference(article)
|
65 |
update_content(article)
|
66 |
|
|
|
67 |
def daily():
|
68 |
with open('xpath.json', 'r', encoding='UTF-8') as f:
|
69 |
xpath_dict = json.load(f)
|
|
|
522 |
except Exception as error:
|
523 |
print(error)
|
524 |
|
525 |
+
|
526 |
def data_collection():
|
527 |
daily()
|
528 |
glue_job_run()
|