Spaces:
Runtime error
Runtime error
File size: 883 Bytes
21e639d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import re
from typing import List
from .date import format_date
from scrapy.http.response.html import HtmlResponse
# XPath matching the <span class="article-meta-tag"> label elements that sit
# one <div> below the element whose id is "main-content".
meta_tag_selector = '//*[@id="main-content"]/div/span[@class="article-meta-tag"]'
async def get_meta_data(response: HtmlResponse) -> List[str]:
    """Collect the metadata values shown in the header of a PTT post.

    The text of every element that follows an ``article-meta-tag`` span is
    gathered into a list. The first entry is treated as the author field
    and split into two entries; the last entry is passed through
    ``format_date``.

    Args:
        response (HtmlResponse): the response to parse

    Returns:
        The list of extracted text values, modified as follows:
        index 0 holds the author text found before the first ``(``, with
        surrounding whitespace stripped; index 1 holds the text found
        between parentheses in the author field, or ``""`` when there is
        none; the final entry holds the result of ``format_date``.

    Raises:
        IndexError: if the XPath query yields no text values.
    """
    values_xpath = f"{meta_tag_selector}/following-sibling::*/text()"
    fields = response.xpath(values_xpath).getall()

    raw_author = fields[0]

    # The pattern can match an empty string, so this search always succeeds.
    name_part = re.search(r"([^(]*)", raw_author).group(1).strip()

    nickname_match = re.search(r"\((.*)\)", raw_author)
    if nickname_match:
        nickname = nickname_match.group(1)
    else:
        nickname = ""

    # Overwrite the author slot before formatting the last entry: with a
    # single-element list both indices refer to the same item.
    fields[0] = name_part
    fields[-1] = await format_date(fields[-1])

    # Inserting the nickname last keeps the [-1] index above pointing at the
    # original final entry.
    fields.insert(1, nickname)
    return fields
|