File size: 883 Bytes
21e639d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import re
from typing import List
from .date import format_date
from scrapy.http.response.html import HtmlResponse

meta_tag_selector = '//*[@id="main-content"]/div/span[@class="article-meta-tag"]'


async def get_meta_data(response: HtmlResponse) -> List[str]:
    """Extract the meta data fields of a ptt post.

    Collects the text of every element that follows a node matched by
    ``meta_tag_selector`` and then normalises that list in place: the
    first entry is split into an author name and the alias written in
    parentheses, and the last entry is replaced by the result of
    ``format_date``.

    Args:
        response (HtmlResponse): the response to parse
    Returns:
        the collected texts, with the stripped author name first, the
        alias second (empty string when the first entry has no
        parenthesised part) and the ``format_date`` result last
    Raises:
        IndexError: if the selector yields no text at all
    """
    value_xpath = f"{meta_tag_selector}/following-sibling::*/text()"
    fields = response.xpath(value_xpath).getall()

    raw_author = fields[0]

    # Everything in front of the first "(" is taken as the author name.
    name_part = re.search(r"([^(]*)", raw_author).group(1)

    # The alias is the text enclosed in parentheses, when present.
    found = re.search(r"\((.*)\)", raw_author)
    if found:
        nickname = found.group(1)
    else:
        nickname = ""

    # The author is written back before the last entry is read, so both
    # steps act on the same slot when only a single text was collected.
    fields[0] = name_part.strip()
    fields[-1] = await format_date(fields[-1])

    # The alias goes right after the author, shifting the rest by one.
    fields.insert(1, nickname)
    return fields