"""
Author: Khanh Phan
Date: 2024-12-04
"""

from nltk.tokenize import sent_tokenize


# TODO: consider moving to helpers
def split_into_sentences(input_text: str) -> list[str]:
    """
    Splits input text into sentences.

    The text is first broken into lines (one paragraph per line); every
    non-blank paragraph is stripped of surrounding whitespace and then
    tokenized into sentences with NLTK's `sent_tokenize`.

    Args:
        input_text (str): The input text as a string.

    Returns:
        list: A flat list of sentences, in their original order.
            Returns an empty list if input is not a string.
    """
    if not isinstance(input_text, str):
        return []

    sentences: list[str] = []
    # Line endings are discarded by splitlines(); strip() below removes any
    # remaining leading/trailing whitespace from each paragraph.
    for line in input_text.splitlines():
        paragraph = line.strip()

        # Blank or whitespace-only lines contain no sentences; skip them so
        # the tokenizer is only called on real content.
        if paragraph:
            sentences.extend(sent_tokenize(paragraph))

    return sentences


def split_into_paragraphs(input_text: str) -> list[str]:
    """
    Splits input text into paragraphs, one per non-blank line.

    Each line is stripped of leading/trailing whitespace (including its
    line ending); lines that are empty after stripping are dropped.

    Args:
        input_text (str): The input text as a string.

    Returns:
        list: A list of cleaned paragraphs, in their original order.
            Returns an empty list if input is not a string.
    """
    if not isinstance(input_text, str):
        return []

    # Lazily strip every line; line endings are already removed by
    # splitlines(), strip() takes care of the remaining whitespace.
    stripped_lines = (line.strip() for line in input_text.splitlines())

    # Keep only the lines that still have content after stripping.
    return [paragraph for paragraph in stripped_lines if paragraph]