pmkhanh7890's picture
run pre-commit
38fd181
raw
history blame
617 Bytes
from nltk.tokenize import sent_tokenize
def split_into_paragraphs(input_text):
    """
    Split input text into a flat list of sentences.

    The text is first split on newlines into paragraphs; each non-blank
    paragraph is then tokenized into sentences with NLTK's ``sent_tokenize``.

    Args:
        input_text: The input text as a string.

    Returns:
        A list of sentences. Returns an empty list if input is not a string.
    """
    if not isinstance(input_text, str):
        return []

    sentences = []
    # keepends is unnecessary: each line is stripped immediately below,
    # which also makes any explicit newline check redundant.
    for paragraph in input_text.splitlines():
        paragraph = paragraph.strip()
        if paragraph:  # skip blank/whitespace-only lines
            sentences.extend(sent_tokenize(paragraph))
    return sentences