from knowledgeassistant.exception.exception import KnowledgeAssistantException from knowledgeassistant.logging.logger import logging from knowledgeassistant.entity.config_entity import DataSummarizationConfig from knowledgeassistant.utils.main_utils.utils import write_txt_file, read_txt_file import sys import torch from transformers import pipeline, AutoTokenizer class DataSummarization: def __init__(self, data_summarization_config: DataSummarizationConfig): try: self.data_summarization_config = data_summarization_config except Exception as e: raise KnowledgeAssistantException(e, sys) def summarize(self, input_text_path: str, min_length: int): try: model_path = "/app/models/bart-large-cnn" tokenizer = AutoTokenizer.from_pretrained(model_path) pipe = pipeline("summarization", model=model_path, tokenizer=model_path) logging.info("Summarization Pipeline Successfully Setup") text = read_txt_file(input_text_path) tokens = tokenizer.encode(text, truncation=True, max_length=1024, return_tensors="pt") if len(tokens[0]) >= 1024: logging.warning("Input text exceeded 1024 tokens. It has been truncated.") truncated_text = tokenizer.decode(tokens[0], skip_special_tokens=True) frontend_message = "Your input text exceeded the limit of 1024 tokens and has been truncated." else: truncated_text = text frontend_message = "" # Generate summary summary = pipe(truncated_text, min_length=min_length, max_length=142, do_sample=False) logging.info("Text successfully summarized") # Save summary write_txt_file(self.data_summarization_config.summarized_text_file_path, summary[0].get("summary_text")) logging.info("Successfully wrote summarized text") # Return summary along with frontend message return { "summary": summary[0].get("summary_text"), "warning": frontend_message } except Exception as e: raise KnowledgeAssistantException(e, sys) def initiate_data_summarization(self, input_text_path: str, min_length: int): try: self.summarize( input_text_path = input_text_path, min_length = min_length ) except Exception as e: raise KnowledgeAssistantException(e, sys)