import os  
import shutil  
import random  
import pandas as pd  
import numpy as np  
import nltk  
import google.generativeai as genai  
import csv  
from transformers import (  
    AutoTokenizer,  
    DataCollatorWithPadding,  
    AutoModelForSequenceClassification,  
    EarlyStoppingCallback,  
    TrainerCallback,  
    TrainingArguments,  
    Trainer  
)  
from openai import OpenAI  
from sklearn.neural_network import MLPClassifier  
from sklearn.metrics import roc_auc_score, accuracy_score  
from os.path import join  
from langchain.chat_models import ChatOpenAI  
from datasets import load_metric, load_dataset, Dataset  
from copy import deepcopy  
from bart_score import BARTScorer  
import argparse  
  
# Constants  
TOGETHER_API_KEY = "your_together_api_key"  
OPENAI_API_KEY = "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" 
GEMINI_API_KEY = "your_gemini_key"  
LOG_FILE = "data/99_log.txt"  
OUTPUT_FILE = "data/result.txt"  
METRIC_NAME = "roc_auc"  

TRAIN_RATIO = 0.8  
VAL_RATIO = 0.1  
NUMBER_OF_MAX_EPOCH_WITH_EARLY_STOPPING = 10  
PATIENCE = 3  
BATCH_SIZE = 8  
OPTIMIZED_METRIC = "roc_auc"  
SEED = 0  
TEMPERATURE = 0.0  
IS_OUTPUT_NORMALIZATION = False  
RATIO = 0.9  
HUMAN_LABEL = 0
MACHINE_LABEL = 1
BART = "bart"

MULTIMODEL = "multimodel"
SINGLE_FROM_MULTIMODEL = "single_from_multimodel"

# Environment setup  
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY  
os.environ['CURL_CA_BUNDLE'] = ''  
os.environ['REQUESTS_CA_BUNDLE'] = ''  
  
# Download necessary NLTK data  
nltk.download('punkt')
nltk.download('punkt_tab')
  
# Chat model configurations  
chat_model = ChatOpenAI(temperature=TEMPERATURE, model_name="gpt-3.5-turbo-0125")  
  
# API Models and Paths  
CHATGPT = "ChatGPT"  
GEMINI = "Gemini"  
# LLAMA_2_70_CHAT_TEMP_0 = "LLaMa"  
API_ERROR = "API_ERROR"  
IGNORE_BY_API_ERROR = "IGNORE_BY_API_ERROR"  
  
# Initialize BARTScorer  
bart_scorer = BARTScorer(device='cuda:0', checkpoint="facebook/bart-large-cnn")  
  
# Generative AI configuration  
genai.configure(api_key=GEMINI_API_KEY, transport='rest')  
generation_config = {  
    "temperature": TEMPERATURE,  
}  
GEMINI_MODEL = genai.GenerativeModel('gemini-pro', generation_config=generation_config)  
  
# Model paths  
MODEL_PATHS = {  
    "LLaMa": "meta-llama/Llama-2-70b-chat-hf",  
    "QWEN": "Qwen/Qwen1.5-72B-Chat",  
    "Yi": "NousResearch/Nous-Hermes-2-Yi-34B",  
    "Mixtral": "mistralai/Mixtral-8x7B-Instruct-v0.1",  
    "OLMo": "allenai/OLMo-7B-Instruct",  
    "Phi": "microsoft/phi-2",  
    "OpenChat": "openchat/openchat-3.5-1210",  
    "WizardLM": "WizardLM/WizardLM-13B-V1.2",  
    "Vicuna": "lmsys/vicuna-13b-v1.5"  
}  

TOGETHER_PATH ='https://api.together.xyz'
  
# Roberta model configurations  
ROBERTA_BASE = "roberta-base"  
ROBERTA_LARGE = "roberta-large"  
ROBERTA_MODEL_PATHS = {  
    ROBERTA_BASE: "roberta-base",  
    ROBERTA_LARGE: "roberta-large"  
}  
LEARNING_RATES = {  
    ROBERTA_BASE: 2e-5,  
    ROBERTA_LARGE: 8e-6  
}  
MODEL_NAME = ROBERTA_BASE  


# Tokenizer initialization  
tokenizer = AutoTokenizer.from_pretrained(ROBERTA_MODEL_PATHS[MODEL_NAME])  
  
# Custom callback for Trainer  
class CustomCallback(TrainerCallback):  
    """  
    Custom callback to evaluate the training dataset at the end of each epoch.  
    """  
    def __init__(self, trainer) -> None:  
        super().__init__()  
        self._trainer = trainer  
  
    def on_epoch_end(self, args, state, control, **kwargs):  
        """  
        At the end of each epoch, evaluate the training dataset.  
        """  
        if control.should_evaluate:  
            control_copy = deepcopy(control)  
            self._trainer.evaluate(eval_dataset=self._trainer.train_dataset, metric_key_prefix="train")  
            return control_copy  
  
# Metric loading  
metric = load_metric(METRIC_NAME)  

def compute_metrics(evaluation_predictions):
    """
    Function to compute evaluation metrics for model predictions.
    
    Parameters:
    evaluation_predictions (tuple): A tuple containing two elements:
        - predictions (array-like): The raw prediction scores from the model.
        - labels (array-like): The true labels for the evaluation data.
    
    Returns:
    dict: A dictionary containing the computed evaluation metrics.
    """
    # Unpack predictions and labels from the input tuple
    raw_predictions, true_labels = evaluation_predictions

    # Convert raw prediction scores to predicted class labels
    predicted_labels = np.argmax(raw_predictions, axis=1)

    # Compute and return the evaluation metrics
    return metric.compute(prediction_scores=predicted_labels, references=true_labels, average="macro")


def abstract_proofread(model_path, temperature, base_url, api_key, prompt):
    """
    Function to proofread an abstract using an AI language model.
    
    Parameters:
    model_path (str): The path or identifier of the AI model to use.
    temperature (float): Sampling temperature for the model's output.
    base_url (str): The base URL for the API endpoint.
    api_key (str): The API key for authentication.
    prompt (str): The text prompt to provide to the AI for proofreading.
    
    Returns:
    str: The proofread abstract generated by the AI model.
    """
    # Initialize the AI client with the provided API key and base URL
    ai_client = OpenAI(api_key=api_key, base_url=base_url)

    # Create a chat completion request with the system message and user prompt
    chat_completion = ai_client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant",
            },
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model=model_path,
        max_tokens=1024,
        temperature=temperature,
    )

    # Return the content of the first choice's message
    return chat_completion.choices[0].message.content


def proofread_by_model_name(model_name, input_text, normalize_output):  
    """  
    Proofreads the given input text using the specified model.  
  
    Args:  
        model_name (str): The name of the model to use for proofreading.  
        input_text (str): The text to be proofread.  
        normalize_output (bool): Whether to normalize the output or not.  
  
    Returns:  
        str: The proofread text.  
    """  
    # Constants for API access  
    base_url = TOGETHER_PATH  
    api_key = TOGETHER_API_KEY  
    temperature = TEMPERATURE
      
    # Retrieve the model path from the dictionary  
    if model_name in MODEL_PATHS:  
        model_path = MODEL_PATHS[model_name]  
    else:  
        raise ValueError("Model name not found in the dictionary.")  
      
    # Formulate the prompt for the model  
    prompt = f"Proofreading for the text: ```{input_text}```"  
      
    # Apply output normalization if required  
    if normalize_output:  
        prompt = output_normalization(prompt)  
      
    # Debugging: Print the prompt  
    print(f"Prompt: {prompt}")  
      
    # Call the abstract proofreading function with the prepared parameters  
    return abstract_proofread(model_path, temperature, base_url, api_key, prompt)  


def gemini_proofread(input_text, normalize_output):  
    """  
    Proofreads the given text using the GEMINI_MODEL.  
      
    Parameters:  
    input_text (str): The text to be proofread.  
    normalize_output (bool): Flag indicating whether to normalize the output.  
  
    Returns:  
    str: The proofread text.  
    """  
    prompt = f"Proofreading for the text: ```{input_text}```"  
    if normalize_output:  
        prompt = output_normalization(prompt)  
    response = GEMINI_MODEL.generate_content(prompt)  
    return response.text  
  
def print_and_log(message):  
    """  
    Prints and logs the given message to a log file.  
      
    Parameters:  
    message (str): The message to be printed and logged.  
    """  
    print(message)  
    with open(LOG_FILE, "a+", encoding='utf-8') as log_file:  
        log_file.write(message + "\n")  
  
def write_to_file(filename, content):  
    """  
    Writes the given content to a specified file.  
      
    Parameters:  
    filename (str): The name of the file to write to.  
    content (str): The content to be written.  
    """  
    print(content)  
    with open(filename, "a+", encoding='utf-8') as file:  
        file.write(content)  
  
def output_normalization(prompt):  
    """  
    Normalizes the output by appending a specific instruction to the prompt.  
      
    Parameters:  
    prompt (str): The initial prompt.  
  
    Returns:  
    str: The modified prompt.  
    """  
    return prompt + " Please only output the proofread text without any explanation."  
  
def chatGPT_proofread(input_text, normalize_output):  
    """  
    Proofreads the given text using the chat_model.  
      
    Parameters:  
    input_text (str): The text to be proofread.  
    normalize_output (bool): Flag indicating whether to normalize the output.  
  
    Returns:  
    str: The proofread text.  
    """  
    prompt = f"Proofreading for the text: ```{input_text}```"  
    if normalize_output:  
        prompt = output_normalization(prompt)  
      
    print(f"Starting API call with prompt: {prompt}")  
    result = chat_model.predict(prompt)  
    print(f"Ending API call with prompt: {prompt}")  
      
    return result  
  
def normalize_text(input_text):  
    """  
    Normalizes the given text by removing certain characters and extra spaces.  
      
    Parameters:  
    input_text (str): The text to be normalized.  
  
    Returns:  
    str: The normalized text.  
    """  
    result = input_text.strip()  
    result = result.replace("**", "")  
    result = result.replace("\n", " ")  
    result = result.replace("  ", " ")  # Remove extra spaces  
    return result  
  
def write_to_csv(filename, row_data):  
    """  
    Writes a row of data to a specified CSV file.  
      
    Parameters:  
    filename (str): The name of the CSV file.  
    row_data (list): The row data to be written.  
    """  
    with open(filename, 'a+', encoding='UTF8', newline='') as file:  
        writer = csv.writer(file)  
        writer.writerow(row_data)  
  
def number_of_csv_lines(filename):  
    """  
    Returns the number of lines in a specified CSV file.  
      
    Parameters:  
    filename (str): The name of the CSV file.  
  
    Returns:  
    int: The number of lines in the CSV file.  
    """  
    file_data = pd.read_csv(filename, sep=',').values  
    return len(file_data)  
  
def read_csv_data(input_file):  
    """  
    Reads data from a specified CSV file.  
      
    Parameters:  
    input_file (str): The name of the CSV file.  
  
    Returns:  
    numpy.ndarray: The data read from the CSV file.  
    """  
    file_data = pd.read_csv(input_file, dtype='string', keep_default_na=False, sep=',').values  
    return file_data  
  
def bart_score(text_1, text_2):  
    """  
    Computes the BART score between two texts.  
      
    Parameters:  
    text_1 (str): The first text.  
    text_2 (str): The second text.  
  
    Returns:  
    float: The BART score.  
    """  
    score = bart_scorer.score([text_1], [text_2])  
    return score  
  
def check_bart_score(input_text, raw_text):  
    """  
    Checks if the BART score between input_text and raw_text is above a threshold.  
      
    Parameters:  
    input_text (str): The input text.  
    raw_text (str): The raw text to compare against.  
  
    Returns:  
    bool: True if the score is above the threshold, False otherwise.  
    """  
    THRESHOLD = -2.459  
    normalized_text = normalize_text(raw_text)  
    score = bart_score(input_text, normalized_text)[0]  
    return score >= THRESHOLD  
  
def get_column(input_file, column_name):  
    """  
    Retrieves a specific column from a CSV file.  
      
    Parameters:  
    input_file (str): The name of the CSV file.  
    column_name (str): The name of the column to retrieve.  
  
    Returns:  
    numpy.ndarray: The values from the specified column.  
    """  
    df = pd.read_csv(input_file, dtype='string', keep_default_na=False, sep=',')  
    column_data = df[column_name]  
    return column_data.values  
  
def generate_column_names(categories):  
    """  
    Generates a list of column names based on given categories.  
      
    Parameters:  
    categories (list): The list of categories.  
  
    Returns:  
    list: The generated list of column names.  
    """  
    column_names = ['human']  
    for name in categories:  
        column_names.append(name)  
    for first in categories:  
        for second in categories:  
            column_names.append(f"{first}_{second}")  
    return column_names  
  
def write_new_data(output_file, current_data, column_names):  
    """  
    Writes new data to a CSV file based on current data and column names.  
      
    Parameters:  
    output_file (str): The name of the output CSV file.  
    current_data (dict): The current data to be written.  
    column_names (list): The list of column names.  
    """  
    data_row = [current_data[column] for column in column_names]  
    write_to_csv(output_file, data_row)  

def refine(input_text, candidate):  
    """  
    Refines the candidate string by removing specific surrounding marks if they are present  
    in the input_text with a count difference of exactly 2.  
      
    Args:  
        input_text (str): The original text.  
        candidate (str): The candidate text to be refined.  
  
    Returns:  
        str: The refined candidate text.  
    """  
  
    # Create a copy of the candidate string and strip whitespace  
    refined_candidate = candidate.strip()  
  
    # List of marks to check and potentially remove  
    marks = ["```", "'", '"']  
  
    # Iterate through each mark  
    for mark in marks:  
        # Count occurrences of the mark in input_text and refined_candidate  
        count_input_text = input_text.count(mark)  
        count_refined_candidate = refined_candidate.count(mark)  
  
        # Check if the mark should be stripped  
        if (count_refined_candidate == count_input_text + 2 and  
                refined_candidate.startswith(mark) and  
                refined_candidate.endswith(mark)):  
            # Strip the mark from both ends of the refined_candidate  
            refined_candidate = refined_candidate.strip(mark)  
  
    return refined_candidate  


def extract_by_best_similarity(input_text, raw_text):  
    """  
    Extracts the best candidate string from the raw text based on the highest similarity score  
    compared to the input text. The similarity score is calculated using the BART score.  
  
    Args:  
        input_text (str): The original text.  
        raw_text (str): The raw text containing multiple candidate strings.  
  
    Returns:  
        str: The best candidate string with the highest similarity score.   
             Returns the input text if no suitable candidate is found.  
    """  
  
    # Refine the raw text  
    refined_raw_text = refine(input_text, raw_text)  
  
    # Tokenize the refined raw text into sentences  
    raw_candidates = nltk.sent_tokenize(refined_raw_text)  
  
    # Split sentences further by newlines to get individual candidates  
    candidate_list = []  
    for sentence in raw_candidates:  
        candidate_list.extend(sentence.split("\n"))  
  
    # Initialize variables to track the best similarity score and the best candidate  
    best_similarity = -9999  
    best_candidate = ""  
  
    # Iterate over each candidate to find the best one based on the BART score  
    for candidate in candidate_list:  
        refined_candidate = refine(input_text, candidate)  
        if check_bart_score(input_text, refined_candidate):  
            score = bart_score(input_text, refined_candidate)[0]  
            if score > best_similarity:  
                best_similarity = score  
                best_candidate = refined_candidate  
  
    # Print the best candidate found  
    print(f"best_candidate = {best_candidate}")  
  
    # Return the best candidate if found, otherwise return the input text  
    if best_candidate == "":  
        return input_text  
    return best_candidate  

def proofread_with_best_similarity(input_text, model_kind):  
    """  
    Proofreads the input text using the specified model and extracts the best-corrected text based on similarity.  
  
    Args:  
        input_text (str): The original text to be proofread.  
        model_kind (str): The kind of model to use for proofreading (e.g., CHATGPT, GEMINI).  
  
    Returns:  
        tuple: A tuple containing the raw proofread text and the best-corrected text.  
    """  
      
    # Normalize the input text  
    normalized_input_text = normalize_text(input_text)  
    print_and_log(f"INPUT = {normalized_input_text}")  
      
    result_text = ""  
    raw_text = ""  
      
    for i in range(1):  # Loop is redundant as it runs only once; consider removing if unnecessary  
        # Select the proofreading model based on model_kind  
        if model_kind == CHATGPT:  
            raw_text = chatGPT_proofread(normalized_input_text, normalize_output=IS_OUTPUT_NORMALIZATION)  
        elif model_kind == GEMINI:  
            raw_text = gemini_proofread(normalized_input_text, normalize_output=IS_OUTPUT_NORMALIZATION)  
        else:  
            raw_text = proofread_by_model_name(model_kind, normalized_input_text, normalize_output=IS_OUTPUT_NORMALIZATION)  
          
        # Extract the best candidate text based on similarity  
        result_text = extract_by_best_similarity(normalized_input_text, raw_text)  
          
        # Log the raw and result texts  
        print_and_log(f"RAW_{i} = {raw_text}")  
        print_and_log(f"RESULT_{i} = {result_text}")  
          
        # Normalize the result text  
        result_text = normalize_text(result_text)  
          
        # If a valid result is obtained, return it  
        if result_text != "":  
            return raw_text, result_text  
      
    # Return the raw and result texts  
    return raw_text, result_text  

def generate_file_name(existing_data_file, existing_kinds, new_kinds):  
    """  
    Generates a new file name based on the path of an existing data file and a combination of existing and new kinds.  
  
    Args:  
        existing_data_file (str): The path to the existing data file.  
        existing_kinds (list): A list of existing kinds.  
        new_kinds (list): A list of new kinds.  
  
    Returns:  
        str: The generated file name with the full path.  
    """  
      
    # Combine existing and new kinds into a single list  
    combined_kinds = existing_kinds + new_kinds  
      
    # Get the directory path of the existing data file  
    directory_path = os.path.dirname(existing_data_file)  
      
    # Create a new file name by joining the kinds with underscores and adding a suffix  
    new_file_name = "_".join(combined_kinds) + "_with_best_similarity.csv"  
      
    # Combine the directory path with the new file name to get the full output file path  
    output_file_path = os.path.join(directory_path, new_file_name)  
      
    return output_file_path  


def generate_new_data_with_best_similarity(existing_data_file, existing_kinds, new_kinds):  
    """  
    Generates new data with the best similarity based on existing and new kinds, and writes the results to a CSV file.  
  
    Args:  
        existing_data_file (str): The path to the existing data file.  
        existing_kinds (list): A list of existing kinds.  
        new_kinds (list): A list of new kinds.  
  
    Returns:  
        None  
    """  
      
    # Combine existing and new kinds into a single list  
    all_kinds = existing_kinds + new_kinds  
      
    # Generate column names for the CSV file  
    column_names = generate_column_names(all_kinds)  
      
    # Generate column names for existing kinds  
    existing_column_names = generate_column_names(existing_kinds)  
      
    # Generate the output file name  
    output_file = generate_file_name(existing_data_file, existing_kinds, new_kinds)  
      
    # Create the output file with column names if it doesn't exist  
    if not os.path.exists(output_file):  
        write_to_csv(output_file, column_names)  
      
    # Read existing data from the file  
    existing_data = {kind: get_column(existing_data_file, kind) for kind in existing_column_names}  
      
    # Read input data from the output file  
    input_data = read_csv_data(output_file)  
    start_index = len(input_data)  
    print(f"start_index = {start_index}")  
      
    num_rows = len(existing_data["human"])  
    global_generate_set = []  
    global_reuse = []  
      
    for index in range(start_index, num_rows):  
        # Initialize generation and reuse sets  
        generate_set = []  
        reuse_set = []  
          
        # Prepare the current generation dictionary  
        current_generation = {kind: existing_data[kind][index] for kind in existing_column_names}  
        print(f"current_generation before generation = {current_generation}")  
          
        human_text = current_generation["human"]  
          
        # Generate new kinds based on human text  
        for kind in new_kinds:  
            _, generated_text = proofread_with_best_similarity(human_text, kind)  
            current_generation[kind] = generated_text  
            generate_set.append(kind)  
          
        print(f"current_generation after generate one = {current_generation}")  
          
        # Generate combinations of kinds  
        for first_kind in all_kinds:  
            for second_kind in all_kinds:  
                combination_name = f"{first_kind}_{second_kind}"  
                  
                if combination_name not in current_generation:  
                    if first_kind in current_generation and current_generation[first_kind] == human_text:  
                        generated_text = current_generation[second_kind]  
                        reuse_set.append(f"{combination_name} from {second_kind}")  
                    else:  
                        is_need_generation = True  
                        for first_kind_2 in all_kinds:  
                            if first_kind != first_kind_2 and current_generation[first_kind] == current_generation[first_kind_2]:  
                                combination_name_2 = f"{first_kind_2}_{second_kind}"  
                                if combination_name_2 in current_generation:  
                                    generated_text = current_generation[combination_name_2]  
                                    reuse_set.append(f"{combination_name} from {combination_name_2}")  
                                    is_need_generation = False  
                                    break  
                        if is_need_generation:  
                            _, generated_text = proofread_with_best_similarity(current_generation[first_kind], second_kind)  
                            generate_set.append(f"{first_kind}_{second_kind}")  
                      
                    current_generation[combination_name] = generated_text  
          
        # Write the current generation to the output file  
        write_new_data(output_file, current_generation, column_names)  
          
        # Update global sets  
        global_generate_set.append(generate_set)  
        global_reuse

def shuffle(array, seed):  
    """  
    Shuffles the elements of each sublist in the given array using the specified seed.  
  
    Args:  
        array (list of lists): The array containing sublists to shuffle.  
        seed (int): The seed value for the random number generator.  
  
    Returns:  
        None  
    """  
    for sublist in array:  
        random.Random(seed).shuffle(sublist)  
  
def generate_human_with_shuffle(dataset_name, column_name, num_samples, output_file):  
    """  
    Generates a shuffled list of sentences from the dataset and writes them to a CSV file.  
  
    Args:  
        dataset_name (str): The name of the dataset to load.  
        column_name (str): The column name to extract sentences from.  
        num_samples (int): The number of samples to process.  
        output_file (str): The path to the output CSV file.  
  
    Returns:  
        None  
    """  
    # Load the dataset  
    dataset = load_dataset(dataset_name)  
    data = dataset['train']  
      
    lines = []  
    # Tokenize sentences and add to the lines list  
    for sample in data:  
        nltk_tokens = nltk.sent_tokenize(sample[column_name])  
        lines.extend(nltk_tokens)  
      
    # Filter out empty lines  
    filtered_lines = [line for line in lines if line != ""]  
    lines = filtered_lines  
      
    # Shuffle the lines  
    shuffle([lines], seed=SEED)  
      
    # Ensure the output file exists and write the header if it doesn't  
    if not os.path.exists(output_file):  
        header = ["human"]  
        write_to_csv(output_file, header)  
      
    # Get the number of lines already processed in the output file  
    number_of_processed_lines = number_of_csv_lines(output_file)  
      
    # Print the initial lines to be processed  
    print(f"Lines before processing: {lines[:num_samples]}")  
      
    # Slice the lines list to get the unprocessed lines  
    lines = lines[number_of_processed_lines:num_samples]  
      
    # Print the lines after slicing  
    print(f"Lines after slicing: {lines}")  
      
    # Process each line and write to the output file  
    for index, human in enumerate(lines):  
        normalized_text = normalize_text(human)  
        output_data = [normalized_text]  
        write_to_csv(output_file, output_data)  
        print(f"Processed {index + 1} / {len(lines)}; Total processed: {number_of_processed_lines + index + 1} / {num_samples}")  


def split(data, ratio):  
    """  
    Splits the data into training and testing sets based on the given ratio.  
  
    Args:  
        data (list): The dataset to split.  
        ratio (float): The ratio for splitting the data into training and testing sets.  
  
    Returns:  
        tuple: A tuple containing the training data and the testing data.  
    """  
    train_size = int(len(data) * ratio)  
    train_data = data[:train_size]  
    test_data = data[train_size:]  
    return train_data, test_data  
  
def bart_score_in_batch(text_1, text_2):  
    """  
    Calculates the BART score for pairs of texts in batches.  
  
    Args:  
        text_1 (list of str): The first list of texts.  
        text_2 (list of str): The second list of texts.  
  
    Returns:  
        list: A list of BART scores for each pair of texts.  
    """  
    return bart_scorer.score(text_1, text_2, batch_size=BATCH_SIZE)  
  
def extract_feature_in_batch(text_1, text_2, feature_kind):  
    """  
    Extracts features for pairs of texts using BART scores.  
  
    Args:  
        text_1 (list of str): The first list of texts.  
        text_2 (list of str): The second list of texts.  
        feature_kind (str): The type of feature to extract.  
  
    Returns:  
        list: A list of extracted features.  
    """  
    features = bart_score_in_batch(text_1, text_2)  
    return features  
  
def abstract_train(features, labels):  
    """  
    Trains a model using the given features and labels.  
  
    Args:  
        features (list): The input features for training.  
        labels (list): The target labels for training.  
  
    Returns:  
        object: The trained model.  
    """  
    model = MLPClassifier()  
    model.fit(features, labels)  
    return model  
  
def evaluate_model(model, features, labels):  
    """  
    Evaluates the model's performance using accuracy and ROC AUC scores.  
  
    Args:  
        model (object): The trained model to evaluate.  
        features (list): The input features for evaluation.  
        labels (list): The target labels for evaluation.  
  
    Returns:  
        None  
    """  
    predictions = model.predict(features)  
    rounded_predictions = [round(value) for value in predictions]  
      
    accuracy = accuracy_score(labels, rounded_predictions)  
    write_to_file(OUTPUT_FILE, f"Accuracy: {accuracy * 100.0:.1f}%\n")  
      
    roc_auc = roc_auc_score(labels, rounded_predictions)  
    write_to_file(OUTPUT_FILE, f"ROC AUC: {roc_auc * 100.0:.1f}%\n")  
  
def combine_text_with_BERT_format(text_list):  
    """  
    Combines a list of texts into a single string formatted for BERT input.  
  
    Args:  
        text_list (list of str): The list of texts to combine.  
  
    Returns:  
        str: The combined text string formatted for BERT input.  
    """  
    combined_text = f"<s>{text_list[0]}</s>"  
    for i in range(1, len(text_list)):  
        combined_text += f"</s>{text_list[i]}</s>"  
    return combined_text  


def preprocess_function_multimodel(sample):  
    """  
    Preprocesses a given sample for a multi-model setup by calculating BART scores  
    and formatting the text for BERT input.  
  
    Args:  
        sample (dict): A dictionary containing a key "text", which is a list of lists of strings.  
  
    Returns:  
        dict: A dictionary containing tokenized and preprocessed text data.  
    """  
    num_texts = len(sample["text"][0])  # Number of texts in each sub-sample  
    texts_grouped_by_index = [[] for _ in range(num_texts)]  # Initialize empty lists for grouping texts by index  
  
    # Group texts by their index across sub-samples  
    for sub_sample in sample["text"]:  
        for i in range(num_texts):  
            texts_grouped_by_index[i].append(sub_sample[i])  
  
    # Calculate BART scores for each text pair (text[0] with text[i])  
    bart_scores = [bart_score_in_batch(texts_grouped_by_index[0], texts_grouped_by_index[i]) for i in range(1, num_texts)]  
  
    combined_texts = []  
  
    # Process each sub-sample for BERT input  
    for index, sub_sample in enumerate(sample["text"]):  
        text_array = [sub_sample[0]]  # Start with the input text  
        score_generation_pairs = []  
  
        # Pair scores with their corresponding generations  
        for i in range(1, num_texts):  
            generation_text = sub_sample[i]  
            generation_score = bart_scores[i-1][index]  
            score_generation_pairs.append((generation_score, generation_text))  
  
        # Sort pairs by score in descending order  
        sorted_pairs = sorted(score_generation_pairs, reverse=True)  
  
        # Append sorted texts to text_array  
        for _, sorted_text in sorted_pairs:  
            text_array.append(sorted_text)  
  
        # Combine texts into a single BERT-formatted string  
        combined_text = combine_text_with_BERT_format(text_array)  
        combined_texts.append(combined_text)  
  
    # Tokenize the combined texts for BERT  
    return tokenizer(combined_texts, add_special_tokens=False, truncation=True)  

def preprocess_function_single_from_multimodel(sample):  
    """  
    Extracts the first text from each sub-sample in a multi-model sample and tokenizes it.  
  
    Args:  
        sample (dict): A dictionary containing a key "text", which is a list of lists of strings.  
  
    Returns:  
        dict: A dictionary containing tokenized text data.  
    """  
    combined_texts = []  
  
    # Iterate through each sub-sample  
    for sub_sample in sample["text"]:  
        input_text = sub_sample[0]  # Extract the first text from the sub-sample  
        combined_texts.append(input_text)  # Append it to the list of combined texts  
  
    # Tokenize the combined texts  
    return tokenizer(combined_texts, truncation=True)  
  
  
def check_api_error(data):  
    """  
    Checks if any item in the provided data indicates an API error.  
  
    Args:  
        data (list): A list of items to be checked for API errors.  
  
    Returns:  
        bool: True if an API error or ignore by API error is found, otherwise False.  
    """  
    for item in data:  
        if item == API_ERROR or item == IGNORE_BY_API_ERROR:  # Check for API error indicators  
            return True  # Return True if an error indicator is found  
    return False  # Return False if no error indicators are found  


def train_only_by_transformer_with_test_evaluation_early_stop(train_data, test_data, input_type, num_classes=2):  
    """  
    Trains a transformer model using the provided training and testing datasets with early stopping.  
  
    Args:  
        train_data (Dataset): The training dataset.  
        test_data (Dataset): The testing dataset.  
        input_type (str): The type of input data, either MULTIMODEL or SINGLE_FROM_MULTIMODEL.  
        num_classes (int, optional): The number of classes for classification. Defaults to 2.  
  
    Returns:  
        Trainer: The trained model wrapped in a Trainer object.  
    """  
    # Preprocess datasets based on the input type  
    if input_type == MULTIMODEL:  
        train_data = train_data.map(preprocess_function_multimodel, batched=True)  
        test_data = test_data.map(preprocess_function_multimodel, batched=True)  
    elif input_type == SINGLE_FROM_MULTIMODEL:  
        train_data = train_data.map(preprocess_function_single_from_multimodel, batched=True)  
        test_data = test_data.map(preprocess_function_single_from_multimodel, batched=True)  
      
    # Data collator to pad inputs  
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  
  
    # Load appropriate model based on number of classes  
    if num_classes == 3:  
        model = AutoModelForSequenceClassification.from_pretrained(  
            "pretrained_model/roberta-base_num_labels_3", num_labels=num_classes)  
    else:  
        model = AutoModelForSequenceClassification.from_pretrained(  
            ROBERTA_MODEL_PATHS[MODEL_NAME], num_labels=num_classes)  
      
    learning_rate = LEARNING_RATES[MODEL_NAME]  
    output_folder = "training_with_callbacks"  
  
    # Remove the output folder if it already exists  
    if os.path.exists(output_folder):  
        shutil.rmtree(output_folder)  
      
    # Training arguments  
    training_args = TrainingArguments(  
        output_dir=output_folder,  
        evaluation_strategy="epoch",  
        logging_strategy="epoch",  
        save_strategy="epoch",  
        learning_rate=learning_rate,  
        per_device_train_batch_size=BATCH_SIZE,  
        per_device_eval_batch_size=BATCH_SIZE,  
        num_train_epochs=NUMBER_OF_MAX_EPOCH_WITH_EARLY_STOPPING,  
        weight_decay=0.01,  
        push_to_hub=False,  
        metric_for_best_model=OPTIMIZED_METRIC,  
        load_best_model_at_end=True  
    )  
  
    # Create Trainer object  
    trainer = Trainer(  
        model=model,  
        args=training_args,  
        train_dataset=train_data,  
        eval_dataset=test_data,  
        tokenizer=tokenizer,  
        data_collator=data_collator,  
        compute_metrics=compute_metrics,  
        callbacks=[EarlyStoppingCallback(early_stopping_patience=PATIENCE)]  
    )  
  
    # Add custom callback  
    trainer.add_callback(CustomCallback(trainer))  
  
    # Start training  
    trainer.train()  
  
    return trainer  


def calculate_number_of_models(num_columns):  
    """  
    Calculates the number of models required based on the number of columns.  
  
    Args:  
        num_columns (int): The total number of columns.  
  
    Returns:  
        int: The number of models required.  
  
    Raises:  
        Exception: If the number of models cannot be calculated to match the number of columns.  
    """  
    num_models = 0  
    count_human = 1  # Initial count representing human input  
  
    while True:  
        count_single = num_models  # Single model count  
        count_pair = num_models * num_models  # Pair model count  
  
        total_count = count_human + count_single + count_pair  
  
        if total_count == num_columns:  
            return num_models  
        elif total_count > num_columns:  
            raise Exception("Cannot calculate the number of models to match the number of columns")  
  
        num_models += 1  


def read_multimodel_data_from_csv(multimodel_csv_file):  
    """  
    Reads multimodel data from a CSV file and organizes it into a structured format.  
  
    Args:  
        multimodel_csv_file (str): Path to the CSV file containing multimodel data.  
  
    Returns:  
        list: A list of dictionaries, each containing 'human', 'single', and 'pair' data.  
  
    Raises:  
        Exception: If there is an error in reading the CSV file or processing the data.  
    """  
    # Read CSV data into a list of lists  
    input_data = read_csv_data(multimodel_csv_file)  
      
    # Initialize the result list  
    structured_data = []  
      
    # Calculate the number of models based on the number of columns in the first row  
    num_models = calculate_number_of_models(len(input_data[0]))  
      
    # Process each row in the input data  
    for row in input_data:  
        row_data = {}  
        index = 0  
          
        # Extract human data  
        row_data["human"] = row[index]  
        index += 1  
          
        # Extract single model data  
        single_model_data = []  
        for _ in range(num_models):  
            single_model_data.append(row[index])  
            index += 1  
        row_data["single"] = single_model_data  
          
        # Extract pair model data  
        pair_model_data = []  
        for _ in range(num_models):  
            sub_pair_data = []  
            for _ in range(num_models):  
                sub_pair_data.append(row[index])  
                index += 1  
            pair_model_data.append(sub_pair_data)  
        row_data["pair"] = pair_model_data  
          
        # Append the structured row data to the result list  
        structured_data.append(row_data)  
      
    return structured_data  


def check_error(data_item):  
    """  
    Checks for errors in a data item by verifying the 'human', 'single', and 'pair' fields.  
      
    Args:  
        data_item (dict): A dictionary containing 'human', 'single', and 'pair' data.  
          
    Returns:  
        bool: True if any of the fields contain an error, otherwise False.  
    """  
    # Check for API error in the 'human' field  
    if check_api_error(data_item["human"]):  
        return True  
      
    # Check for API error in the 'single' model data  
    for single_text in data_item["single"]:  
        if check_api_error(single_text):  
            return True  
      
    # Get the number of models from the 'single' model data  
    num_models = len(data_item["single"])  
      
    # Check for API error in the 'pair' model data  
    for i in range(num_models):  
        for j in range(num_models):  
            if check_api_error(data_item["pair"][i][j]):  
                return True  
      
    # No errors found  
    return False  


def create_pair_sample(data_item, training_indices):  
    """  
    Creates pair samples for training by comparing human data with machine-generated data.  
      
    Args:  
        data_item (dict): A dictionary containing 'human', 'single', and 'pair' data.  
        training_indices (list): A list of indices used for training.  
          
    Returns:  
        list: A list of dictionaries, each containing a 'text' array and a 'label'.  
    """  
    # Initialize the result list  
    result_samples = []  
      
    # Check if there is any error in the data_item  
    if check_error(data_item):  
        return result_samples  

    print(training_indices)
    print(data_item)
    # Create machine samples  
    for train_idx in training_indices:  
        if data_item["human"] != data_item["single"][train_idx]:  
            text_array = []  
            machine_text = data_item["single"][train_idx]  
            text_array.append(machine_text)  
              
            for sub_idx in training_indices:  
                text_array.append(data_item["pair"][train_idx][sub_idx])  
              
            sample = {  
                "text": text_array,  
                "label": MACHINE_LABEL  
            }  
            result_samples.append(sample)  
      
    # Create human samples  
    text_array = [data_item["human"]]  
      
    for train_idx in training_indices:  
        text_array.append(data_item["single"][train_idx])  
      
    human_sample = {  
        "text": text_array,  
        "label": HUMAN_LABEL  
    }  
      
    # Append human samples for each machine sample  
    num_machine_samples = len(result_samples)  
    for _ in range(num_machine_samples):  
        result_samples.append(human_sample)  
      
    return result_samples  


def create_pair_test_sample(data_item, training_indices, testing_indices):  
    """  
    Creates pair test samples by comparing human data with machine-generated data.  
      
    Args:  
        data_item (dict): A dictionary containing 'human', 'single', and 'pair' data.  
        training_indices (list): A list of indices used for training.  
        testing_indices (list): A list of indices used for testing.  
          
    Returns:  
        list: A list of dictionaries, each containing a 'text' array and a 'label'.  
    """  
    # Initialize the result list  
    result_samples = []  
      
    # Check if there is any error in the data_item  
    if check_error(data_item):  
        return result_samples  
      
    # Create machine samples based on testing indices  
    for test_idx in testing_indices:  
        if data_item["human"] != data_item["single"][test_idx]:  
            text_array = []  
            machine_text = data_item["single"][test_idx]  
            text_array.append(machine_text)  
              
            for train_idx in training_indices:  
                text_array.append(data_item["pair"][test_idx][train_idx])  
              
            sample = {  
                "text": text_array,  
                "label": MACHINE_LABEL  
            }  
            result_samples.append(sample)  
      
    # Create human sample  
    text_array = [data_item["human"]]  
      
    for train_idx in training_indices:  
        text_array.append(data_item["single"][train_idx])  
      
    human_sample = {  
        "text": text_array,  
        "label": HUMAN_LABEL  
    }  
      
    # Append the human sample for each machine sample  
    num_machine_samples = len(result_samples)  
    for _ in range(num_machine_samples):  
        result_samples.append(human_sample)  
      
    return result_samples  


def create_train_val_sample(data, training_indices):  
    """  
    Creates training and validation samples from the provided data.  
      
    Args:  
        data (list): A list of data items, each to be processed.  
        training_indices (list): A list of indices used for training.  
          
    Returns:  
        list: A list of training and validation samples created from the data.  
    """  
    # Initialize the result list  
    result_samples = []  
      
    # Process each item in the data  
    for data_item in data:  
        # Create pair samples for the current item  
        sub_samples = create_pair_sample(data_item, training_indices)  
          
        # Extend the result list with the created sub-samples  
        result_samples.extend(sub_samples)  
      
    return result_samples  
  
  
def create_test_sample(data, training_indices, testing_indices):  
    """  
    Creates test samples from the provided data by comparing human data with machine-generated data.  
      
    Args:  
        data (list): A list of data items, each to be processed.  
        training_indices (list): A list of indices used for training.  
        testing_indices (list): A list of indices used for testing.  
          
    Returns:  
        list: A list of test samples created from the data.  
    """  
    # Initialize the result list  
    result_samples = []  
      
    # Process each item in the data  
    for data_item in data:  
        # Create pair test samples for the current item  
        sub_samples = create_pair_test_sample(data_item, training_indices, testing_indices)  
          
        # Extend the result list with the created sub-samples  
        result_samples.extend(sub_samples)  
      
    return result_samples  


def distribute_data(data, train_indices, test_indices, train_ratio, val_ratio):  
    """  
    Distributes the data into training, validation, and test samples.  
      
    Args:  
        data (list): A list of data items to be split and processed.  
        train_indices (list): A list of indices used for training.  
        test_indices (list): A list of indices used for testing.  
        train_ratio (float): The ratio of data to be used for training.  
        val_ratio (float): The ratio of data to be used for validation.  
          
    Returns:  
        tuple: A tuple containing lists of training, validation, and test samples.  
    """  
    # Split the data into training, validation, and test sets  
    train_data, val_data, test_data = split_train_val_test(data, train_ratio, val_ratio)  
  
    # Create training samples  
    train_samples = create_train_val_sample(train_data, train_indices)  
    write_to_file(OUTPUT_FILE, f"train samples = {len(train_samples)}\n")  
  
    # Create validation samples  
    val_samples = create_train_val_sample(val_data, train_indices)  
    write_to_file(OUTPUT_FILE, f"val samples = {len(val_samples)}\n")  
  
    # Create test samples  
    test_samples = create_test_sample(test_data, train_indices, test_indices)  
    write_to_file(OUTPUT_FILE, f"test samples = {len(test_samples)}\n")  
  
    return train_samples, val_samples, test_samples  
  
  
def convert_to_huggingface_with_multimodel(samples):  
    """  
    Converts a list of samples to the Hugging Face Dataset format.  
      
    Args:  
        samples (list): A list of samples to be converted.  
          
    Returns:  
        Dataset: A Hugging Face Dataset object created from the samples.  
    """  
    return Dataset.from_list(samples)  


def train_by_transformer_with_multimodel_and_early_stop(train_samples, val_samples, input_type):  
    """  
    Trains a transformer model with multimodal data and early stopping.  
      
    Args:  
        train_samples (list): A list of training samples.  
        val_samples (list): A list of validation samples.  
        input_type (str): The type of input data (e.g., multimodal).  
          
    Returns:  
        object: The trained model with early stopping.  
    """  
    # Convert training and validation samples to Hugging Face Dataset format  
    train_data = convert_to_huggingface_with_multimodel(train_samples)  
    val_data = convert_to_huggingface_with_multimodel(val_samples)  
      
    # Train the model with early stopping and return the trained model  
    return train_only_by_transformer_with_test_evaluation_early_stop(train_data, val_data, input_type)  
  
  
def test_by_transformer_with_multimodel(detector, test_samples, input_type):  
    """  
    Tests a trained transformer model with multimodal data.  
      
    Args:  
        detector (object): The trained model to be evaluated.  
        test_samples (list): A list of test samples.  
        input_type (str): The type of input data (e.g., multimodal).  
          
    Returns:  
        None  
    """  
    # Convert test samples to Hugging Face Dataset format  
    test_data = convert_to_huggingface_with_multimodel(test_samples)  
      
    # Apply the appropriate preprocessing function based on the input type  
    if input_type == MULTIMODEL:  
        test_data = test_data.map(preprocess_function_multimodel, batched=True)  
    elif input_type == SINGLE_FROM_MULTIMODEL:  
        test_data = test_data.map(preprocess_function_single_from_multimodel, batched=True)  

    print("Test data:", test_data)
    # Evaluate the model on the test data  
    result = detector.evaluate(eval_dataset=test_data)
    print("Test result:", result)
      
    # Extract and log the ROC AUC score  
    roc_auc = result['eval_roc_auc']  
    write_to_file(OUTPUT_FILE, "roc_auc: %.1f%%" % (roc_auc * 100.0) + "\n")  


def extract_by_feature_kind(samples, feature_type):  
    """  
    Extracts features from the given samples based on the specified feature type.  
  
    Args:  
        samples (list): A list of samples where each sample is a dictionary with 'text' and 'label' keys.  
        feature_type (str): The type of feature to extract.  
  
    Returns:  
        tuple: A tuple containing the extracted features and corresponding labels.  
    """  
    text_1_list = []  
    text_2_list = []  
    labels = []  
      
    for sample in samples:  
        text_1_list.append(sample["text"][0])  
        text_2_list.append(sample["text"][1])  
        labels.append(sample["label"])  
      
    # Extract features in batch based on the feature type  
    features = extract_feature_in_batch(text_1_list, text_2_list, feature_type)  
      
    return features, labels  
  
  
def train_by_feature_kind(train_samples, feature_type):  
    """  
    Trains a model using features extracted from the training samples based on the specified feature type.  
  
    Args:  
        train_samples (list): A list of training samples where each sample is a dictionary with 'text' and 'label' keys.  
        feature_type (str): The type of feature to extract for training.  
  
    Returns:  
        object: The trained model.  
    """  
    # Extract features and labels from the training samples  
    features, labels = extract_by_feature_kind(train_samples, feature_type)  
      
    # Convert features to a numpy array and reshape for training  
    features = np.array(features)  
    features = features.reshape(-1, 1)  
      
    # Train the model using the extracted features and labels  
    model = abstract_train(features, labels)  
      
    return model  


def test_by_feature_kind(detector, samples, feature_type):  
    """  
    Tests a detector using features extracted from the provided samples based on the specified feature type.  
  
    Args:  
        detector (object): The detector model to be evaluated.  
        samples (list): A list of samples where each sample is a dictionary with 'text' and 'label' keys.  
        feature_type (str): The type of feature to extract for testing.  
  
    Returns:  
        None  
    """  
    # Extract features and labels from the samples  
    features, labels = extract_by_feature_kind(samples, feature_type)  
      
    # Convert features to a numpy array and reshape for evaluation  
    features = np.array(features)  
    features = features.reshape(-1, 1)  
      
    # Evaluate the detector model using the extracted features and labels  
    evaluate_model(detector, features, labels)  


def general_process_multimodels_train_val_test(train_samples, val_samples, test_samples):  
    """  
    General process for training, validating, and testing models using multi-model and feature kind approaches.  
  
    Args:  
        train_samples (list): Training samples.  
        val_samples (list): Validation samples.  
        test_samples (list): Test samples.  
  
    Returns:  
        None  
    """  
    # Multi-model approach  
    input_kind = MULTIMODEL  
    write_to_file(OUTPUT_FILE, f"\nInput kind = {input_kind} \n")  
      
    # Train detector using multi-model with early stopping  
    detector = train_by_transformer_with_multimodel_and_early_stop(train_samples, val_samples, input_kind)  
    detector.save_model("./models/multi_model_detector")
    
    # Evaluate on train set  
    write_to_file(OUTPUT_FILE, f"EVALUATE ON TRAIN SET \n")  
    test_by_transformer_with_multimodel(detector, train_samples, input_kind)  
      
    # Evaluate on validation set  
    write_to_file(OUTPUT_FILE, f"EVALUATE ON VALIDATION SET \n")  
    test_by_transformer_with_multimodel(detector, val_samples, input_kind)  
      
    # Evaluate on test set  
    write_to_file(OUTPUT_FILE, f"EVALUATE ON TEST SET \n")  
    test_by_transformer_with_multimodel(detector, test_samples, input_kind)  
      
    # Single from multi-model approach  
    input_kind = SINGLE_FROM_MULTIMODEL  
    write_to_file(OUTPUT_FILE, f"\nInput kind = {input_kind} \n")  
      
    # Train detector using single from multi-model with early stopping  
    detector = train_by_transformer_with_multimodel_and_early_stop(train_samples, val_samples, input_kind)  
    detector.save_model("./models/single_model_detector_1")
    
    # Evaluate on train set  
    write_to_file(OUTPUT_FILE, f"EVALUATE ON TRAIN SET \n")  
    test_by_transformer_with_multimodel(detector, train_samples, input_kind)  
      
    # Evaluate on validation set  
    write_to_file(OUTPUT_FILE, f"EVALUATE ON VALIDATION SET \n")  
    test_by_transformer_with_multimodel(detector, val_samples, input_kind)  
      
    # Evaluate on test set  
    write_to_file(OUTPUT_FILE, f"EVALUATE ON TEST SET \n")  
    test_by_transformer_with_multimodel(detector, test_samples, input_kind)  
      
    # Feature kind approach  
    sample_length = len(train_samples[0]["text"])  
    if sample_length == 2:  # Check if the sample length is 2, indicating BART feature kind  
        feature_kind = BART  
        write_to_file(OUTPUT_FILE, f"\nFeature kind = {feature_kind} \n")  
          
        # Train detector using feature kind  
        detector = train_by_feature_kind(train_samples, feature_kind)
          
        # Evaluate on train set  
        write_to_file(OUTPUT_FILE, f"EVALUATE ON TRAIN SET \n")  
        test_by_feature_kind(detector, train_samples, feature_kind)  
          
        # Evaluate on validation set  
        write_to_file(OUTPUT_FILE, f"EVALUATE ON VALIDATION SET \n")  
        test_by_feature_kind(detector, val_samples, feature_kind)  
          
        # Evaluate on test set  
        write_to_file(OUTPUT_FILE, f"EVALUATE ON TEST SET \n")  
        test_by_feature_kind(detector, test_samples, feature_kind)  


def process_multi_models_with_validation(multimodel_csv_file, train_indices, test_indices, num_samples):  
    """  
    Processes multi-model data with validation, training, and testing.  
  
    Args:  
        multimodel_csv_file (str): Path to the CSV file containing multi-model data.  
        train_indices (list): Indices for the training data.  
        test_indices (list): Indices for the testing data.  
        num_samples (int): Number of samples to process.  
  
    Returns:  
        None  
    """  
    # Log the details of the process  
    write_to_file(OUTPUT_FILE, f"PROCESSING FILE={multimodel_csv_file} \n")  
    write_to_file(OUTPUT_FILE, f"EXPERIMENT WITH {MODEL_NAME} model \n")  
    write_to_file(OUTPUT_FILE, f"NUMBER OF MAX EPOCHS WITH EARLY STOPPING = {NUMBER_OF_MAX_EPOCH_WITH_EARLY_STOPPING} \n")  
    write_to_file(OUTPUT_FILE, f"PATIENCE = {PATIENCE} \n")  
    write_to_file(OUTPUT_FILE, f"OPTIMIZED METRIC = {OPTIMIZED_METRIC} \n")  
    write_to_file(OUTPUT_FILE, f"BATCH SIZE = {BATCH_SIZE} \n")  
    write_to_file(OUTPUT_FILE, f"Number of samples = {num_samples} \n")  
  
    # Read multi-model data from the CSV file  
    data = read_multimodel_data_from_csv(multimodel_csv_file)  
      
    # Limit data to the specified number of samples  
    data = data[:num_samples]  
  
    # Distribute data into training, validation, and testing sets  
    train_samples, val_samples, test_samples = distribute_data(data, train_indices, test_indices, TRAIN_RATIO, VAL_RATIO)  
  
    # Log the training and testing indices  
    write_to_file(OUTPUT_FILE, f"Multimodel training with train indices {train_indices}, test with test indices {test_indices} \n")  
  
    # Process the multi-models for training, validation, and testing  
    general_process_multimodels_train_val_test(train_samples, val_samples, test_samples)  


def split_train_val_test(data, train_ratio, val_ratio):  
    """  
    Splits the dataset into training, validation, and test sets based on specified ratios.  
  
    Args:  
        data (list): The dataset to be split.  
        train_ratio (float): The ratio of the dataset to be used for training.  
        val_ratio (float): The ratio of the dataset to be used for validation.  
  
    Returns:  
        tuple: A tuple containing three lists - (train_data, val_data, test_data).  
    """  
    # Calculate the number of samples for the training set  
    num_train_samples = int(len(data) * train_ratio)  
      
    # Calculate the number of samples for the validation set  
    num_val_samples = int(len(data) * val_ratio)  
      
    # Split the data into training, validation, and test sets  
    train_data = data[:num_train_samples]  
    val_data = data[num_train_samples:(num_train_samples + num_val_samples)]  
    test_data = data[(num_train_samples + num_val_samples):]  
      
    return train_data, val_data, test_data  
 
  
def main():  
    """  
    Main function to handle argument parsing and execute the sequence of operations  
    including data generation and processing with multiple models.  
    """  
    parser = argparse.ArgumentParser(description='SimLLM.')  
  
    # Argument for specifying the list of large language models  
    parser.add_argument('--LLMs', nargs="+", default=[CHATGPT],#, "Yi", "OpenChat"],   
                        help='List of large language models')  
  
    # Argument for specifying the list of training indexes  
    parser.add_argument('--train_indexes', type=int, default=[0,1,2], nargs="+",   
                        help='List of training indexes')  
  
    # Argument for specifying the list of testing indexes  
    parser.add_argument('--test_indexes', type=int, default=[0], nargs="+",   
                        help='List of testing indexes')  
  
    # Argument for specifying the number of samples  
    parser.add_argument('--num_samples', type=int, default=5000,   
                        help='Number of samples')  

    # Argument for multimodel_csv_file
    parser.add_argument('--multimodel_csv_file', type=str, default="data/ChatGPT_Nous_Hermes_2_Yi_34B_openchat_3_5_1210_with_best_similarity.csv",    
                        help='multimodel_csv_file')  

    # Parse the command-line arguments  
    args = parser.parse_args()  
  
    if args.multimodel_csv_file == "":
        # Static dataset parameters  
        dataset_name = "xsum"  
        column_name = "document"  
        num_samples = args.num_samples  
        output_file = "data/test.csv"  
    
        # Generate human data with shuffle  
        # generate_human_with_shuffle(dataset_name, column_name, num_samples, output_file)  
    
        # Existing data parameters  
        existing_data_file = output_file  
        existing_kinds = []  
    
        # New kinds of models to generate data with  
        new_kinds = args.LLMs  
    
        # Generate new data with best similarity  
        generate_new_data_with_best_similarity(existing_data_file, existing_kinds, new_kinds)  
    
        # Generate a filename for the multimodel CSV file  
        multimodel_csv_file = generate_file_name(existing_data_file, existing_kinds, new_kinds)  
    
    else: 
        multimodel_csv_file = args.multimodel_csv_file
        
    # Number of samples to process (-1 means process all samples)  
    num_samples_to_process = -1  
  
    # Training and testing indexes from arguments  
    training_indexes = args.train_indexes  
    testing_indexes = args.test_indexes  
  
    # Process multiple models with validation  
    process_multi_models_with_validation(multimodel_csv_file, training_indexes, testing_indexes, num_samples_to_process)  
  
if __name__ == "__main__":  
    main()