import csv
import os

from dotenv import load_dotenv
from openai import (
    AzureOpenAI,
    OpenAIError,
)


def get_first_column(csv_filepath):
    """Return the first-column values of a CSV file, skipping its header row.

    The file is read as UTF-8. The first row is always treated as a header
    and discarded; completely empty rows are skipped.

    Args:
        csv_filepath: The path to the CSV file.

    Returns:
        A list of strings, one per non-empty data row, in file order. The
        list is empty when the file cannot be opened or has no rows after
        the header. If reading fails part-way through (for example on a
        decoding error), the values collected before the failure are
        returned. Errors are printed to the console, never raised.
    """
    first_column_values = []
    try:
        # newline="" leaves line-ending handling to the csv module, so quoted
        # fields containing line breaks are parsed correctly.
        with open(csv_filepath, newline="", encoding="utf-8") as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # Skip the header row (no-op on an empty file).
            for row in reader:
                if row:  # Blank lines come back as empty lists.
                    first_column_values.append(row[0])
    except FileNotFoundError:
        print(f"Error: File not found at {csv_filepath}")
    except Exception as e:  # Best effort: report (e.g. UnicodeDecodeError) and go on.
        print(f"An error occurred: {e}")
    return first_column_values


def add_text_to_csv(csv_filepath, text_to_add, index=0):
    """Append one or more ``[index, text]`` rows to a UTF-8 CSV file.

    When the file is new or empty, a ``#,Text`` header row is written first.

    Args:
        csv_filepath: The path to the CSV file. It is opened in append mode,
            so it is created if it does not exist yet.
        text_to_add: Either a single value or a list of values. Each value
            becomes the second column of its own new row.
        index: The value written in the first column of every row added by
            this call. Defaults to 0.

    Returns:
        None. Errors are printed to the console, never raised.
    """
    # Normalise the input to a list of two-column rows; every row written by
    # one call shares the same index.
    if isinstance(text_to_add, list):
        rows = [[index, text_item] for text_item in text_to_add]
    else:
        rows = [[index, text_to_add]]

    try:
        with open(csv_filepath, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)
            # Move to the end explicitly so tell() reports the file size; a
            # size of zero means the header has not been written yet.
            csvfile.seek(0, 2)
            if csvfile.tell() == 0:
                writer.writerow(["#", "Text"])
            writer.writerows(rows)
    except Exception as e:  # Best effort: report the failure and go on.
        print(f"An error occurred: {e}")


load_dotenv()

AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")

azure_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    # API versions noted for this script: 2024-12-01-preview, 2024-05-01-preview.
    api_version=AZURE_OPENAI_API_VERSION,
)

# Passed as ``model`` below. Alternatives noted: "gpt-4o-mini", "o1-mini".
deplopment_name = "gpt-4o"

TEXT_PROMPT = """
Paraphrase the following news, only output the paraphrased text:
"""

# Upper bound on the number of paraphrases written in a single run.
MAX_PARAPHRASES = 1000

text = get_first_column("data/MAGE_2.csv")

# Number of paraphrases written so far; also used as the output row number.
count = 0
for index, news in enumerate(text):
    # ">=" stops after exactly MAX_PARAPHRASES successes; a ">" comparison
    # would let one extra item through.
    if count >= MAX_PARAPHRASES:
        break

    prompt = TEXT_PROMPT + news
    print(f"{index:5}:\t{news[:50]}")

    try:
        response = azure_client.chat.completions.create(
            model=deplopment_name,  # On Azure this is the deployment name.
            messages=[
                {"role": "user", "content": prompt},
            ],
        )
    except OpenAIError as e:
        print(f"Error interacting with OpenAI API: {e}")
        continue

    # A completion can come back without usable text (no choices, or a
    # message whose content is None). Skip it instead of writing a blank row
    # and consuming an index.
    paraphrased_news = (
        response.choices[0].message.content if response.choices else None
    )
    if paraphrased_news is None:
        print(f"No paraphrase returned for item {index}; skipping.")
        continue

    count += 1
    add_text_to_csv("data/test.csv", paraphrased_news, count)