# NOTE(review): stray page-header text ("Spaces: Sleeping") captured with this file; not part of the program.
import csv | |
import os | |
from dotenv import load_dotenv | |
from openai import ( | |
AzureOpenAI, | |
OpenAIError, | |
) | |
def get_first_column(csv_filepath):
    """
    Read a CSV file with a header and return the values of its first column.

    The first row is treated as a header and skipped. Rows that the csv
    module parses as empty (e.g. blank lines) are ignored.

    Args:
        csv_filepath: The path to the CSV file. The file is decoded as UTF-8.

    Returns:
        A list of strings, one per non-empty data row, holding that row's
        first field.
        Returns an empty list if the file cannot be opened or read, or if
        the file has no rows after the header. Partial results are never
        returned: if reading fails part-way through the file, the rows read
        so far are discarded.
        Prints an error message to the console in case of file errors.
    """
    try:
        # newline="" leaves line-ending handling to the csv module so that
        # quoted fields containing newlines are parsed correctly.
        with open(csv_filepath, newline="", encoding="utf-8") as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # Skip the header row (no-op on an empty file)
            # Built in a single expression so that an error raised while
            # iterating leaves nothing half-filled to return.
            return [row[0] for row in reader if row]
    except FileNotFoundError:
        print(f"Error: File not found at {csv_filepath}")
    except Exception as e:
        # Deliberate best-effort boundary: any other failure (e.g.
        # UnicodeDecodeError, csv.Error, permission problems) is reported
        # and turned into an empty result instead of propagating.
        print(f"An error occurred: {e}")
    return []
def add_text_to_csv(csv_filepath, text_to_add, index=0):
    """
    Append rows to a two-column ("#", "Text") CSV file (UTF-8 encoding).

    The file is opened in append mode, so it is created if it does not
    exist. If the file is empty, the header row ``#,Text`` is written
    before any data.

    Args:
        csv_filepath: The path to the CSV file.
        text_to_add: The text to append. A list or tuple adds one row per
            item; any other value is written as a single row.
        index: The value written in the "#" column of every row added by
            this call. Defaults to 0.

    Returns:
        None. Errors are not raised; an error message is printed to the
        console instead.
    """
    # Normalise the input to a sequence of values, one per output row.
    if isinstance(text_to_add, (list, tuple)):
        texts = text_to_add
    else:
        texts = [text_to_add]

    try:
        with open(
            csv_filepath,
            "a",  # append mode: existing rows are preserved
            newline="",
            encoding="utf-8",
        ) as csvfile:
            writer = csv.writer(csvfile)
            # Move to the end of the file; a position of 0 there means the
            # file is empty and still needs its header row.
            csvfile.seek(0, 2)
            if csvfile.tell() == 0:
                writer.writerow(["#", "Text"])
            writer.writerows([index, text_item] for text_item in texts)
    except Exception as e:
        # Best-effort write: report the problem and let the caller continue.
        print(f"An error occurred: {e}")
# Load Azure OpenAI settings from a .env file (if present) and the process
# environment.
load_dotenv()
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")

azure_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
    # API VERSION=[ 2024-12-01-preview, 2024-05-01-preview]
)

# Passed as `model=` to the chat-completions call below.
# NOTE(review): the name is misspelled ("deplopment"); it is kept as-is
# because code outside this section may reference it.
deplopment_name = "gpt-4o"  # or "gpt-4o-mini" # "o1-mini" # or "gpt-4o"

# Upper bound on the number of paraphrases written in one run.
MAX_PARAPHRASES = 1000

TEXT_PROMPT = """
Paraphrase the following news, only output the paraphrased text:
"""

text = get_first_column("data/MAGE_2.csv")

# Number of paraphrases written so far; also used as the "#" value of each
# output row, so written rows are numbered 1, 2, 3, ...
count = 0
for index, news in enumerate(text):
    # Stop once exactly MAX_PARAPHRASES rows have been written (the former
    # `count > 1000` check allowed one extra item through).
    if count >= MAX_PARAPHRASES:
        break

    prompt = TEXT_PROMPT + news
    print(f"{index:5}:\t{news[:50]}")

    try:
        response = azure_client.chat.completions.create(
            model=deplopment_name,  # model = "deployment_name".
            messages=[
                # {"role": "system", "content": "You're an assistant."},
                {"role": "user", "content": prompt},
            ],
            # max_tokens=512,
            # temperature=0,
        )
    except OpenAIError as e:
        # A failed request does not consume an output index; move on to the
        # next news item.
        print(f"Error interacting with OpenAI API: {e}")
        continue

    # The response may carry no choices, or a message without text content;
    # skip those instead of crashing or writing an empty row.
    choices = response.choices
    paraphrased_news = choices[0].message.content if choices else None
    if paraphrased_news is None:
        print(f"No paraphrased text returned for item {index}; skipping.")
        continue

    count += 1
    add_text_to_csv("data/test.csv", paraphrased_news, count)