news_verification / gpt_test.py
pmkhanh7890's picture
refactor code + fix bug of label after grouping url
00b1038
raw
history blame
4.07 kB
import csv
import os
from dotenv import load_dotenv
from openai import (
AzureOpenAI,
OpenAIError,
)
def get_first_column(csv_filepath):
    """
    Collect the first-column value of every data row in a CSV file.

    The file is opened as UTF-8 and its first row is treated as a header
    and discarded. Completely empty rows are skipped.

    Args:
        csv_filepath: The path to the CSV file.

    Returns:
        A list with the first field of each non-empty row after the
        header. Failures are reported with ``print`` rather than raised:
        the list is empty when the file cannot be opened, and holds the
        values gathered so far when reading fails part-way through.
    """
    collected = []
    try:
        with open(csv_filepath, newline="", encoding="utf-8") as handle:
            rows = csv.reader(handle)
            # Discard the header; the default keeps an empty file from
            # raising StopIteration.
            next(rows, None)
            for record in rows:
                if not record:
                    # Blank lines come back from csv.reader as [].
                    continue
                collected.append(record[0])
    except FileNotFoundError:
        print(f"Error: File not found at {csv_filepath}")
    except Exception as e:  # e.g. UnicodeDecodeError while decoding the file
        print(f"An error occurred: {e}")
    return collected
def add_text_to_csv(csv_filepath, text_to_add, index=0):
    """
    Append ``[index, text]`` rows to a UTF-8 CSV file.

    When the file is empty (or has just been created by the append-mode
    open) a ``#,Text`` header row is written first.

    Args:
        csv_filepath: The path to the CSV file.
        text_to_add: A single value, or a list of values. A list produces
            one row per element; anything else produces exactly one row.
        index: Value written to the first column of every row added by
            this call. Defaults to 0.

    Errors are reported with ``print`` and are not re-raised.
    """
    # Normalise the payload so both input shapes share one write path.
    entries = text_to_add if isinstance(text_to_add, list) else [text_to_add]
    try:
        with open(csv_filepath, "a", newline="", encoding="utf-8") as out:
            csv_out = csv.writer(out)
            # Jump to the end (whence=2) so tell() reports the file size;
            # a size of zero means the header has not been written yet.
            out.seek(0, 2)
            if out.tell() == 0:
                csv_out.writerow(["#", "Text"])
            for entry in entries:
                csv_out.writerow([index, entry])
    except Exception as e:
        print(f"An error occurred: {e}")
# Script body: paraphrase the news items found in the first column of
# data/MAGE_2.csv with an Azure OpenAI chat deployment and append each
# paraphrase to data/test.csv.

# Populate the environment via python-dotenv, then read the Azure
# OpenAI connection settings from it.
load_dotenv()
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")

azure_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
    # API VERSION=[ 2024-12-01-preview, 2024-05-01-preview]
)

# NOTE: the identifier is misspelled ("deplopment"); it is kept unchanged
# because it is a module-level name other code could reference.
deplopment_name = "gpt-4o"  # or "gpt-4o-mini" # "o1-mini" # or "gpt-4o"

# Instruction prepended to every news item sent to the model.
TEXT_PROMPT = """
Paraphrase the following news, only output the paraphrased text:
"""

text = get_first_column("data/MAGE_2.csv")

# Number of paraphrases written so far; also used as the "#" column value.
count = 0
for index, news in enumerate(text):
    # NOTE(review): with `>` the loop stops only after 1001 rows have been
    # written; if the intended cap is 1000 this should be `>=` -- confirm.
    if count > 1000:
        break

    prompt = TEXT_PROMPT + news
    print(f"{index:5}:\t{news[:50]}")
    # print(f"{index:5}:\t{prompt}")

    try:
        response = azure_client.chat.completions.create(
            model=deplopment_name,  # model = "deployment_name".
            messages=[
                # {"role": "system", "content": "You're an assistant."},
                {"role": "user", "content": prompt},
            ],
            # max_tokens=512,
            # temperature=0,
        )
    except OpenAIError as e:
        print(f"Error interacting with OpenAI API: {e}")
        continue

    # The message content is optional in the SDK response (it can be None,
    # e.g. when the completion is filtered), and indexing an empty
    # `choices` list would raise IndexError. Skip such responses instead
    # of crashing or writing a blank row that still consumes an index.
    paraphrased_news = (
        response.choices[0].message.content if response.choices else None
    )
    if not paraphrased_news:
        print(f"No paraphrased text returned for item {index}; skipping.")
        continue

    # Count only successful paraphrases so the "#" column stays contiguous.
    count += 1
    add_text_to_csv("data/test.csv", paraphrased_news, count)