File size: 3,819 Bytes
504f37b
38fd181
504f37b
5d842c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38fd181
504f37b
38fd181
 
 
504f37b
 
38fd181
 
 
504f37b
38fd181
62dc9d8
504f37b
5d842c6
504f37b
 
62dc9d8
5d842c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62dc9d8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os

from dotenv import load_dotenv
from openai import AzureOpenAI, OpenAIError


import csv

def get_first_column(csv_filepath):
    """
    Collect the first field of every data row in a CSV file.

    The file is opened as UTF-8. Its first row is treated as a header and
    discarded; rows that the csv reader yields as empty (blank lines) are
    ignored.

    Args:
        csv_filepath: The path to the CSV file.

    Returns:
        A list of strings holding the first-column value of each non-empty
        data row, in file order. The list is empty when the file is missing,
        empty, or contains only a header. If reading fails part-way through,
        the values gathered before the failure are returned. Errors are
        reported with ``print`` and never raised to the caller.
    """
    collected = []
    try:
        with open(csv_filepath, mode='r', newline='', encoding='utf-8') as handle:
            for position, record in enumerate(csv.reader(handle)):
                # Position 0 is the header row; blank lines come back as [].
                if position == 0 or not record:
                    continue
                collected.append(record[0])
    except FileNotFoundError:
        print(f"Error: File not found at {csv_filepath}")
    except Exception as err:  # e.g. UnicodeDecodeError, csv.Error, permission problems
        print(f"An error occurred: {err}")

    return collected

def add_text_to_csv(csv_filepath, text_to_add, index=0):
    """
    Append ``[index, text]`` rows to a two-column CSV file (UTF-8 encoding).

    When the target file is empty (or newly created) a ``#,Text`` header row
    is written first. Any error raised while opening or writing the file is
    reported with ``print`` and not propagated.

    Args:
        csv_filepath: The path to the CSV file; it is opened in append mode.
        text_to_add: Either a single value, written as one row, or a list of
            values, written as one row per element.
        index: Value placed in the first column of every row written by this
            call. Defaults to 0.
    """
    # Normalise to a list so one code path writes both the single-value and list cases.
    entries = text_to_add if isinstance(text_to_add, list) else [text_to_add]

    try:
        with open(csv_filepath, mode='a', newline='', encoding='utf-8') as handle:
            out = csv.writer(handle)

            # Move to the end of the file (whence=2); an offset of 0 there
            # means nothing has been written yet, so the header is still needed.
            handle.seek(0, 2)
            if handle.tell() == 0:
                out.writerow(["#", "Text"])

            out.writerows([index, entry] for entry in entries)

    except Exception as err:
        print(f"An error occurred: {err}")

# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")

# Use the endpoint / API version from the environment when they are set.
# Previously these variables were read and then ignored; the former hard-coded
# values remain as fallbacks so an unconfigured environment behaves as before.
azure_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT or "https://quoc-nguyen.openai.azure.com/",
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION or "2024-05-01-preview",
)

# Passed as ``model=`` to the chat completions call below.
# (The misspelled variable name is kept so existing references keep working.)
deplopment_name = "gpt-4o-mini" # "o1-mini"  # or "gpt-4o"
TEXT_PROMPT = """
Paraphrase the following news, only output the paraphrased text:

"""
# Upper bound on the number of paraphrases written per run.
MAX_PARAPHRASES = 1000

text = get_first_column("data/MAGE.csv")
count = 0  # number of paraphrases successfully written so far
for index, news in enumerate(text):
    # ``>=`` stops after exactly MAX_PARAPHRASES rows; the earlier ``> 1000``
    # check let one extra item through.
    if count >= MAX_PARAPHRASES:
        break
    prompt = TEXT_PROMPT + news
    print(f"{index:5}:\t{news[:50]}")

    try:
        response = azure_client.chat.completions.create(
            model=deplopment_name,  # model = "deployment_name".
            messages=[
                {"role": "user", "content": prompt},
            ],
        )
    except OpenAIError as e:
        # Best effort: report the failure and move on to the next item.
        print(f"Error interacting with OpenAI API: {e}")
        continue

    # A response can come back with no choices or with no message content;
    # skip it rather than crash on the index or write a blank row.
    choices = response.choices
    paraphrased_news = choices[0].message.content if choices else None
    if paraphrased_news is None:
        print(f"No paraphrased text returned for item {index}; skipping")
        continue

    count += 1
    # ``count`` (1-based) is written as the row number in the output file.
    add_text_to_csv("data/MAGE_4o_mini.csv", paraphrased_news, count)