File size: 4,070 Bytes
bfe6692
504f37b
38fd181
504f37b
bfe6692
 
 
 
5d842c6
 
 
 
bfe6692
5d842c6
 
 
 
 
 
bfe6692
 
 
 
5d842c6
 
 
 
bfe6692
 
 
 
 
5d842c6
 
 
 
 
 
 
 
 
bfe6692
 
 
5d842c6
bfe6692
5d842c6
 
bfe6692
5d842c6
 
 
 
 
 
bfe6692
5d842c6
 
bfe6692
 
 
 
 
 
5d842c6
 
 
 
 
 
 
 
 
 
 
bfe6692
 
 
5d842c6
bfe6692
 
 
5d842c6
 
 
38fd181
bfe6692
504f37b
38fd181
 
 
504f37b
 
00b1038
38fd181
00b1038
 
504f37b
38fd181
0827f9d
504f37b
5d842c6
504f37b
 
a5e8d12
bfe6692
5d842c6
 
 
 
 
bfe6692
 
5d842c6
 
 
 
bfe6692
5d842c6
 
 
 
 
 
 
 
bfe6692
5d842c6
 
bfe6692
00b1038
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import csv
import os

from dotenv import load_dotenv
from openai import (
    AzureOpenAI,
    OpenAIError,
)


def get_first_column(csv_filepath):
    """
    Collect the first field of every data row in a CSV file with a header.

    The file is decoded as UTF-8. The first record is treated as the header
    and discarded; records that parse to an empty row are ignored.

    Args:
        csv_filepath: The path to the CSV file.

    Returns:
        A list of strings holding the first-column value of each non-empty
        row after the header. The list is empty when the file cannot be
        opened or has no data rows. If reading fails part-way through, the
        values gathered up to that point are returned.
        Errors are reported with print() instead of being raised.
    """
    column = []
    try:
        with open(csv_filepath, encoding="utf-8", newline="") as source:
            for position, record in enumerate(csv.reader(source)):
                if position == 0:
                    continue  # the first record is the header
                if record:  # blank lines parse to an empty list
                    column.append(record[0])
    except FileNotFoundError:
        print(f"Error: File not found at {csv_filepath}")
    except Exception as e:  # e.g. UnicodeDecodeError, csv.Error
        print(f"An error occurred: {e}")

    return column


def add_text_to_csv(csv_filepath, text_to_add, index=0):
    """
    Append one or more (index, text) rows to a UTF-8 encoded CSV file.

    The file is opened in append mode, so it is created when missing. When
    the file is empty, the header row "#", "Text" is written first.

    Args:
        csv_filepath: The path to the CSV file.
        text_to_add: The text to append. A list produces one row per
            element; any other value produces exactly one row.
        index: Value written in the first column of every appended row
            (all rows from one call share it). Defaults to 0.

    Returns:
        None. Errors are reported with print() instead of being raised.
    """
    try:
        with open(
            csv_filepath, "a", encoding="utf-8", newline=""
        ) as out_file:
            out = csv.writer(out_file)

            # Jump to the end explicitly so tell() reports the file size;
            # a size of zero means the header is still missing.
            out_file.seek(0, os.SEEK_END)
            if out_file.tell() == 0:
                out.writerow(["#", "Text"])

            # Normalise the input so a single value and a list share one
            # write loop.
            if isinstance(text_to_add, list):
                items = text_to_add
            else:
                items = [text_to_add]

            for item in items:
                out.writerow([index, item])

    except Exception as e:
        print(f"An error occurred: {e}")


# Read the Azure OpenAI settings from the environment; load_dotenv() first
# merges in values from a local .env file when one is present.
load_dotenv()
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")

azure_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
    # API VERSION=[ 2024-12-01-preview, 2024-05-01-preview]
)

# NOTE: the identifier is misspelled ("deplopment"); it is kept unchanged so
# anything referring to this module-level name keeps working.
deplopment_name = "gpt-4o"  # or "gpt-4o-mini"  # "o1-mini"  # or "gpt-4o"
TEXT_PROMPT = """
Paraphrase the following news, only output the paraphrased text:

"""
MAX_PARAPHRASES = 1000  # upper bound on rows written to data/test.csv

text = get_first_column("data/MAGE_2.csv")
count = 0  # paraphrases written so far; also used as the output row number
for index, news in enumerate(text):
    # Stop as soon as the cap is reached. The earlier `count > 1000` check
    # let one extra request through and produced 1001 rows.
    if count >= MAX_PARAPHRASES:
        break
    prompt = TEXT_PROMPT + news
    print(f"{index:5}:\t{news[:50]}")

    try:
        response = azure_client.chat.completions.create(
            model=deplopment_name,  # model = "deployment_name".
            messages=[
                {"role": "user", "content": prompt},
            ],
        )
    except OpenAIError as e:
        # Best effort: report the failure and move on to the next item.
        print(f"Error interacting with OpenAI API: {e}")
        continue

    # The message content is optional in the API response. Skip responses
    # without text instead of writing a blank numbered row, and do not
    # consume a row number for them.
    choices = response.choices
    paraphrased_news = choices[0].message.content if choices else None
    if not paraphrased_news:
        print(f"No paraphrase returned for item {index}; skipping")
        continue

    count += 1
    add_text_to_csv("data/test.csv", paraphrased_news, count)