Spaces:
Sleeping
Sleeping
File size: 4,070 Bytes
bfe6692 504f37b 38fd181 504f37b bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 38fd181 bfe6692 504f37b 38fd181 504f37b 00b1038 38fd181 00b1038 504f37b 38fd181 0827f9d 504f37b 5d842c6 504f37b a5e8d12 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 5d842c6 bfe6692 00b1038 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import csv
import os
from dotenv import load_dotenv
from openai import (
AzureOpenAI,
OpenAIError,
)
def get_first_column(csv_filepath):
    """
    Collect the first-column values of a CSV file, ignoring its header row.

    Args:
        csv_filepath: Location of the CSV file to read (decoded as UTF-8).

    Returns:
        A list holding the first cell of every non-empty row that follows
        the first (header) row. The list is empty when the file cannot be
        opened or has no data rows; if reading fails part-way through, the
        values gathered up to that point are returned. Failures are
        reported with a console message instead of being raised.
    """
    collected = []
    try:
        with open(csv_filepath, newline="", encoding="utf-8") as handle:
            for position, record in enumerate(csv.reader(handle)):
                # Position 0 is the header row; blank lines parse to [].
                if position == 0 or not record:
                    continue
                collected.append(record[0])
    except FileNotFoundError:
        print(f"Error: File not found at {csv_filepath}")
    except Exception as e:  # e.g. UnicodeDecodeError or csv.Error
        print(f"An error occurred: {e}")
    return collected
def add_text_to_csv(csv_filepath, text_to_add, index=0):
    """
    Append numbered text rows to a two-column ("#", "Text") UTF-8 CSV file.

    The file is opened in append mode; when it is empty at that point the
    header row ``#,Text`` is written before any data.

    Args:
        csv_filepath: Path of the CSV file to append to.
        text_to_add: Either a list, whose items are each written as their
            own row, or any other value, which is written as one row.
        index: Value stored in the "#" column of every row written by
            this call (defaults to 0).

    Any exception is reported with a console message rather than raised.
    """
    # Treat a lone value as a one-item batch so both cases share one loop.
    entries = text_to_add if isinstance(text_to_add, list) else [text_to_add]
    try:
        with open(csv_filepath, "a", newline="", encoding="utf-8") as out:
            # Jump to the end explicitly so tell() reports the file size.
            out.seek(0, os.SEEK_END)
            rows = csv.writer(out)
            if out.tell() == 0:
                rows.writerow(["#", "Text"])
            for entry in entries:
                rows.writerow([index, entry])
    except Exception as e:
        print(f"An error occurred: {e}")
# --- Azure OpenAI client setup ---------------------------------------------
# Credentials and endpoint are read from the environment; load_dotenv() lets
# them be supplied through a local .env file.
load_dotenv()
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")

azure_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
    # API VERSION=[ 2024-12-01-preview, 2024-05-01-preview]
)

# Name of the Azure deployment to call, e.g. "gpt-4o", "gpt-4o-mini", "o1-mini".
deployment_name = "gpt-4o"
# Original (misspelled) name kept as an alias for backward compatibility.
deplopment_name = deployment_name

# Upper bound on the number of paraphrases written in one run.
MAX_PARAPHRASES = 1000

TEXT_PROMPT = """
Paraphrase the following news, only output the paraphrased text:
"""

# --- Paraphrase loop -------------------------------------------------------
# Read the source texts, ask the model to paraphrase each one, and append
# every successful paraphrase to data/test.csv numbered 1, 2, 3, ...
text = get_first_column("data/MAGE_2.csv")
count = 0
for index, news in enumerate(text):
    # Stop once exactly MAX_PARAPHRASES rows were written (the previous
    # `count > 1000` check let a 1001st row through).
    if count >= MAX_PARAPHRASES:
        break

    prompt = TEXT_PROMPT + news
    print(f"{index:5}:\t{news[:50]}")

    try:
        response = azure_client.chat.completions.create(
            model=deployment_name,  # Azure expects the deployment name here.
            messages=[
                {"role": "user", "content": prompt},
            ],
        )
    except OpenAIError as e:
        # A failed request skips this item without consuming a slot.
        print(f"Error interacting with OpenAI API: {e}")
        continue

    # The message content is optional in the API response; do not record
    # an empty row or count it as a success.
    paraphrased_news = (
        response.choices[0].message.content if response.choices else None
    )
    if not paraphrased_news:
        print(f"No paraphrase returned for item {index}; skipping.")
        continue

    count += 1
    add_text_to_csv("data/test.csv", paraphrased_news, count)
|