Spaces:
Sleeping
Sleeping
Commit
·
5d842c6
1
Parent(s):
38fd181
update generated content
Browse files
- gpt_test.py +92 -17
- src/application/content_detection.py +1 -1
- test.py +13 -55
gpt_test.py
CHANGED
@@ -1,7 +1,69 @@
|
|
1 |
import os
|
2 |
|
3 |
from dotenv import load_dotenv
|
4 |
-
from openai import AzureOpenAI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
load_dotenv()
|
7 |
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
|
@@ -14,22 +76,35 @@ azure_client = AzureOpenAI(
|
|
14 |
api_version="2024-05-01-preview",
|
15 |
)
|
16 |
|
17 |
-
deplopment_name = "o1-mini" # or "gpt-4o"
|
18 |
TEXT_PROMPT = """
|
19 |
-
|
20 |
|
21 |
-
"Sir Keir Starmer has pledged to put Ukraine in the "strongest
|
22 |
-
possible position" on a trip to Kyiv where he signed a
|
23 |
-
"landmark" 100-year pact with the war-stricken country.
|
24 |
"""
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
]
|
32 |
-
#
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
|
3 |
from dotenv import load_dotenv
|
4 |
+
from openai import AzureOpenAI, OpenAIError
|
5 |
+
|
6 |
+
|
7 |
+
import csv
|
8 |
+
|
9 |
+
def get_first_column(csv_filepath):
    """
    Return the first-column values of a CSV file, skipping its header row.

    Args:
        csv_filepath: The path to the CSV file (read as UTF-8).

    Returns:
        A list of strings, one per non-empty data row, holding that row's
        first field. Returns an empty list if the file cannot be opened or
        read, or if it has no rows after the header. A failure part-way
        through the file also yields an empty list, never a partial one.

    Errors are reported by printing a message to the console; they are not
    raised to the caller.
    """
    try:
        with open(csv_filepath, 'r', newline='', encoding='utf-8') as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # Skip the header row; harmless on an empty file

            # csv.reader yields [] for blank lines, so `if row` drops them.
            # Returning from inside the try means a read error raised while
            # iterating discards the rows gathered so far.
            return [row[0] for row in reader if row]
    except FileNotFoundError:
        print(f"Error: File not found at {csv_filepath}")
    except Exception as e:  # Other read failures (e.g., UnicodeDecodeError)
        print(f"An error occurred: {e}")

    # Reached only after an error was reported above.
    return []
|
39 |
+
|
40 |
+
def add_text_to_csv(csv_filepath, text_to_add, index=0):
    """
    Append rows of text to a two-column ("#", "Text") CSV file (UTF-8 encoding).

    The file is opened in append mode, so it is created if it does not exist.
    When the file is empty, the header row ``["#", "Text"]`` is written first.

    Args:
        csv_filepath: The path to the CSV file.
        text_to_add: A single value, or a list/tuple of values. Each value
            is written as the "Text" field of its own new row.
        index: The value written to the "#" column of every row added by
            this call. Defaults to 0.

    Errors are reported by printing a message to the console; they are not
    raised to the caller.
    """
    # Normalise to a sequence so one code path writes the rows. Tuples are
    # treated like lists instead of being stringified into a single cell.
    if isinstance(text_to_add, (list, tuple)):
        texts = text_to_add
    else:
        texts = [text_to_add]

    try:
        with open(csv_filepath, 'a', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)

            # Position at end of file; offset 0 there means the file is empty
            # and still needs its header.
            csvfile.seek(0, 2)
            if csvfile.tell() == 0:
                writer.writerow(["#", "Text"])

            writer.writerows([index, text_item] for text_item in texts)
    except Exception as e:
        print(f"An error occurred: {e}")
|
67 |
|
68 |
load_dotenv()
|
69 |
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
|
|
|
76 |
api_version="2024-05-01-preview",
|
77 |
)
|
78 |
|
79 |
+
deplopment_name = "gpt-4o" # "o1-mini" # or "gpt-4o"
|
80 |
TEXT_PROMPT = """
|
81 |
+
Paraphrase the following news, only output the paraphrased text:
|
82 |
|
|
|
|
|
|
|
83 |
"""
|
84 |
+
# Paraphrase each news item from the input CSV with the Azure OpenAI
# deployment and append every paraphrase to the output CSV as it arrives.
text = get_first_column("data/bbc_news.csv")
count = 0  # Number of paraphrases written so far; also used as the row index
for index, news in enumerate(text):
    # NOTE(review): `count > 1000` lets 1001 paraphrases through before
    # stopping — confirm whether the intended cap is 1000.
    if count > 1000:
        break
    prompt = TEXT_PROMPT + news
    print(f"{index:5}:\t{news[:50]}")

    try:
        response = azure_client.chat.completions.create(
            model=deplopment_name,  # model = "deployment_name".
            messages=[
                # {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            # max_tokens=512,
            # temperature=0,
        )
    except OpenAIError as e:
        print(f"Error interacting with OpenAI API: {e}")
        continue

    # A response can carry no choices, or a message whose content is None
    # (presumably e.g. when output is filtered — verify against the API docs).
    # Skip those so no blank row is written and no index is consumed.
    choices = response.choices
    paraphrased_news = choices[0].message.content if choices else None
    if not paraphrased_news:
        print(f"{index:5}:\tNo paraphrased text returned; skipping")
        continue

    count += 1
    add_text_to_csv("data/bbc_news_4o.csv", paraphrased_news, count)
|
src/application/content_detection.py
CHANGED
@@ -470,7 +470,7 @@ class NewsVerification:
|
|
470 |
|
471 |
return f"""
|
472 |
<h5>Comparison between input news and source news:</h5>
|
473 |
-
<table border="1" style="width:100%; text-align:left;
|
474 |
<col style="width: 170px;">
|
475 |
<col style="width: 170px;">
|
476 |
<col style="width: 30px;">
|
|
|
470 |
|
471 |
return f"""
|
472 |
<h5>Comparison between input news and source news:</h5>
|
473 |
+
<table border="1" style="width:100%; text-align:left;">
|
474 |
<col style="width: 170px;">
|
475 |
<col style="width: 170px;">
|
476 |
<col style="width: 30px;">
|
test.py
CHANGED
@@ -1,55 +1,13 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
of a found entity. Returns an empty list if no entities are found.
|
15 |
-
"""
|
16 |
-
spans = []
|
17 |
-
for m in re.finditer(
|
18 |
-
r"\b" + re.escape(entity) + r"\b",
|
19 |
-
text,
|
20 |
-
): # The crucial change
|
21 |
-
spans.append((m.start(), m.end()))
|
22 |
-
return spans
|
23 |
-
|
24 |
-
|
25 |
-
# Example usage:
|
26 |
-
temp_text = "win winger winning"
|
27 |
-
entity = {"key": "win"} # Example dictionary (adjust as needed)
|
28 |
-
|
29 |
-
spans = find_entity_spans(entity["key"], temp_text)
|
30 |
-
print(spans) # Output: [(0, 3)] (Only "win" at the beginning)
|
31 |
-
|
32 |
-
temp_text = "The quick brown fox jumps over the lazy dog."
|
33 |
-
entity = {"key": "fox"}
|
34 |
-
spans = find_entity_spans(entity["key"], temp_text)
|
35 |
-
print(spans) # Output: [(16, 19)]
|
36 |
-
|
37 |
-
temp_text = "foxes fox foxing"
|
38 |
-
entity = {"key": "fox"}
|
39 |
-
spans = find_entity_spans(entity["key"], temp_text)
|
40 |
-
print(spans) # Output: [(0, 3), (6, 9)]
|
41 |
-
|
42 |
-
temp_text = "winger win winning"
|
43 |
-
entity = {"key": "win"}
|
44 |
-
spans = find_entity_spans(entity["key"], temp_text)
|
45 |
-
print(spans) # Output: [(8, 11)]
|
46 |
-
|
47 |
-
temp_text = "winger win winning"
|
48 |
-
entity = {"key": "winger"}
|
49 |
-
spans = find_entity_spans(entity["key"], temp_text)
|
50 |
-
print(spans) # Output: [(0, 6)]
|
51 |
-
|
52 |
-
temp_text = "winger win winning"
|
53 |
-
entity = {"key": "winning"}
|
54 |
-
spans = find_entity_spans(entity["key"], temp_text)
|
55 |
-
print(spans) # Output: [(12, 19)]
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
# Build a 5x5 DataFrame of missing values in a single step. Growing the frame
# one row at a time with pd.concat re-copies it on every iteration.
# dtype=object keeps the columns able to hold the string assigned below.
columns = ['col1', 'col2', 'col3', 'col4', 'col5']  # Or any column names you want
df = pd.DataFrame(np.nan, index=range(5), columns=columns, dtype=object)

d = {"col1": "ta", "col2": "gs"}
df.loc[1, "col1"] = d["col1"]

# Iterate over the row labels. Iterating the DataFrame itself would walk the
# column labels instead of the rows.
for index in df.index:
    print(index)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|