pmkhanh7890 committed on
Commit
5d842c6
·
1 Parent(s): 38fd181

update generated content

Browse files
Files changed (3) hide show
  1. gpt_test.py +92 -17
  2. src/application/content_detection.py +1 -1
  3. test.py +13 -55
gpt_test.py CHANGED
@@ -1,7 +1,69 @@
1
  import os
2
 
3
  from dotenv import load_dotenv
4
- from openai import AzureOpenAI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  load_dotenv()
7
  AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
@@ -14,22 +76,35 @@ azure_client = AzureOpenAI(
14
  api_version="2024-05-01-preview",
15
  )
16
 
17
# Azure OpenAI deployment to query; "gpt-4o" is the alternative deployment.
deplopment_name = "o1-mini"

# Prompt sent verbatim as the single user message.
TEXT_PROMPT = """
replace Ukraine with Denmark:

"Sir Keir Starmer has pledged to put Ukraine in the "strongest
possible position" on a trip to Kyiv where he signed a
"landmark" 100-year pact with the war-stricken country.
"""

# On Azure, `model` is the deployment name rather than a model family name.
response = azure_client.chat.completions.create(
    model=deplopment_name,
    messages=[
        {"role": "user", "content": TEXT_PROMPT},
    ],
)

# Print the text of the first (and only requested) completion choice.
print(response.choices[0].message.content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
 
3
  from dotenv import load_dotenv
4
+ from openai import AzureOpenAI, OpenAIError
5
+
6
+
7
+ import csv
8
+
9
def get_first_column(csv_filepath):
    """
    Return the first-column values of a CSV file, skipping its header row.

    Args:
        csv_filepath: The path to the CSV file. The file is read as UTF-8.

    Returns:
        A list of strings, one per non-empty data row, holding that row's
        first cell. The first row is always treated as a header and dropped.
        If the file cannot be opened the list is empty; if reading fails
        part-way through, the values collected before the failure are
        returned. In both cases an error message is printed to the console
        instead of an exception being raised.
    """
    first_column_values = []
    try:
        # newline="" hands line-ending handling to the csv module, so quoted
        # cells that contain line breaks are read back intact.
        with open(csv_filepath, "r", newline="", encoding="utf-8") as csvfile:
            reader = csv.reader(csvfile)
            # Drop the header; the default keeps an empty file from raising
            # StopIteration.
            next(reader, None)

            for row in reader:
                if not row:  # blank lines are parsed as empty lists
                    continue
                first_column_values.append(row[0])
    except FileNotFoundError:
        print(f"Error: File not found at {csv_filepath}")
    except (OSError, UnicodeError, csv.Error) as e:
        # Best-effort read: report I/O, decoding and malformed-CSV problems
        # but let genuine programming errors propagate.
        print(f"An error occurred: {e}")

    return first_column_values
39
+
40
def add_text_to_csv(csv_filepath, text_to_add, index=0):
    """
    Append ``[index, text]`` rows to a two-column CSV file (UTF-8 encoding).

    The file is opened in append mode, so it is created when missing. When
    the file is empty, a ``["#", "Text"]`` header row is written first.

    Args:
        csv_filepath: The path to the CSV file.
        text_to_add: Either a single value, written as one row, or a list of
            values, each written as its own row.
        index: The value stored in the "#" column. Every row written by one
            call shares this same value.

    Returns:
        None. I/O, encoding and CSV errors are printed to the console rather
        than raised.
    """
    try:
        with open(csv_filepath, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)

            # Seek to the end explicitly (whence=2) so tell() reports the
            # current file size; a size of zero means no header exists yet.
            csvfile.seek(0, 2)
            if csvfile.tell() == 0:
                writer.writerow(["#", "Text"])

            # Normalize to a list so both input shapes share one write path.
            if isinstance(text_to_add, list):
                texts = text_to_add
            else:
                texts = [text_to_add]

            for text_item in texts:
                writer.writerow([index, text_item])

    except (OSError, UnicodeError, csv.Error) as e:
        # Best-effort write: report the failure and let the caller carry on.
        print(f"An error occurred: {e}")
67
 
68
  load_dotenv()
69
  AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
 
76
  api_version="2024-05-01-preview",
77
  )
78
 
79
# Azure OpenAI deployment to query; "o1-mini" is the alternative deployment.
deplopment_name = "gpt-4o"

# Instruction prefix; each news item is appended directly after it.
TEXT_PROMPT = """
Paraphrase the following news, only output the paraphrased text:

"""

text = get_first_column("data/bbc_news.csv")
count = 0  # number of paraphrases successfully written so far
for index, news in enumerate(text):
    # NOTE(review): `>` lets 1001 items through before stopping; switch to
    # `>=` if the cap is meant to be exactly 1000.
    if count > 1000:
        break
    prompt = TEXT_PROMPT + news
    print(f"{index:5}:\t{news[:50]}")

    try:
        # On Azure, `model` is the deployment name rather than a model family.
        response = azure_client.chat.completions.create(
            model=deplopment_name,
            messages=[
                {"role": "user", "content": prompt},
            ],
        )
    except OpenAIError as e:
        # Skip this item on an API failure and carry on with the next one.
        print(f"Error interacting with OpenAI API: {e}")
        continue

    paraphrased_news = response.choices[0].message.content
    if paraphrased_news is None:
        # The API types `content` as optional; do not record an empty row.
        print(f"No content returned for item {index}; skipping")
        continue

    count += 1
    # `count` doubles as the 1-based row number in the output file.
    add_text_to_csv("data/bbc_news_4o.csv", paraphrased_news, count)
src/application/content_detection.py CHANGED
@@ -470,7 +470,7 @@ class NewsVerification:
470
 
471
  return f"""
472
  <h5>Comparison between input news and source news:</h5>
473
- <table border="1" style="width:100%; text-align:left; border-collapse:collapse;"> # noqa: E501
474
  <col style="width: 170px;">
475
  <col style="width: 170px;">
476
  <col style="width: 30px;">
 
470
 
471
  return f"""
472
  <h5>Comparison between input news and source news:</h5>
473
+ <table border="1" style="width:100%; text-align:left;">
474
  <col style="width: 170px;">
475
  <col style="width: 170px;">
476
  <col style="width: 30px;">
test.py CHANGED
@@ -1,55 +1,13 @@
1
- import re
2
-
3
-
4
def find_entity_spans(entity, text):
    """
    Finds the start and end indices of whole-word occurrences of an entity.

    An occurrence counts only when it is not directly preceded or followed
    by a word character, so "win" matches in "a win!" but not in "winger".
    Lookarounds are used instead of ``\\b`` so that entities which begin or
    end with a non-word character (for example "C++") can still be found.

    Args:
        entity: The entity string to search for. It is matched literally.
        text: The text to search within.

    Returns:
        A list of ``(start, end)`` tuples, one per occurrence, in order of
        appearance. Returns an empty list if nothing is found or if
        ``entity`` is empty.
    """
    # An empty pattern would otherwise yield meaningless zero-length spans.
    if not entity:
        return []

    pattern = r"(?<!\w)" + re.escape(entity) + r"(?!\w)"
    return [(m.start(), m.end()) for m in re.finditer(pattern, text)]
23
-
24
-
25
# Example usage: each case pairs a text with an entity dictionary; the
# trailing comment is the span list printed for that case.
examples = [
    ("win winger winning", {"key": "win"}),  # [(0, 3)]
    ("The quick brown fox jumps over the lazy dog.", {"key": "fox"}),  # [(16, 19)]
    ("foxes fox foxing", {"key": "fox"}),  # [(6, 9)] - "foxes" is not a whole-word match
    ("winger win winning", {"key": "win"}),  # [(7, 10)]
    ("winger win winning", {"key": "winger"}),  # [(0, 6)]
    ("winger win winning", {"key": "winning"}),  # [(11, 18)]
]

for temp_text, entity in examples:
    spans = find_entity_spans(entity["key"], temp_text)
    print(spans)
 
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
# Build the 5x5 frame of missing values in one constructor call instead of
# growing it row by row with pd.concat (which copies the frame every time).
# dtype=object lets string values be assigned later without any dtype
# coercion of the NaN-filled columns.
df = pd.DataFrame(
    np.nan,
    index=range(5),
    columns=["col1", "col2", "col3", "col4", "col5"],
    dtype=object,
)

# Copy a single value from a plain dict into one cell (row label 1).
d = {"col1": "ta", "col2": "gs"}
df.loc[1, "col1"] = d["col1"]

# iterrows() yields (index label, row Series) pairs. Iterating the frame
# directly, as enumerate(df) does, walks the column labels instead of rows.
for index, row in df.iterrows():
    print(index)