Spaces:

pmkhanh7890
/

news_verification

Sleeping

App Files Files

pmkhanh7890 committed on Feb 18

Commit

5d842c6

1 Parent(s): 38fd181

update generated content

Browse files

Files changed (3) hide show

gpt_test.py +92 -17
src/application/content_detection.py +1 -1
test.py +13 -55

gpt_test.py CHANGED Viewed

@@ -1,7 +1,69 @@
 import os
 from dotenv import load_dotenv
-from openai import AzureOpenAI
 load_dotenv()
 AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
@@ -14,22 +76,35 @@ azure_client = AzureOpenAI(
     api_version="2024-05-01-preview",
 )
-deplopment_name = "o1-mini"  # or "gpt-4o"
 TEXT_PROMPT = """
-replace Ukraine with Denmark:
-"Sir Keir Starmer has pledged to put Ukraine in the "strongest
-possible position" on a trip to Kyiv where he signed a
-"landmark" 100-year pact with the war-stricken country.
 """
-response = azure_client.chat.completions.create(
-    model=deplopment_name,  # model = "deployment_name".
-    messages=[
-        # {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": TEXT_PROMPT},
-    ],
-    # max_tokens=512,
-    # temperature=0,
-)
-print(response.choices[0].message.content)

 import os
 from dotenv import load_dotenv
+from openai import AzureOpenAI, OpenAIError
+import csv
+def get_first_column(csv_filepath):
+    """
+    Reads a CSV file and returns the first-column value of every non-empty
+    row after the first row, which is always skipped as a header.
+    Args:
+        csv_filepath: The path to the CSV file (opened as UTF-8).
+    Returns:
+        A list of strings, one per non-empty data row, taken from the first
+        column.  The list is empty if the file cannot be opened or has no
+        rows after the header.  If an error occurs part-way through reading,
+        the values collected before the error are still returned.
+        Errors are not raised; a message is printed to the console instead.
+    """
+    first_column_values = []
+    try:
+        with open(csv_filepath, 'r', newline='', encoding='utf-8') as csvfile:  # newline='' leaves line-ending handling to the csv reader
+            reader = csv.reader(csvfile)
+            next(reader, None)  # Unconditionally drop the first row (treated as the header); the None default avoids StopIteration on an empty file
+            for row in reader:
+                if row:  # csv.reader yields [] for blank lines; skip them so row[0] is safe
+                    first_column_values.append(row[0])
+    except FileNotFoundError:
+        print(f"Error: File not found at {csv_filepath}")
+    except Exception as e: # Best-effort: report any other failure (e.g., UnicodeDecodeError) and fall through to return what was read so far
+        print(f"An error occurred: {e}")
+    return first_column_values
+def add_text_to_csv(csv_filepath, text_to_add, index=0):
+    """
+    Appends one or more rows to a two-column ("#", "Text") CSV file (UTF-8 encoding).
+    Args:
+        csv_filepath: The path to the CSV file; it is opened in append mode.
+        text_to_add: A single value, or a list of values, to append (one value per new row).
+        index: The value written in the "#" column of every row added by
+            this call (defaults to 0).  All items of a list share the same index.
+    Errors are not raised; any exception is caught and printed to the console.
+    """
+    try:
+        with open(csv_filepath, 'a', newline='', encoding='utf-8') as csvfile:  # 'a' for append mode
+            writer = csv.writer(csvfile)
+            # Check if file is empty to determine if header needs to be written
+            csvfile.seek(0, 2)  # Go to end of file (whence=2) so tell() is 0 only when the file is empty
+            is_empty = csvfile.tell() == 0
+            if is_empty:
+                writer.writerow(["#", "Text"])  # Write header if file is empty
+            if isinstance(text_to_add, list):  # Check if text_to_add is a list
+                for text_item in text_to_add:
+                    writer.writerow([index, text_item]) # Each list item becomes its own [index, text] row, all with the same index
+            else:  # If not a list, assume it's a single string
+                writer.writerow([index, text_to_add])  # Write a single [index, text] row
+    except Exception as e:
+        print(f"An error occurred: {e}")
 load_dotenv()
 AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
     api_version="2024-05-01-preview",
 )
+deplopment_name = "gpt-4o" # "o1-mini"  # or "gpt-4o"
 TEXT_PROMPT = """
+Paraphrase the following news, only output the paraphrased text:
 """
+text = get_first_column("data/bbc_news.csv")  # First-column values of the source CSV, header row skipped
+count  = 0  # Number of paraphrases successfully produced so far
+for index, news in enumerate(text):
+    if count > 1000:  # Checked before each request, so up to 1001 paraphrases are written before stopping
+        break
+    prompt = TEXT_PROMPT + news  # Instruction text followed directly by the article text
+    print(f"{index:5}:\t{news[:50]}")  # Progress line: source index and the first 50 characters
+    #print(f"{index:5}:\t{prompt}")
+    try:
+        response = azure_client.chat.completions.create(
+            model=deplopment_name,  # model = "deployment_name".
+            messages=[
+                # {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": prompt},
+            ],
+            # max_tokens=512,
+            # temperature=0,
+        )
+    except OpenAIError as e:
+        print(f"Error interacting with OpenAI API: {e}")
+        continue  # Skip this article; count is not advanced for failed requests
+    count += 1
+    paraphrased_news = response.choices[0].message.content
+    add_text_to_csv("data/bbc_news_4o.csv", paraphrased_news, count)  # count (not the source index) goes in the "#" column, so numbering starts at 1 and has no gaps for failed requests

src/application/content_detection.py CHANGED Viewed

@@ -470,7 +470,7 @@ class NewsVerification:
         return f"""
 <h5>Comparison between input news and source news:</h5>
-<table border="1" style="width:100%; text-align:left; border-collapse:collapse;">  # noqa: E501
 <col style="width: 170px;">
 <col style="width: 170px;">
 <col style="width: 30px;">

         return f"""
 <h5>Comparison between input news and source news:</h5>
+<table border="1" style="width:100%; text-align:left;">
 <col style="width: 170px;">
 <col style="width: 170px;">
 <col style="width: 30px;">

test.py CHANGED Viewed

@@ -1,55 +1,13 @@
-import re
-def find_entity_spans(entity, text):
-    """
-    Finds the start and end indices of whole word entities in text.
-    Args:
-        entity: The entity string to search for.
-        text: The text to search within.
-    Returns:
-        A list of tuples, where each tuple contains the start and end indices
-        of a found entity.  Returns an empty list if no entities are found.
-    """
-    spans = []
-    for m in re.finditer(
-        r"\b" + re.escape(entity) + r"\b",
-        text,
-    ):  # The crucial change
-        spans.append((m.start(), m.end()))
-    return spans
-# Example usage:
-temp_text = "win winger winning"
-entity = {"key": "win"}  # Example dictionary (adjust as needed)
-spans = find_entity_spans(entity["key"], temp_text)
-print(spans)  # Output: [(0, 3)] (Only "win" at the beginning)
-temp_text = "The quick brown fox jumps over the lazy dog."
-entity = {"key": "fox"}
-spans = find_entity_spans(entity["key"], temp_text)
-print(spans)  # Output: [(16, 19)]
-temp_text = "foxes fox foxing"
-entity = {"key": "fox"}
-spans = find_entity_spans(entity["key"], temp_text)
-print(spans)  # Output: [(0, 3), (6, 9)]
-temp_text = "winger win winning"
-entity = {"key": "win"}
-spans = find_entity_spans(entity["key"], temp_text)
-print(spans)  # Output: [(8, 11)]
-temp_text = "winger win winning"
-entity = {"key": "winger"}
-spans = find_entity_spans(entity["key"], temp_text)
-print(spans)  # Output: [(0, 6)]
-temp_text = "winger win winning"
-entity = {"key": "winning"}
-spans = find_entity_spans(entity["key"], temp_text)
-print(spans)  # Output: [(12, 19)]

+import numpy as np
+import pandas as pd
+# Create an empty DataFrame with 5 columns
+df = pd.DataFrame(columns=['col1', 'col2', 'col3', 'col4', 'col5'])  # Or any column names you want
+# Method 1: Build each row from a dictionary and add it with pd.concat (less efficient for large DataFrames)
+for _ in range(5):  # Add 5 rows
+    df = pd.concat([df, pd.DataFrame([{'col1': np.nan, 'col2': np.nan, 'col3': np.nan, 'col4': np.nan, 'col5': np.nan}])], ignore_index=True)
+d =  {"col1": "ta", "col2": "gs"}
+df.loc[1, "col1"] = d["col1"]  # Overwrite a single cell: row label 1, column "col1"
+for index, row in enumerate(df):  # NOTE: iterating a DataFrame yields column labels, so `row` is a column name here and this prints 0-4 (one per column), not one per row
+    print(index)