"""
Project-wide configuration: credentials, model handles, thresholds, labels.

Author: Khanh Phan
Date: 2024-12-04
"""

import os

import nltk
import openai
import torch
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer

# ---------------------------------------------------------------------------
# Environment variables
# ---------------------------------------------------------------------------
# load_dotenv() runs first so the os.getenv() lookups below can see its values.
# Any variable that is not set resolves to None.
load_dotenv()

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")

AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")

# ---------------------------------------------------------------------------
# GPT models
# ---------------------------------------------------------------------------
GPT_ENTITY_MODEL = "o1-mini"  # alternatives: "gpt-4o-mini" or "o1-mini"
GPT_PARAPHRASE_MODELS = ["gpt-4o", "gpt-4o-mini"]

# Shared Azure OpenAI client, built once at import time from the settings above.
AZUREOPENAI_CLIENT = openai.AzureOpenAI(
    api_version=AZURE_OPENAI_API_VERSION,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
)

# ---------------------------------------------------------------------------
# NLTK data files
# ---------------------------------------------------------------------------
# Download the necessary NLTK resources (requested with quiet=True):
#   punkt      - sentence tokenization
#   punkt_tab  - tokenization with tab-separated data
#   stopwords  - a list of stop words
for _nltk_resource in ("punkt", "punkt_tab", "stopwords"):
    nltk.download(_nltk_resource, quiet=True)
del _nltk_resource  # keep the loop variable out of the module namespace

STOPWORDS_LANG = "english"

# ---------------------------------------------------------------------------
# Paraphrase model
# ---------------------------------------------------------------------------
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
PARAPHRASE_MODEL = SentenceTransformer("paraphrase-MiniLM-L6-v2")
PARAPHRASE_MODEL.to(DEVICE)

# Model used to detect AI-generated text.
# NOTE(review): the name is misspelled ("DECTECTION"); it is left unchanged
# here because other modules may import it under this spelling.
AI_TEXT_DECTECTION_MODEL = "TrustSafeAI/RADAR-Vicuna-7B"

# ---------------------------------------------------------------------------
# Thresholds
# ---------------------------------------------------------------------------
PARAPHRASE_THRESHOLD_HUMAN = 0.963
PARAPHRASE_THRESHOLD_MACHINE = 0.8
PARAPHRASE_THRESHOLD = 0.8
MIN_SAME_SENTENCE_LEN = 6
MIN_PHRASE_SENTENCE_LEN = 10
MIN_RATIO_PARAPHRASE_NUM = 0.5
MAX_CHAR_SIZE = 30000

# ---------------------------------------------------------------------------
# Search
# ---------------------------------------------------------------------------
# Number of top URLs per search.
TOP_URLS_PER_SEARCH = 3

# Search parameters.
GOOGLE_ENDPOINT_URL = "https://www.googleapis.com/customsearch/v1"
# NOTE(review): the name is misspelled ("RESUTLS"); it is left unchanged here
# because other modules may import it under this spelling.
TOP_SEARCH_RESUTLS = 10
CHUNK_SIZE = 32  # words
NUM_CHUNKS = 3  # number of chunks to search
NUM_FREQUENT_WORDS = 32  # number of top words to return
NUM_KEYWORDS = 5  # number of keywords to return

# ---------------------------------------------------------------------------
# Labels
# ---------------------------------------------------------------------------
# Maps a detection model's identifier to the label string paired with it here
# ("Human").
MODEL_HUMAN_LABEL = {AI_TEXT_DECTECTION_MODEL: "Human"}

HUMAN = "HUMAN"
MACHINE = "MACHINE"
UNKNOWN = "UNKNOWN"
PARAPHRASE = "PARAPHRASE"
NON_PARAPHRASE = "NON_PARAPHRASE"

# ---------------------------------------------------------------------------
# Entity color
# ---------------------------------------------------------------------------
# Meaning of a color "factor":
#   factor > 1: lightens the color.
#   factor = 1: leaves the color unchanged.
#   factor < 1: darkens the color.
#   factor = 0: black.
ENTITY_LIGHTEN_COLOR = 2.2
ENTITY_DARKEN_COLOR = 0.7
ENTITY_SATURATION = 0.65  # saturation: the color's intensity (vividness)
ENTITY_BRIGHTNESS = 0.75  # the color's brightness