Spaces:
Runtime error
Runtime error
File size: 4,351 Bytes
35af015 4ed336a 35af015 98bdd9f 2cc64b2 35af015 98bdd9f 2cc64b2 98bdd9f 35af015 2cc64b2 98bdd9f 2cc64b2 98bdd9f 2cc64b2 98bdd9f 2cc64b2 98bdd9f 2cc64b2 98bdd9f 2cc64b2 98bdd9f 2cc64b2 98bdd9f 35af015 2cc64b2 35af015 98bdd9f 35af015 98bdd9f 35af015 98bdd9f 35af015 98bdd9f 35af015 98bdd9f 35af015 98bdd9f 6741d4b 98bdd9f 2cc64b2 98bdd9f 35af015 98bdd9f 2cc64b2 98bdd9f 35af015 98bdd9f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import gradio as gr
from transformers import AutoTokenizer, AutoModel
from utils_MMD import extract_features # Adjust the import path
from MMD_calculate import mmd_two_sample_baseline # Adjust the import path
# Minimum token count each input text must reach before the MMD test is run;
# shorter texts give an unreliable test-power estimate.
MINIMUM_TOKENS = 64
# Decision boundary on the averaged test-power value: below → "Human", otherwise → "AI".
THRESHOLD = 0.5  # Threshold for classification
def count_tokens(text, tokenizer):
    """Return the number of tokens *text* encodes to under *tokenizer*."""
    encoded = tokenizer(text)
    return len(encoded.input_ids)
def run_test_power(model_name, real_text, generated_text, N=10):
    """
    Classify *generated_text* as human- or AI-written via an MMD two-sample test.

    Args:
        model_name (str): Hugging Face model identifier to load.
        real_text (str): Reference human-written text for comparison.
        generated_text (str): The input text to classify.
        N (int): Number of repetitions for the MMD calculation.

    Returns:
        str: "Prediction: Human" or "Prediction: AI", or an error message when
        either input is shorter than MINIMUM_TOKENS tokens.

    Raises:
        ValueError: If the MMD baseline returns no test-power values.
    """
    import torch  # local import: only needed here for device selection

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # BUG FIX: the original called .cuda() unconditionally, which raises a
    # runtime error on CPU-only hosts (e.g. a free Spaces instance). Fall
    # back to CPU when no GPU is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModel.from_pretrained(model_name).to(device)
    model.eval()

    # Ensure both texts are long enough for the test to be meaningful.
    if (
        count_tokens(real_text, tokenizer) < MINIMUM_TOKENS
        or count_tokens(generated_text, tokenizer) < MINIMUM_TOKENS
    ):
        return "Too short length. Need a minimum of 64 tokens to calculate Test Power."

    # Extract features for each text (project helper; assumes it handles the
    # model's device placement — TODO confirm against utils_MMD).
    fea_real_ls = extract_features([real_text], tokenizer, model)
    fea_generated_ls = extract_features([generated_text], tokenizer, model)

    # Calculate the test-power list over N repetitions.
    test_power_ls = mmd_two_sample_baseline(fea_real_ls, fea_generated_ls, N=N)
    if not test_power_ls:
        # Guard against an empty result instead of a bare ZeroDivisionError.
        raise ValueError("mmd_two_sample_baseline returned no test-power values")

    # Average test power; low power means the two samples look alike → human.
    power_test_value = sum(test_power_ls) / len(test_power_ls)
    return "Prediction: Human" if power_test_value < THRESHOLD else "Prediction: AI"
# CSS for custom styling of the Gradio UI:
#   #header      — centered, enlarged page title
#   #output-text — emphasized prediction result box
css = """
#header { text-align: center; font-size: 1.5em; margin-bottom: 20px; }
#output-text { font-weight: bold; font-size: 1.2em; }
"""
# Gradio App: text input + model choice → "Human"/"AI" prediction.
with gr.Blocks(css=css) as app:
    with gr.Row():
        gr.HTML('<div id="header">Human or AI Text Detector</div>')
    with gr.Row():
        gr.Markdown(
            """
            [Paper](https://openreview.net/forum?id=z9j7wctoGV) | [Code](https://github.com/xLearn-AU/R-Detect) | [Contact](mailto:[email protected])
            """
        )
    with gr.Row():
        input_text = gr.Textbox(
            label="Input Text",
            placeholder="Enter the text to check",
            lines=8,
        )
    with gr.Row():
        model_name = gr.Dropdown(
            [
                "gpt2-medium",
                "gpt2-large",
                "t5-large",
                "t5-small",
                "roberta-base",
                "roberta-base-openai-detector",
                "falcon-rw-1b",
            ],
            label="Select Model",
            value="gpt2-medium",
        )
    with gr.Row():
        submit_button = gr.Button("Run Detection", variant="primary")
        clear_button = gr.Button("Clear", variant="secondary")
    with gr.Row():
        output = gr.Textbox(
            label="Prediction",
            placeholder="Prediction: Human or AI",
            elem_id="output-text",
        )
    with gr.Accordion("Disclaimer", open=False):
        gr.Markdown(
            """
            - **Disclaimer**: This tool is for demonstration purposes only. It is not a foolproof AI detector.
            - **Accuracy**: Results may vary based on input length and quality.
            """
        )
    with gr.Accordion("Citations", open=False):
        gr.Markdown(
            """
            ```
            @inproceedings{zhangs2024MMDMP,
            title={Detecting Machine-Generated Texts by Multi-Population Aware Optimization for Maximum Mean Discrepancy},
            author={Zhang, Shuhai and Song, Yiliao and Yang, Jiahao and Li, Yuanqing and Han, Bo and Tan, Mingkui},
            booktitle = {International Conference on Learning Representations (ICLR)},
            year={2024}
            }
            ```
            """
        )
    # BUG FIX: the original passed a raw string ("The cat sat on the mat.")
    # inside `inputs`, but Gradio event listeners only accept components
    # there, so clicking the button raised at runtime. Bind the fixed
    # reference text in a wrapper and pass only components as inputs.
    submit_button.click(
        lambda model, text: run_test_power(model, "The cat sat on the mat.", text),
        inputs=[model_name, input_text],
        outputs=output,
    )
    clear_button.click(lambda: ("", ""), inputs=[], outputs=[input_text, output])
app.launch()
|