Spaces:

pierrefdz
/

interactive-llm-wm

Running

File size: 28,516 Bytes

<!DOCTYPE html>
<html lang="en">
<head>
    <!-- Declare the encoding first so the parser never has to guess it. -->
    <meta charset="utf-8">
    <title>Watermark Detector</title>
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <!-- NOTE(review): "[email protected]" in the two CDN URLs below looks like e-mail obfuscation residue that replaced a "package@version" path segment. Restore the intended Bootstrap and Bootstrap Icons versions before deploying. -->
    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/font/bootstrap-icons.css">
    <!-- Project stylesheet; the URL is produced by the template's url_for helper. -->
    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
</head>
<body>
    <div class="container">
        <!-- Title bar: page heading plus the buttons that open the help and parameter dialogs. -->
        <div class="d-flex justify-content-between align-items-center">
            <h1>Interactive watermark detector</h1>
            <div class="d-flex gap-2">
                <!-- Icon-only buttons: aria-label supplies the accessible name and the icon itself is decorative. -->
                <button type="button" class="btn btn-outline-secondary" data-bs-toggle="modal" data-bs-target="#helpModal" aria-label="Open help">
                    <i class="bi bi-question-circle" aria-hidden="true"></i>
                </button>
                <button type="button" class="btn btn-outline-secondary" data-bs-toggle="modal" data-bs-target="#paramsModal" aria-label="Open advanced parameters">
                    <i class="bi bi-gear" aria-hidden="true"></i>
                </button>
            </div>
        </div>

        <!-- Help Modal: static reference text covering the demo, its detectors, parameters and result fields. -->
        <!-- aria-labelledby ties the dialog to its title; aria-hidden is the initial state that Bootstrap toggles when the modal opens. -->
        <div class="modal fade" id="helpModal" tabindex="-1" aria-labelledby="helpModalLabel" aria-hidden="true">
            <div class="modal-dialog modal-lg">
                <div class="modal-content">
                    <div class="modal-header">
                        <h5 class="modal-title" id="helpModalLabel">Watermark Detection Help</h5>
                        <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
                    </div>
                    <div class="modal-body help-modal-body">
                        <h4>What is LLM watermarking?</h4>
                        <p>LLM watermarking is a technique that slightly modifies how language models generate text, making it possible to detect if text was generated by a specific AI model without visibly changing the text quality.</p>

                        <h4>How to use this demo</h4>
                        <ol>
                            <li>Enter a prompt in the top text area to generate watermarked text</li>
                            <li>The generated text will appear in the second text box</li>
                            <li>The text will be automatically analyzed to show which tokens (parts of text) were influenced by the watermark</li>
                            <li>The statistics at the bottom show the detection results</li>
                            <li>You can also paste any text in the second box to test if it contains a watermark</li>
                        </ol>

                        <h4>Detection Methods</h4>
                        <!-- External links open in a new tab; rel="noopener noreferrer" keeps the opened page from reaching back into this one. -->
                        <p><strong>Maryland</strong>: A token-level detection algorithm that analyzes how unexpected each token is, based on the paper "<a href="https://arxiv.org/abs/2301.10226" target="_blank" rel="noopener noreferrer">A Watermark for Large Language Models</a>" by Kirchenbauer et al.</p>
                        <p><strong>OpenAI</strong>: A similar watermarking method inspired by initial reports from OpenAI.</p>
                        <p><strong>Maryland Z-score</strong>: A worse variant of the Maryland detector that uses z-scores for statistical interpretation.</p>
                        <p><strong>OpenAI Z-score</strong>: A worse variant of the OpenAI detector that uses z-scores for statistical interpretation.</p>

                        <h4>Parameters Explained</h4>
                        <dl class="help-description-list">
                            <dt>Detector Type</dt>
                            <dd>The algorithm used to detect watermarks in the text. Different detectors perform better in different scenarios.</dd>

                            <dt>Seed</dt>
                            <dd>The random seed used for watermarking. The detector must use the same seed that was used when generating the text. In a real-world scenario, this would be kept private by the model provider.</dd>

                            <dt>N-gram Size</dt>
                            <dd>The number of previous tokens considered when choosing "greenlist" tokens. Larger values make the watermark less robust against edits but may improve text quality.</dd>

                            <dt>Delta</dt>
                            <dd>The bias added to "greenlist" tokens during generation. Higher values make the watermark stronger but might affect text quality. Typical values range from 1.0 to 5.0.</dd>

                            <dt>Temperature</dt>
                            <dd>Controls randomness in text generation. Higher values (e.g., 1.0) produce more diverse outputs; lower values (e.g., 0.2) make outputs more focused and deterministic.</dd>
                        </dl>

                        <h4>Understanding Results</h4>
                        <dl class="help-description-list">
                            <dt>Tokens</dt>
                            <dd>The total number of tokens in the analyzed text. Tokens are units of text that may represent words, parts of words, or punctuation.</dd>

                            <dt>Scored Tokens</dt>
                            <dd>The number of tokens that were actually evaluated by the detector (excludes first few tokens that don't have enough context).</dd>

                            <dt>Final Score</dt>
                            <dd>A measure of how likely the text contains a watermark. Higher scores indicate stronger evidence of watermarking.</dd>

                            <dt>P-value</dt>
                            <dd>The statistical significance of the detection. Lower values (especially p &lt; 1e-6) indicate strong evidence that the text was watermarked. Values close to 0.5 suggest no watermark is present.</dd>
                        </dl>

                        <h4>Related Papers</h4>
                        <ul class="paper-references">
                            <li>
                                <a href="https://arxiv.org/abs/2301.10226" target="_blank" rel="noopener noreferrer">A Watermark for Large Language Models</a>
                                <span class="paper-authors">Kirchenbauer, et al. (2023)</span>
                            </li>
                            <li>
                                <a href="https://arxiv.org/abs/2308.00113" target="_blank" rel="noopener noreferrer">Three Bricks to Consolidate Watermarks for Large Language Models</a>
                                <span class="paper-authors">Fernandez, et al. (2023)</span>
                            </li>
                        </ul>
                    </div>
                    <div class="modal-footer">
                        <button type="button" class="btn btn-primary" data-bs-dismiss="modal">Close</button>
                    </div>
                </div>
            </div>
        </div>

        <!-- Advanced Parameters Modal: detector and sampling settings that the inline script reads by element id. -->
        <!-- aria-labelledby names the dialog; each control points at its helper text through aria-describedby. -->
        <div class="modal fade" id="paramsModal" tabindex="-1" aria-labelledby="paramsModalLabel" aria-hidden="true">
            <div class="modal-dialog">
                <div class="modal-content">
                    <div class="modal-header">
                        <h5 class="modal-title" id="paramsModalLabel">Advanced Parameters</h5>
                        <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
                    </div>
                    <div class="modal-body">
                        <div class="mb-3">
                            <div class="d-flex align-items-center">
                                <i class="bi bi-info-circle param-help-icon me-2" data-bs-toggle="tooltip" title="The algorithm used to detect watermarks in the text. Different algorithms have different strengths and weaknesses."></i>
                                <label for="detectorType" class="form-label mb-0">Detector Type</label>
                            </div>
                            <select class="form-select" id="detectorType" aria-describedby="detectorTypeHelp">
                                <option value="maryland">Maryland</option>
                                <option value="marylandz">Maryland Z-score</option>
                                <option value="openai">OpenAI</option>
                                <option value="openaiz">OpenAI Z-score</option>
                            </select>
                            <div class="form-text" id="detectorTypeHelp">Type of watermark detection algorithm</div>
                        </div>
                        <div class="mb-3">
                            <div class="d-flex align-items-center">
                                <i class="bi bi-info-circle param-help-icon me-2" data-bs-toggle="tooltip" title="The random seed used for watermarking. For detection to work, this must match the seed used during generation."></i>
                                <label for="seed" class="form-label mb-0">Seed</label>
                            </div>
                            <input type="number" class="form-control" id="seed" value="0" aria-describedby="seedHelp">
                            <div class="form-text" id="seedHelp">Random seed for the watermark detector</div>
                        </div>
                        <div class="mb-3">
                            <div class="d-flex align-items-center">
                                <i class="bi bi-info-circle param-help-icon me-2" data-bs-toggle="tooltip" title="The number of previous tokens considered when determining the hashing. Higher values are more secure but may affect robustness. Lower values may introduce a bias in the text distribution."></i>
                                <label for="ngram" class="form-label mb-0">N-gram Size</label>
                            </div>
                            <input type="number" class="form-control" id="ngram" value="1" aria-describedby="ngramHelp">
                            <div class="form-text" id="ngramHelp">Size of the n-gram window used for detection</div>
                        </div>
                        <div class="mb-3">
                            <div class="d-flex align-items-center">
                                <i class="bi bi-info-circle param-help-icon me-2" data-bs-toggle="tooltip" title="The bias added to greenlist tokens during generation. Higher values (2.0-5.0) make watermarks easier to detect but might affect text quality."></i>
                                <label for="delta" class="form-label mb-0">Delta</label>
                            </div>
                            <input type="number" step="0.1" class="form-control" id="delta" value="2.0" aria-describedby="deltaHelp">
                            <div class="form-text" id="deltaHelp">Bias added to greenlist tokens (for Maryland method)</div>
                        </div>
                        <div class="mb-3">
                            <div class="d-flex align-items-center">
                                <i class="bi bi-info-circle param-help-icon me-2" data-bs-toggle="tooltip" title="Controls randomness in generation. Higher values (closer to 2.0) create more diverse outputs and make watermarks easier to detect."></i>
                                <label for="temperature" class="form-label mb-0">Temperature</label>
                            </div>
                            <input type="number" step="0.1" class="form-control" id="temperature" value="0.8" aria-describedby="temperatureHelp">
                            <div class="form-text" id="temperatureHelp">Temperature for sampling (higher = more random)</div>
                        </div>
                    </div>
                    <div class="modal-footer">
                        <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
                        <button type="button" class="btn btn-primary" id="applyParams">Apply</button>
                    </div>
                </div>
            </div>
        </div>

        <!-- Input Form: prompt box with its generate/stop button, plus the editable text that gets scored. -->
        <div class="input-section">
            <div class="prompt-container">
                <!-- A placeholder vanishes on input and is not a label, so aria-label provides the accessible name without changing the layout. -->
                <textarea id="prompt_text" aria-label="Prompt"
                    placeholder="Enter your prompt here to generate text with the model..."></textarea>
                <!-- One button acts as both start and stop; the script toggles its "generating" class. -->
                <button type="button" class="floating-btn" id="generateBtn" aria-label="Generate text or stop generation">
                    <i class="bi bi-send-fill send-icon" aria-hidden="true"></i>
                    <i class="bi bi-stop-fill stop-icon" aria-hidden="true"></i>
                </button>
            </div>
            <textarea id="user_text" aria-label="Text to analyze"
                placeholder="Generated text will appear here. Replace or edit this text to see how watermark detection works."></textarea>
        </div>

        <!-- Token Display: empty container whose contents are written by updateTokenization() in the inline script (one span per token, or an error alert). -->
        <div class="token-display" id="tokenDisplay"></div>

        <!-- Statistics: four summary values that the inline script overwrites after every detection pass. -->
        <div class="stats-container">
            <div>
                <div class="stat-value" id="tokenCount">0</div>
                <div class="stat-label">
                    Tokens
                    <!-- The icon is decorative; the explanation lives in the adjacent help-tooltip text. -->
                    <i class="bi bi-question-circle help-icon" aria-hidden="true"></i>
                    <span class="help-tooltip">Total number of tokens in the text</span>
                </div>
            </div>
            <div>
                <div class="stat-value" id="scoredTokens">0</div>
                <div class="stat-label">
                    Scored Tokens
                    <i class="bi bi-question-circle help-icon" aria-hidden="true"></i>
                    <span class="help-tooltip">Number of tokens that were actually scored by the detector (excludes first n-gram tokens and duplicates)</span>
                </div>
            </div>
            <div>
                <div class="stat-value" id="finalScore">0.00</div>
                <div class="stat-label">
                    Final Score
                    <i class="bi bi-question-circle help-icon" aria-hidden="true"></i>
                    <span class="help-tooltip">Cumulative score from all scored tokens. Higher values indicate more likely watermarked text</span>
                </div>
            </div>
            <div>
                <div class="stat-value" id="pValue">0.500</div>
                <div class="stat-label">
                    P-value
                    <i class="bi bi-question-circle help-icon" aria-hidden="true"></i>
                    <!-- The less-than sign is written as an entity so it can never be read as the start of a tag. -->
                    <span class="help-tooltip">Statistical significance of the score. Lower values indicate stronger evidence of watermarking (p &lt; 0.05 is typically considered significant)</span>
                </div>
            </div>
        </div>
    </div>

    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
    <script>
        // Timer handle for the pending debounced updateTokenization() call (initially null).
        let debounceTimeout = null;
        // AbortController of the in-flight /generate request; null when no generation is running.
        let abortController = null;

        // Cached DOM references. The ids are defined in the markup above.
        // Text that gets scored (also receives the streamed generation output).
        const textarea = document.getElementById('user_text');
        // Prompt sent to /generate.
        const promptArea = document.getElementById('prompt_text');
        // Start/stop toggle button; its 'generating' class marks a running request.
        const generateBtn = document.getElementById('generateBtn');
        // Container that receives the per-token markup or an error alert.
        const tokenDisplay = document.getElementById('tokenDisplay');
        // Summary statistic fields.
        const tokenCount = document.getElementById('tokenCount');
        const scoredTokens = document.getElementById('scoredTokens');
        const finalScore = document.getElementById('finalScore');
        const pValue = document.getElementById('pValue');
        // Controls inside the Advanced Parameters modal.
        const applyParamsBtn = document.getElementById('applyParams');
        const seedInput = document.getElementById('seed');
        const ngramInput = document.getElementById('ngram');
        const detectorTypeSelect = document.getElementById('detectorType');
        const deltaInput = document.getElementById('delta');
        const temperatureInput = document.getElementById('temperature');

        // Once the DOM is ready, wire up tooltips and run a first detection pass.
        document.addEventListener('DOMContentLoaded', () => {
            // Attach a Bootstrap tooltip to every element that opts in through data-bs-toggle.
            document.querySelectorAll('[data-bs-toggle="tooltip"]').forEach((triggerEl) => {
                new bootstrap.Tooltip(triggerEl);
            });

            // Score whatever is already in the text box so the display starts populated.
            updateTokenization().catch((err) => {
                console.error('Error during initial tokenization:', err);
            });
        });

        /**
         * Start streaming generation for the prompt currently in the prompt box.
         *
         * Clears the output textarea, POSTs the prompt plus the current watermark
         * parameters to `/generate`, and reads the response body as a stream of
         * blank-line-separated events whose `data: ` lines carry JSON. Payload
         * fields handled: `token` (appended to the textarea), `text` (replaces the
         * textarea), `error` (alerts and stops) and `done` (stops). The token
         * display is refreshed after every text change.
         *
         * Shows an alert and returns without sending anything when the prompt is empty.
         */
        function startGeneration() {
            const prompt = promptArea.value.trim();
            if (!prompt) {
                alert('Please enter a prompt first.');
                return;
            }

            generateBtn.classList.add('generating');
            textarea.value = '';

            // Keep a local handle so the cleanup below can tell whether this request
            // is still the current one or has been replaced by a newer generation.
            const controller = new AbortController();
            abortController = controller;

            // Fall back to the default only when the field does not parse. A plain
            // `|| default` would also replace a legitimate 0 (e.g. temperature 0),
            // making generation disagree with the values updateTokenization() sends.
            const parseOr = (parse, raw, fallback) => {
                const parsed = parse(raw);
                return isNaN(parsed) ? fallback : parsed;
            };
            const params = {
                detector_type: detectorTypeSelect.value,
                seed: parseOr((raw) => parseInt(raw), seedInput.value, 0),
                ngram: parseOr((raw) => parseInt(raw), ngramInput.value, 1),
                delta: parseOr((raw) => parseFloat(raw), deltaInput.value, 2.0),
                temperature: parseOr((raw) => parseFloat(raw), temperatureInput.value, 0.8)
            };

            // Ask for an event stream in response to the JSON request body.
            const headers = new Headers({
                'Content-Type': 'application/json',
                'Accept': 'text/event-stream',
            });

            fetch('/generate', {
                method: 'POST',
                headers: headers,
                body: JSON.stringify({
                    prompt: prompt,
                    params: params
                }),
                signal: controller.signal  // lets stopGeneration() cancel the request
            }).then((response) => {
                // A non-2xx reply has no event stream worth reading; report it via the catch handler.
                if (!response.ok) {
                    throw new Error(`HTTP error! status: ${response.status}`);
                }

                const reader = response.body.getReader();
                const decoder = new TextDecoder();
                let buffer = '';
                // Set once the server reports an error or completion, so events that
                // were already buffered are not applied after the stream was stopped.
                let finished = false;

                // Apply every `data: ` line contained in one event block.
                function processText(text) {
                    for (const line of text.split('\n')) {
                        if (finished) {
                            return;
                        }
                        if (!line.startsWith('data: ')) {
                            continue;
                        }
                        try {
                            const data = JSON.parse(line.slice(6));

                            if (data.error) {
                                finished = true;
                                alert('Error: ' + data.error);
                                stopGeneration();
                                return;
                            }

                            if (data.token) {
                                // Incremental output: append and re-score.
                                textarea.value += data.token;
                                updateTokenization();
                            }

                            if (data.text) {
                                // Full text (only used if something went wrong with streaming).
                                textarea.value = data.text;
                                updateTokenization();
                            }

                            if (data.done) {
                                finished = true;
                                stopGeneration();
                            }
                        } catch (e) {
                            console.error('Error parsing SSE data:', e);
                        }
                    }
                }

                // Read chunks until the stream ends, handing each complete event to processText.
                function pump() {
                    return reader.read().then(({ value, done }) => {
                        if (done) {
                            // Flush bytes the decoder was holding for an incomplete character.
                            buffer += decoder.decode();
                            if (buffer.length > 0) {
                                processText(buffer);
                            }
                            return;
                        }

                        buffer += decoder.decode(value, { stream: true });
                        // Events are separated by a blank line; the last piece may be incomplete.
                        const events = buffer.split('\n\n');
                        buffer = events.pop();

                        for (const eventText of events) {
                            processText(eventText);
                        }

                        if (finished) {
                            return;
                        }
                        return pump();
                    });
                }

                return pump();
            })
            .catch((error) => {
                if (error.name === 'AbortError') {
                    console.log('Generation stopped by user');
                } else {
                    console.error('Error:', error);
                    alert('Error: Failed to generate text');
                }
            })
            .finally(() => {
                // Only reset shared state if this request is still the active one;
                // otherwise a newer generation owns the button and the controller.
                if (abortController === controller) {
                    generateBtn.classList.remove('generating');
                    abortController = null;
                }
            });
        }

        /**
         * Cancel the running generation request, if any, and put the button back
         * into its idle (non-generating) state.
         */
        function stopGeneration() {
            const activeController = abortController;
            if (activeController) {
                activeController.abort();
                abortController = null;
            }
            generateBtn.classList.remove('generating');
        }

        // The button is a start/stop toggle: its 'generating' class tells which action a click means.
        generateBtn.addEventListener('click', (event) => {
            event.preventDefault(); // Prevent any double triggers
            const isRunning = generateBtn.classList.contains('generating');
            if (isRunning) {
                stopGeneration();
                return;
            }
            startGeneration();
        });

        // Sequence numbers used to discard /tokenize responses that arrive after a newer one was rendered.
        let tokenizeRequestCounter = 0;
        let lastRenderedTokenizeRequest = 0;

        /**
         * Escape a value for safe interpolation into HTML text or a quoted attribute.
         * @param {*} value - Anything; it is converted with String() first.
         * @returns {string} The value with &, <, >, " and ' replaced by entities.
         */
        function escapeHtml(value) {
            return String(value)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
        }

        /**
         * Send the current text and detector parameters to `/tokenize` and render the result.
         *
         * On success, fills the token display with one coloured span per token
         * (with a score / p-value tooltip) and updates the four summary statistics.
         * On failure, shows an error alert in the token display and resets the
         * statistics. Errors are handled here, so the returned promise resolves
         * either way.
         *
         * Responses are rendered only if no newer request has been rendered already,
         * so overlapping calls (one is made per streamed token) cannot roll the
         * display back to an older state.
         *
         * @returns {Promise<void>}
         */
        async function updateTokenization() {
            const text = textarea.value;
            const requestId = ++tokenizeRequestCounter;
            // True for real numbers only; null, undefined, NaN and strings all map to the placeholder.
            const isNumber = (candidate) => typeof candidate === 'number' && !isNaN(candidate);

            try {
                // Unparseable fields fall back to the same defaults the form starts with.
                const seed = parseInt(seedInput.value);
                const ngram = parseInt(ngramInput.value);
                const delta = parseFloat(deltaInput.value);
                const temperature = parseFloat(temperatureInput.value);

                const response = await fetch('/tokenize', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({
                        text: text,
                        params: {
                            detector_type: detectorTypeSelect.value,
                            seed: isNaN(seed) ? 0 : seed,
                            ngram: isNaN(ngram) ? 1 : ngram,
                            delta: isNaN(delta) ? 2.0 : delta,
                            temperature: isNaN(temperature) ? 0.8 : temperature
                        }
                    })
                });

                if (!response.ok) {
                    // The error body may not be JSON (e.g. a proxy error page); keep the status in that case.
                    let serverMessage = null;
                    try {
                        serverMessage = (await response.json()).error;
                    } catch (parseError) {
                        serverMessage = null;
                    }
                    throw new Error(serverMessage || `HTTP error! status: ${response.status}`);
                }

                const data = await response.json();

                if (data.error) {
                    throw new Error(data.error);
                }

                // A newer request has already been rendered; this result is stale.
                if (requestId < lastRenderedTokenizeRequest) {
                    return;
                }
                lastRenderedTokenizeRequest = requestId;

                // Tokens come from user-editable text, so they are escaped before being
                // placed into markup. Replacing innerHTML also removes any previous error alert.
                // NOTE(review): if /tokenize already returns HTML-escaped tokens, drop one of the two escapes.
                tokenDisplay.innerHTML = data.tokens.map((token, i) => {
                    const score = data.scores[i];
                    const pvalue = data.pvalues[i];
                    const scoreDisplay = isNumber(score) ? score.toFixed(3) : 'N/A';
                    const pvalueDisplay = isNumber(pvalue) ? formatPValue(pvalue) : 'N/A';

                    return `<span class="token" style="background-color: ${escapeHtml(data.colors[i])}">
                        ${escapeHtml(token)}
                        <div class="token-tooltip">
                            Score: ${scoreDisplay}<br>
                            P-value: ${pvalueDisplay}
                        </div>
                    </span>`;
                }).join('');

                // Update counts and stats, substituting the idle defaults for missing values.
                tokenCount.textContent = data.token_count || 0;
                scoredTokens.textContent = data.ntoks_scored || 0;
                finalScore.textContent = isNumber(data.final_score) ?
                    data.final_score.toFixed(2) : '0.00';
                pValue.textContent = isNumber(data.final_pvalue) ?
                    formatPValue(data.final_pvalue) : '0.500';
            } catch (error) {
                console.error('Error updating tokenization:', error);

                // Do not let a failure of an outdated request wipe a newer, rendered result.
                if (requestId < lastRenderedTokenizeRequest) {
                    return;
                }
                lastRenderedTokenizeRequest = requestId;

                // Show the error to the user; the message may echo server or input text, so escape it.
                tokenDisplay.innerHTML = `<div class="alert alert-danger">
                    <strong>Error:</strong> ${escapeHtml(error.message || 'Error updating results. Please try again.')}
                </div>`;

                // Reset stats on error
                tokenCount.textContent = '0';
                scoredTokens.textContent = '0';
                finalScore.textContent = '0.00';
                pValue.textContent = '0.500';
            }
        }

        // Re-score the text 500 ms after the last edit; every new edit restarts the wait.
        textarea.addEventListener('input', () => {
            // clearTimeout ignores null and already-fired handles, so no guard is needed.
            clearTimeout(debounceTimeout);
            debounceTimeout = setTimeout(updateTokenization, 500);
        });

        // Numeric parameter fields share one handler shape. Each row is
        // [field, parser, text restored when the field holds something unparseable].
        for (const [field, parseNumber, fallbackText] of [
            [seedInput, parseInt, '0'],
            [ngramInput, parseInt, '1'],
            [deltaInput, parseFloat, '2.0'],
            [temperatureInput, parseFloat, '0.8'],
        ]) {
            field.addEventListener('input', () => {
                // An empty field is tolerated while typing; any other content must parse to a number.
                const current = field.value;
                if (current !== '' && isNaN(parseNumber(current))) {
                    field.value = fallbackText;
                }
                // Restart the shared 500 ms debounce before re-running detection.
                clearTimeout(debounceTimeout);
                debounceTimeout = setTimeout(updateTokenization, 500);
            });
        }

        // Ctrl/Cmd+Enter: toggle generation when the prompt has focus, otherwise apply the parameters.
        document.addEventListener('keydown', (event) => {
            const isShortcut = (event.metaKey || event.ctrlKey) && event.key === 'Enter';
            if (!isShortcut) {
                return;
            }
            event.preventDefault();

            if (document.activeElement !== promptArea) {
                applyParamsBtn.click();
                return;
            }

            const isRunning = generateBtn.classList.contains('generating');
            if (isRunning) {
                stopGeneration();
            } else {
                startGeneration();
            }
        });

        // Switching detector re-runs detection through the same 500 ms debounce as the other inputs.
        detectorTypeSelect.addEventListener('change', () => {
            clearTimeout(debounceTimeout);
            debounceTimeout = setTimeout(updateTokenization, 500);
        });

        // Apply: re-run detection with the current parameters, then close the dialog.
        applyParamsBtn.addEventListener('click', () => {
            // Hide the parameters modal if Bootstrap has an instance for it.
            const hideParamsModal = () => {
                const modalEl = document.getElementById('paramsModal');
                const instance = bootstrap.Modal.getInstance(modalEl);
                if (instance) {
                    instance.hide();
                }
            };

            updateTokenization()
                .then(hideParamsModal)
                .catch((err) => {
                    console.error('Error applying parameters:', err);
                });
        });

        /**
         * Format a p-value for display.
         * @param {number} value - The p-value to format.
         * @returns {string} Three fixed decimals when value >= 0.001, otherwise
         *     exponential notation with two fraction digits.
         */
        function formatPValue(value) {
            return value >= 0.001 ? value.toFixed(3) : value.toExponential(2);
        }
    </script>
</body>
</html>