import streamlit as st
from transformers import pipeline, AutoTokenizer
from PyPDF2 import PdfReader
import docx
import plotly.graph_objects as go

# Page configuration
st.set_page_config(layout="wide")
st.title("πŸ“„ AI Content Analyzer")
st.markdown("Upload PDF/Word files to detect AI-generated content")

# Constants
MAX_WORDS = 1000  # Maximum words passed to the detector
WARNING_THRESHOLD = MAX_WORDS  # Warn whenever the text will be truncated

# Load AI detection model
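# st.cache_resource keeps the loaded pipeline in memory across Streamlit reruns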
@st.cache_resource
def load_model():
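    # "roberta-base-openai-detector": RoBERTa-base fine-tuned by OpenAI to flag
    # GPT-2 output, hosted on the Hugging Face Hub (now under openai-community)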
    model_name = "roberta-base-openai-detector"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return pipeline("text-classification", model=model_name, tokenizer=tokenizer)

detector = load_model()

def count_words(text):
    return len(text.split())

def create_gauge(score):
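    # Render a 0-100 gauge: green below 50, yellow 50-75, red above 75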
    fig = go.Figure(go.Indicator(
        mode = "gauge+number",
        value = score,
        domain = {'x': [0, 1], 'y': [0, 1]},
        title = {'text': "AI Content Probability", 'font': {'size': 20}},
        gauge = {
            'axis': {'range': [None, 100], 'tickwidth': 1},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 50], 'color': 'green'},
                {'range': [50, 75], 'color': 'yellow'},
                {'range': [75, 100], 'color': 'red'}]
        }))
    st.plotly_chart(fig, use_container_width=True)

# File uploader
uploaded_file = st.file_uploader("Upload file (PDF or Word)", type=["pdf", "docx"])

if uploaded_file:
    # Extract text
    text = ""
    if uploaded_file.name.endswith(".pdf"):
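        # extract_text() can return None for image-only pages, hence the `or ""` guard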
        reader = PdfReader(uploaded_file)
        text = " ".join([page.extract_text() or "" for page in reader.pages])
    else:
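        # python-docx exposes body paragraphs only; text inside tables is not read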
        doc = docx.Document(uploaded_file)
        text = " ".join([para.text for para in doc.paragraphs])

    word_count = count_words(text)
    
    # Word limit warning
    if word_count > WARNING_THRESHOLD:
        st.warning(f"⚠️ File contains {word_count} words (Analyzing first {MAX_WORDS} words only)")

    if st.button("Analyze Content"):
        if word_count < 50:
            st.error("❌ Insufficient text for analysis (minimum 50 words required)")
        else:
            # Process first 1000 words
            processed_text = " ".join(text.split()[:MAX_WORDS])
            
            # Perform analysis; truncate inputs to the model's 512-token limit
            result = detector(processed_text, truncation=True)
            # The detector labels AI text as "Fake"; uppercase to be robust to label casing
            label, score = result[0]['label'].upper(), result[0]['score'] * 100
            ai_prob = score if label == 'FAKE' else 100 - score
            
            # Display results
            st.subheader("Analysis Results")
            create_gauge(ai_prob)
            
            col1, col2 = st.columns(2)
            with col1:
                st.metric("Words Analyzed", f"{min(word_count, MAX_WORDS)}/{word_count}")
            with col2:
                st.metric("AI Probability", f"{ai_prob:.1f}%")
            
            with st.expander("View Text Sample"):
                st.text(processed_text[:1000] + ("..." if len(processed_text) > 1000 else ""))