dejanseo committed on
Commit
bd98692
·
verified ·
1 Parent(s): 68d1553

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -35
app.py CHANGED
@@ -3,9 +3,9 @@ import torch
3
  import torch.nn.functional as F
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
  import re
6
- import logging
7
 
8
- # Set up logging
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
@@ -16,34 +16,38 @@ st.set_page_config(
16
  layout="wide"
17
  )
18
 
19
- # Logo
20
- st.markdown(
21
- """
22
- <a href="https://dejan.ai/" target="_blank">
23
- <img src="https://dejan.ai/wp-content/uploads/2024/02/dejan-300x103.png" alt="DEJAN logo">
24
- </a>
25
- """,
26
- unsafe_allow_html=True
27
  )
28
 
29
- # Custom font
30
  st.markdown("""
31
  <link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
32
  <style>
33
- html, body, [class*="css"] {
34
- font-family: 'Roboto', sans-serif;
35
- }
36
  </style>
37
  """, unsafe_allow_html=True)
38
 
39
@st.cache_resource
def load_model_and_tokenizer(model_name):
    """Load the tokenizer and sequence-classification model for ``model_name``.

    Returns:
        A ``(tokenizer, model, device)`` tuple. The model has been moved to
        ``device`` and put into eval mode before it is returned.
    """
    tok = AutoTokenizer.from_pretrained(model_name)

    # Use the GPU when torch reports one as available; otherwise stay on CPU.
    target = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # bfloat16 is only selected on a CUDA device that reports support for it.
    if target.type == "cuda" and torch.cuda.is_bf16_supported():
        weights_dtype = torch.bfloat16
    else:
        weights_dtype = torch.float32

    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name, torch_dtype=weights_dtype
    )
    classifier.to(target)
    classifier.eval()
    return tok, classifier, target
48
 
49
  MODEL_NAME = "dejanseo/ai-detection-small"
@@ -51,33 +55,33 @@ try:
51
  tokenizer, model, device = load_model_and_tokenizer(MODEL_NAME)
52
  except Exception as e:
53
  st.error(f"Error loading model: {e}")
54
- logger.error("Failed to load model or tokenizer", exc_info=True)
55
  st.stop()
56
 
57
  # Labels
58
  LABELS = ["AI Content", "Human Content"]
59
 
60
- # Sentence splitter
61
  def sent_tokenize(text):
62
  sentences = re.split(r'(?<=[\.!?])\s+', text.strip())
63
  return [s for s in sentences if s]
64
 
65
  # UI
66
  st.title("AI Article Detection")
67
- text = st.text_area("Enter text to classify", height=200)
68
 
69
  if st.button("Classify", type="primary"):
70
- if not text.strip():
71
  st.warning("Please enter some text.")
72
  else:
73
- with st.spinner("Analyzing..."):
74
  try:
75
  sentences = sent_tokenize(text)
76
  if not sentences:
77
  st.warning("No sentences detected.")
78
  st.stop()
79
 
80
- # Tokenize each sentence
81
  inputs = tokenizer(
82
  sentences,
83
  return_tensors="pt",
@@ -90,21 +94,17 @@ if st.button("Classify", type="primary"):
90
  with torch.no_grad():
91
  outputs = model(**inputs)
92
  logits = outputs.logits
93
- probs = F.softmax(logits, dim=-1).cpu() # shape [n_sentences, 2]
94
  preds = torch.argmax(probs, dim=-1).cpu()
95
 
96
  # Build inline styled text
97
  styled_chunks = []
98
  for i, sent in enumerate(sentences):
99
  pred = preds[i].item()
100
- # select color channel
101
- if pred == 0:
102
- r, g = 255, 0 # red for AI
103
- else:
104
- r, g = 0, 255 # green for Human
105
- confidence = probs[i, pred].item() # between 0 and 1
106
- alpha = confidence # drive opacity directly
107
- # wrap sentence in span
108
  span = (
109
  f"<span "
110
  f"style='background-color: rgba({r},{g},0,{alpha:.2f}); "
@@ -114,15 +114,14 @@ if st.button("Classify", type="primary"):
114
  )
115
  styled_chunks.append(span)
116
 
117
- # join all sentences inline
118
  full_text_html = "".join(styled_chunks)
119
  st.markdown(full_text_html, unsafe_allow_html=True)
120
 
121
- # Overall AI likelihood
122
  avg_probs = torch.mean(probs, dim=0)
123
- ai_likelihood = avg_probs[0].item() * 100 # class 0 is AI
124
  st.subheader(f"🤖 AI Likelihood: {ai_likelihood:.1f}%")
125
 
126
  except Exception as e:
127
- st.error(f"Analysis error: {e}")
128
- logger.error("Classification failed", exc_info=True)
 
3
  import torch.nn.functional as F
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
  import re
6
+ import logging # Optional: Add logging for better debugging
7
 
8
+ # Set up logging (optional but helpful)
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
 
16
  layout="wide"
17
  )
18
 
19
+ # Logo as provided
20
+ st.logo(
21
+ image="https://dejan.ai/wp-content/uploads/2024/02/dejan-300x103.png",
22
+ link="https://dejan.ai/",
 
 
 
 
23
  )
24
 
25
+ # Font styling
26
  st.markdown("""
27
  <link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
28
  <style>
29
+ html, body, [class*="css"] {
30
+ font-family: 'Roboto', sans-serif;
31
+ }
32
  </style>
33
  """, unsafe_allow_html=True)
34
 
35
@st.cache_resource  # Cache the model and tokenizer to avoid reloading on every interaction
def load_model_and_tokenizer(model_name):
    """Load the tokenizer and sequence-classification model for ``model_name``.

    Progress is reported through the module-level ``logger``.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model, device)`` tuple. The model has been moved to
        ``device`` and put into eval mode before it is returned.
    """
    # Pass values as logging arguments so formatting is deferred to the
    # logging framework instead of being done eagerly with f-strings.
    logger.info("Loading tokenizer: %s", model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # bfloat16 is only selected on a CUDA device that reports support for it.
    use_bf16 = device.type == "cuda" and torch.cuda.is_bf16_supported()
    dtype = torch.bfloat16 if use_bf16 else torch.float32
    logger.info("Using device: %s with dtype: %s", device, dtype)

    logger.info("Loading model: %s", model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=dtype)
    model.to(device)
    model.eval()
    logger.info("Model loaded successfully.")

    return tokenizer, model, device
52
 
53
  MODEL_NAME = "dejanseo/ai-detection-small"
 
55
  tokenizer, model, device = load_model_and_tokenizer(MODEL_NAME)
56
  except Exception as e:
57
  st.error(f"Error loading model: {e}")
58
+ logger.error(f"Failed to load model or tokenizer: {e}", exc_info=True)
59
  st.stop()
60
 
61
  # Labels
62
  LABELS = ["AI Content", "Human Content"]
63
 
64
+ # Regex-based sentence splitter
65
  def sent_tokenize(text):
66
  sentences = re.split(r'(?<=[\.!?])\s+', text.strip())
67
  return [s for s in sentences if s]
68
 
69
  # UI
70
  st.title("AI Article Detection")
71
+ text = st.text_area("Enter text to classify", height=200, placeholder="Paste your text here...")
72
 
73
  if st.button("Classify", type="primary"):
74
+ if not text or not text.strip():
75
  st.warning("Please enter some text.")
76
  else:
77
+ with st.spinner("Analyzing... Please wait."):
78
  try:
79
  sentences = sent_tokenize(text)
80
  if not sentences:
81
  st.warning("No sentences detected.")
82
  st.stop()
83
 
84
+ # Tokenize sentences
85
  inputs = tokenizer(
86
  sentences,
87
  return_tensors="pt",
 
94
  with torch.no_grad():
95
  outputs = model(**inputs)
96
  logits = outputs.logits
97
+ probs = F.softmax(logits, dim=-1).cpu() # [n_sentences, 2]
98
  preds = torch.argmax(probs, dim=-1).cpu()
99
 
100
  # Build inline styled text
101
  styled_chunks = []
102
  for i, sent in enumerate(sentences):
103
  pred = preds[i].item()
104
+ # red for AI (class 0), green for Human (class 1)
105
+ r, g = (255, 0) if pred == 0 else (0, 255)
106
+ confidence = probs[i, pred].item() # 0.0–1.0
107
+ alpha = confidence # opacity
 
 
 
 
108
  span = (
109
  f"<span "
110
  f"style='background-color: rgba({r},{g},0,{alpha:.2f}); "
 
114
  )
115
  styled_chunks.append(span)
116
 
 
117
  full_text_html = "".join(styled_chunks)
118
  st.markdown(full_text_html, unsafe_allow_html=True)
119
 
120
+ # Overall AI likelihood (class 0)
121
  avg_probs = torch.mean(probs, dim=0)
122
+ ai_likelihood = avg_probs[0].item() * 100
123
  st.subheader(f"🤖 AI Likelihood: {ai_likelihood:.1f}%")
124
 
125
  except Exception as e:
126
+ st.error(f"An error occurred during analysis: {e}")
127
+ logger.error("Analysis failed", exc_info=True)