Ashendilantha committed · Commit 47aaa4b · verified · 1 Parent(s): 42bdc4d

Update app.py

Files changed (1):
  1. app.py +25 -9
app.py CHANGED
@@ -1,18 +1,15 @@
 import streamlit as st
 import pandas as pd
-import numpy as np
+import torch
 import re
-import nltk
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
-import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-import requests
-from io import BytesIO
+import nltk
 
 # Set page configuration
-st.set_page_config(page_title="News Classifier", page_icon="📰")
+st.set_page_config(page_title="News Analysis App", layout="wide")
 
 # Download required NLTK resources
 @st.cache_resource
@@ -69,7 +66,7 @@ def preprocess_text(text):
 
     return cleaned_text
 
-# Function to classify news articles with batch processing
+# Function to classify news articles (bulk processing)
 def classify_news(df, model, tokenizer):
     # Preprocess the text
     df['cleaned_content'] = df['content'].apply(preprocess_text)
@@ -97,6 +94,26 @@ def classify_news(df, model, tokenizer):
 
     return df
 
+# Function for single article classification
+def classify_single_article(text, model, tokenizer):
+    # Preprocess the text
+    cleaned_text = preprocess_text(text)
+
+    # Prepare for classification
+    inputs = tokenizer(cleaned_text, padding=True, truncation=True, max_length=512, return_tensors="pt")
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits
+        prediction = torch.argmax(logits, dim=1).item()
+
+    # Map numeric prediction back to class label
+    id2label = model.config.id2label
+    category = id2label[prediction]
+    confidence = torch.nn.functional.softmax(logits, dim=1).max().item() * 100
+
+    return category, round(confidence, 2)
+
 # Main app
 def main():
     st.title("News Classifier 📢")
@@ -120,7 +137,7 @@ def main():
 
         # Classify the text
         with st.spinner("Classifying the article..."):
-            category, confidence = classify_text(text_input, model, tokenizer)
+            category, confidence = classify_single_article(text_input, model, tokenizer)
             st.write(f"*Predicted Category:* {category}")
             st.write(f"*Confidence Level:* {confidence}%")
     else:
@@ -200,4 +217,3 @@ def main():
 if __name__ == "__main__":
     main()
 
-