Update app.py
app.py CHANGED
@@ -1,18 +1,15 @@
 import streamlit as st
 import pandas as pd
-import
+import torch
 import re
-import nltk
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
-import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-import
-from io import BytesIO
+import nltk

 # Set page configuration
-st.set_page_config(page_title="News
+st.set_page_config(page_title="News Analysis App", layout="wide")

 # Download required NLTK resources
 @st.cache_resource
@@ -69,7 +66,7 @@ def preprocess_text(text):

     return cleaned_text

-# Function to classify news articles
+# Function to classify news articles (bulk processing)
 def classify_news(df, model, tokenizer):
     # Preprocess the text
     df['cleaned_content'] = df['content'].apply(preprocess_text)
@@ -97,6 +94,26 @@ def classify_news(df, model, tokenizer):

     return df

+# Function for single article classification
+def classify_single_article(text, model, tokenizer):
+    # Preprocess the text
+    cleaned_text = preprocess_text(text)
+
+    # Prepare for classification
+    inputs = tokenizer(cleaned_text, padding=True, truncation=True, max_length=512, return_tensors="pt")
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits
+        prediction = torch.argmax(logits, dim=1).item()
+
+    # Map numeric prediction back to class label
+    id2label = model.config.id2label
+    category = id2label[prediction]
+    confidence = torch.nn.functional.softmax(logits, dim=1).max().item() * 100
+
+    return category, round(confidence, 2)
+
 # Main app
 def main():
     st.title("News Classifier 📢")
@@ -120,7 +137,7 @@ def main():

         # Classify the text
         with st.spinner("Classifying the article..."):
-            category, confidence =
+            category, confidence = classify_single_article(text_input, model, tokenizer)
             st.write(f"*Predicted Category:* {category}")
             st.write(f"*Confidence Level:* {confidence}%")
     else:
@@ -200,4 +217,3 @@ def main():
 if __name__ == "__main__":
     main()

-
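Below is a minimal, hypothetical sketch of how the classify_single_article helper added in this commit could be exercised outside Streamlit, assuming preprocess_text and classify_single_article are importable from app.py; the checkpoint name "my-news-classifier" and the sample sentence are placeholders and are not part of this commit.

# Hypothetical usage sketch (not part of the commit): load a fine-tuned
# sequence-classification checkpoint and classify a single article.
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from app import classify_single_article  # helper added in this commit

checkpoint = "my-news-classifier"  # placeholder; substitute the Space's actual model
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
model.eval()

text = "The central bank raised interest rates again on Tuesday."
category, confidence = classify_single_article(text, model, tokenizer)
print(category, confidence)  # predicted label and confidence in percent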