Spaces:

TAgroup5
/

demo-News_classifier

Running

App Files Community

TAgroup5 committed on Mar 29

Commit

4a1b338

verified ·

1 Parent(s): 9bc7dbb

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -47

app.py CHANGED Viewed

@@ -15,40 +15,35 @@ st.markdown("""
             font-family: 'Arial', sans-serif;
         }
         .stApp {
-            background-image: url('https://i.pinimg.com/736x/9f/07/01/9f070105a396cfe2dc0dc5d7771e61f6.jpg');
             background-size: cover;
             background-position: center;
             padding: 20px;
             border-radius: 10px;
             box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
         }
-        h1, h2 {
             color: #ff4b4b;
         }
         .stButton>button {
-            background-color: #ff4b4b !important;
-            color: white;
-            font-size: 16px;
-            border-radius: 5px;
         }
         .stDownloadButton>button {
             background-color: #28a745 !important;
-            color: white;
-            font-size: 16px;
-            border-radius: 5px;
-        }
-        .stTextInput>div>div>input {
-            border-radius: 5px;
-            border: 1px solid #ccc;
-        }
-        .stTextArea>div>textarea {
-            border-radius: 5px;
-            border: 1px solid #ccc;
         }
     </style>
 """, unsafe_allow_html=True)
-# Load fine-tuned models and tokenizers
 model_name_classification = "TAgroup5/news-classification-model"
 model = AutoModelForSequenceClassification.from_pretrained(model_name_classification)
 tokenizer = AutoTokenizer.from_pretrained(model_name_classification)
@@ -59,64 +54,82 @@ tokenizer_qa = AutoTokenizer.from_pretrained(model_name_qa)
 # Initialize pipelines
 text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
-qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
 # Streamlit App
 st.title(" News Classification and Q&A ")
-## ====================== Component 1: News Classification ====================== ##
 st.header("📌 Classify News Articles")
-st.markdown("Upload a CSV file with a **'content'** column to classify news into categories.")
 uploaded_file = st.file_uploader("📂 Choose a CSV file", type="csv")
 if uploaded_file is not None:
-    try:
-        df = pd.read_csv(uploaded_file, encoding="utf-8")
-    except UnicodeDecodeError:
-        df = pd.read_csv(uploaded_file, encoding="ISO-8859-1")
     if 'content' not in df.columns:
         st.error("❌ Error: The uploaded CSV must contain a 'content' column.")
     else:
         st.write("✅ Preview of uploaded data:")
         st.dataframe(df.head())
-        # Preprocessing function
         def preprocess_text(text):
-            text = text.lower() # Convert to lowercase
-            text = re.sub(r'\s+', ' ', text) # Remove extra spaces
-            text = re.sub(r'[^a-z\s]', '', text) # Remove special characters & numbers
             return text
-        # Apply preprocessing and classification
         df['processed_content'] = df['content'].apply(preprocess_text)
-        # Classify each record into one of the five classes
         df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
-        # Show results
         st.write("🔍 Classification Results:")
         st.dataframe(df[['content', 'class']])
-        # Provide CSV download
         output = io.BytesIO()
         df.to_csv(output, index=False, encoding="utf-8-sig")
-        st.download_button(label="📥 Download Classified News", data=output.getvalue(), file_name="classified_news.csv", mime="text/csv")
-## ====================== Component 2: Q&A ====================== ##
 st.header("💬 Ask a Question About the News")
-st.markdown("Enter a question and provide a news article to get an AI-generated answer.")
 question = st.text_input("❓ Ask a question:")
 context = st.text_area("📰 Provide the news article or content:", height=150)
 if question and context.strip():
-    model_name_qa = "distilbert-base-uncased-distilled-squad"
-    qa_pipeline = pipeline("question-answering", model=model_name_qa, tokenizer=model_name_qa)
     result = qa_pipeline(question=question, context=context)
-    if 'answer' in result and result['answer']:
-        st.success(f"✅ Answer: {result['answer']}")
-    else:
-        st.warning("⚠️ No answer found in the provided content.")

             font-family: 'Arial', sans-serif;
         }
         .stApp {
+            background-image: url('https://i.pinimg.com/474x/9c/68/86/9c6886dd642a4869f3fa4578f9fe34ef.jpg');
             background-size: cover;
             background-position: center;
             padding: 20px;
             border-radius: 10px;
             box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
         }
+        h1 {
             color: #ff4b4b;
+            text-align: center;
         }
         .stButton>button {
+            background-color: #088da5 !important;
+            color: white !important;
+            font-size: 18px !important;
+            border-radius: 10px !important;
+            width: 100%;
+            padding: 10px;
         }
         .stDownloadButton>button {
             background-color: #28a745 !important;
+            color: white !important;
+            font-size: 16px !important;
+            border-radius: 10px !important;
         }
     </style>
 """, unsafe_allow_html=True)
+# Load fine-tuned models
 model_name_classification = "TAgroup5/news-classification-model"
 model = AutoModelForSequenceClassification.from_pretrained(model_name_classification)
 tokenizer = AutoTokenizer.from_pretrained(model_name_classification)
 # Initialize pipelines
 text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
+qa_pipeline = pipeline("question-answering", model=model_qa, tokenizer=tokenizer_qa)
 # Streamlit App
 st.title(" News Classification and Q&A ")
+## ====================== News Classification ====================== ##
 st.header("📌 Classify News Articles")
+st.markdown("Upload a CSV file containing a **'Content'** column to classify news into pre-defined categories.")
 uploaded_file = st.file_uploader("📂 Choose a CSV file", type="csv")
 if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file, encoding="utf-8")
     if 'content' not in df.columns:
         st.error("❌ Error: The uploaded CSV must contain a 'content' column.")
     else:
         st.write("✅ Preview of uploaded data:")
         st.dataframe(df.head())
         def preprocess_text(text):
+            text = text.lower()
+            text = re.sub(r'\s+', ' ', text)
+            text = re.sub(r'[^a-z\s]', '', text)
             return text
         df['processed_content'] = df['content'].apply(preprocess_text)
         df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
         st.write("🔍 Classification Results:")
         st.dataframe(df[['content', 'class']])
         output = io.BytesIO()
         df.to_csv(output, index=False, encoding="utf-8-sig")
+        st.download_button("📥 Download Classified News", data=output.getvalue(), file_name="output.csv", mime="text/csv")
+        st.write("🔍 **Filter by Category**")
+        categories = ['All', 'Business', 'Opinion', 'Political_gossip', 'Sports', 'World_news']
+        col1, col2, col3, col4, col5, col6 = st.columns(6)
+        selected_category = 'All'
+        with col1:
+            if st.button("All"):
+                selected_category = 'All'
+        with col2:
+            if st.button("📈 Business"):
+                selected_category = 'Business'
+        with col3:
+            if st.button("🗣 Opinion"):
+                selected_category = 'Opinion'
+        with col4:
+            if st.button("🏛 Political Gossip"):
+                selected_category = 'Political_gossip'
+        with col5:
+            if st.button("⚽ Sports"):
+                selected_category = 'Sports'
+        with col6:
+            if st.button("🌎 World News"):
+                selected_category = 'World_news'
+        if selected_category != 'All':
+            filtered_df = df[df['class'] == selected_category]
+        else:
+            filtered_df = df
+        st.write(f"🔎 Showing news articles in category: {selected_category}")
+        st.dataframe(filtered_df[['content', 'class']])
+# Add a separator
+st.markdown("---")
+## ====================== Q&A ====================== ##
 st.header("💬 Ask a Question About the News")
 question = st.text_input("❓ Ask a question:")
 context = st.text_area("📰 Provide the news article or content:", height=150)
 if question and context.strip():
     result = qa_pipeline(question=question, context=context)
+    st.success(f"✅ Answer: {result['answer']}")