Spaces:

TAgroup5
/

demo-News_classifier

Sleeping

App Files Community

TAgroup5 committed on Mar 28

Commit

5cdc45c

verified ·

1 Parent(s): b9a2eea

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -41

app.py CHANGED Viewed

@@ -1,75 +1,66 @@
 import streamlit as st
 import pandas as pd
-from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 import io
-# Load pre-trained model and tokenizer for text classification
-model_name = "TAgroup5/news-classification-model"
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-# Initialize the text classification pipeline
 text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
-# Initialize the question answering pipeline
 qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
-# Streamlit App Layout
 st.title("News Classification and Q&A")
-# Component 1: Text Classification Pipeline
 st.header("Classify News Articles")
-st.markdown("""
-    Upload a CSV file containing news articles, and the model will classify each article
-    into one of the following categories: Business, Opinion, Political Gossip, Sports, or World News.
-""")
 uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 if uploaded_file is not None:
-    df = pd.read_csv(uploaded_file)
     if 'content' not in df.columns:
-        st.error("The uploaded CSV file must have a 'content' column containing news excerpts.")
     else:
-        st.write("Preview of the data:")
         st.dataframe(df.head())
-        # Preprocess the data and classify each article
         def preprocess_text(text):
-            # Apply necessary preprocessing steps here (e.g., removing stopwords, special characters, etc.)
             return text
         # Apply preprocessing and classification
         df['processed_content'] = df['content'].apply(preprocess_text)
-        df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'])
-        # Show the results
         st.write("Classification Results:")
         st.dataframe(df[['content', 'class']])
-        # Provide an option to download the output as CSV
-        output = io.StringIO()
-        df.to_csv(output, index=False)
         st.download_button(label="Download classified news", data=output.getvalue(), file_name="output.csv", mime="text/csv")
-# Component 2: Q&A Pipeline
 st.header("Ask a Question About the News")
-st.markdown("""
-    Type in a question, and the model will extract an answer from the provided news content.
-""")
 question = st.text_input("Ask a question:")
-if question:
-    context = st.text_area("Provide the news article or content for the Q&A:", height=150)
-    if context:
-        # Perform the question-answering task
-        result = qa_pipeline(question=question, context=context)
-        st.write("Answer:", result['answer'])

 import streamlit as st
 import pandas as pd
+import re
 import io
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
+# Load fine-tuned model and tokenizer
+model_name = "TAgroup5/daily-mirror-news-classifier"
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Initialize pipelines
 text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
 qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
+# Streamlit App
 st.title("News Classification and Q&A")
+## ====================== Component 1: News Classification ====================== ##
 st.header("Classify News Articles")
+st.markdown("Upload a CSV file with a 'content' column to classify news into categories.")
 uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 if uploaded_file is not None:
+    try:
+        df = pd.read_csv(uploaded_file, encoding="utf-8")  # Handle encoding issues
+    except UnicodeDecodeError:
+        df = pd.read_csv(uploaded_file, encoding="ISO-8859-1")
     if 'content' not in df.columns:
+        st.error("Error: The uploaded CSV must contain a 'content' column.")
     else:
+        st.write("Preview of uploaded data:")
         st.dataframe(df.head())
+        # Preprocessing function
         def preprocess_text(text):
+            text = text.lower()  # Ensure consistent casing
+            text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
+            text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove special characters
             return text
         # Apply preprocessing and classification
         df['processed_content'] = df['content'].apply(preprocess_text)
+        df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
+        # Show results
         st.write("Classification Results:")
         st.dataframe(df[['content', 'class']])
+        # Provide CSV download
+        output = io.BytesIO()
+        df.to_csv(output, index=False, encoding="utf-8-sig")
         st.download_button(label="Download classified news", data=output.getvalue(), file_name="output.csv", mime="text/csv")
+## ====================== Component 2: Q&A ====================== ##
 st.header("Ask a Question About the News")
+st.markdown("Enter a question and provide a news article to get an answer.")
 question = st.text_input("Ask a question:")
+context = st.text_area("Provide the news article or content for the Q&A:", height=150)
+if question and context.strip():
+    result = qa_pipeline(question=question, context=context)
+    st.write("Answer:", result['answer'])