Spaces:

TAgroup5
/

demo-News_classifier

Running

App Files Community

TAgroup5 committed on Mar 29

Commit

73c0f99

verified ·

1 Parent(s): b704849

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -40

app.py CHANGED Viewed

@@ -4,81 +4,110 @@ import re
 import io
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 from transformers import AutoModelForQuestionAnswering
-# Load fine-tuned models and tokenizers for both functions
-model_name_classification = "TAgroup5/news-classification-model"  # Replace with the correct model name
-model = AutoModelForSequenceClassification.from_pretrained(model_name_classification)
-tokenizer = AutoTokenizer.from_pretrained(model_name_classification)
-model_name_qa = "distilbert-base-cased-distilled-squad"
-model_qa = AutoModelForQuestionAnswering.from_pretrained(model_name_qa)
-tokenizer_qa = AutoTokenizer.from_pretrained(model_name_qa)
-# Initialize pipelines
-text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
-qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
-# Streamlit App
-st.title("News Classification and Q&A")
 ## ====================== Component 1: News Classification ====================== ##
-st.header("Classify News Articles")
 st.markdown("Upload a CSV file with a 'content' column to classify news into categories.")
 uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 if uploaded_file is not None:
     try:
-        df = pd.read_csv(uploaded_file, encoding="utf-8")  # Handle encoding issues
     except UnicodeDecodeError:
         df = pd.read_csv(uploaded_file, encoding="ISO-8859-1")
     if 'content' not in df.columns:
-        st.error("Error: The uploaded CSV must contain a 'content' column.")
     else:
         st.write("Preview of uploaded data:")
         st.dataframe(df.head())
-        # Preprocessing function to clean the text
         def preprocess_text(text):
-            text = text.lower()  # Convert to lowercase
-            text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
-            text = re.sub(r'[^a-z\s]', '', text)  # Remove special characters & numbers
-            # You don't need tokenization here, as the model tokenizer will handle it
             return text
-        # Apply preprocessing and classification
         df['processed_content'] = df['content'].apply(preprocess_text)
-        # Classify each record into one of the five classes
         df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
-        # Show results
-        st.write("Classification Results:")
         st.dataframe(df[['content', 'class']])
         # Provide CSV download
         output = io.BytesIO()
         df.to_csv(output, index=False, encoding="utf-8-sig")
-        st.download_button(label="Download classified news", data=output.getvalue(), file_name="output.csv", mime="text/csv")
 ## ====================== Component 2: Q&A ====================== ##
-st.header("Ask a Question About the News")
 st.markdown("Enter a question and provide a news article to get an answer.")
-question = st.text_input("Ask a question:")
-context = st.text_area("Provide the news article or content for the Q&A:", height=150)
 if question and context.strip():
-    model_name_qa = "distilbert-base-uncased-distilled-squad"  # Example of a common Q&A model
     qa_pipeline = pipeline("question-answering", model=model_name_qa, tokenizer=model_name_qa)
     result = qa_pipeline(question=question, context=context)
-    # Check if the result contains an answer
     if 'answer' in result and result['answer']:
-        st.write("Answer:", result['answer'])
     else:
-        st.write("No answer found in the provided content.")

 import io
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 from transformers import AutoModelForQuestionAnswering
+from streamlit_extras.app_logo import add_logo  # For adding a logo
+# Custom Styling
+st.set_page_config(page_title="News Classifier & Q&A", page_icon="📰", layout="wide")
+# CSS for styling
+st.markdown(
+    """
+    <style>
+        body {
+            background-color: #f5f5f5;
+        }
+        .stApp {
+            background-color: white;
+            border-radius: 10px;
+            padding: 20px;
+            box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
+        }
+        .stTitle, .stHeader {
+            color: #0073e6;
+            text-align: center;
+        }
+        .stButton>button {
+            background-color: #0073e6 !important;
+            color: white !important;
+            border-radius: 8px !important;
+            font-size: 16px !important;
+        }
+        .stDownloadButton>button {
+            background-color: #28a745 !important;
+            color: white !important;
+            border-radius: 8px !important;
+        }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+# Add a logo (optional, replace with your logo URL)
+# add_logo("https://your-logo-url.png", height=50)
+st.title("📰 News Classification & Q&A")
 ## ====================== Component 1: News Classification ====================== ##
+st.header("📌 Classify News Articles")
 st.markdown("Upload a CSV file with a 'content' column to classify news into categories.")
 uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 if uploaded_file is not None:
     try:
+        df = pd.read_csv(uploaded_file, encoding="utf-8")
     except UnicodeDecodeError:
         df = pd.read_csv(uploaded_file, encoding="ISO-8859-1")
     if 'content' not in df.columns:
+        st.error("❌ The uploaded CSV must contain a 'content' column.")
     else:
+        st.success("✅ File uploaded successfully!")
         st.write("Preview of uploaded data:")
         st.dataframe(df.head())
+        # Preprocessing function
         def preprocess_text(text):
+            text = text.lower()
+            text = re.sub(r'\s+', ' ', text)
+            text = re.sub(r'[^a-z\s]', '', text)
             return text
+        # Apply preprocessing
         df['processed_content'] = df['content'].apply(preprocess_text)
+        # Load Model
+        model_name_classification = "TAgroup5/news-classification-model"
+        model = AutoModelForSequenceClassification.from_pretrained(model_name_classification)
+        tokenizer = AutoTokenizer.from_pretrained(model_name_classification)
+        text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
+        # Classify each record
         df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
+        # Display results
+        st.write("📌 Classification Results:")
         st.dataframe(df[['content', 'class']])
         # Provide CSV download
         output = io.BytesIO()
         df.to_csv(output, index=False, encoding="utf-8-sig")
+        st.download_button(label="📥 Download Classified News", data=output.getvalue(), file_name="classified_news.csv", mime="text/csv")
 ## ====================== Component 2: Q&A ====================== ##
+st.header("💬 Ask a Question About the News")
 st.markdown("Enter a question and provide a news article to get an answer.")
+question = st.text_input("🔍 Ask a question:")
+context = st.text_area("📝 Provide the news article content:", height=150)
 if question and context.strip():
+    model_name_qa = "distilbert-base-uncased-distilled-squad"
     qa_pipeline = pipeline("question-answering", model=model_name_qa, tokenizer=model_name_qa)
     result = qa_pipeline(question=question, context=context)
+    # Display answer
     if 'answer' in result and result['answer']:
+        st.success(f"**🗣 Answer:** {result['answer']}")
     else:
+        st.warning("⚠️ No answer found in the provided content.")