Update app.py
app.py CHANGED
@@ -5,7 +5,8 @@ import io
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 from transformers import AutoModelForQuestionAnswering
 
-
+
+# Load fine-tuned models and tokenizers for both functions
 model_name_classification = "TAgroup5/news-classification-model"  # Replace with the correct model name
 model = AutoModelForSequenceClassification.from_pretrained(model_name_classification)
 tokenizer = AutoTokenizer.from_pretrained(model_name_classification)
@@ -16,81 +17,68 @@ tokenizer_qa = AutoTokenizer.from_pretrained(model_name_qa)
 
 # Initialize pipelines
 text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
-qa_pipeline = pipeline("question-answering", model=model)
- ... (removed lines 20-53 of the old file are not legible in this rendering)
-        st.error("❌ Error: The uploaded CSV must contain a 'content' column.")
-    else:
-        st.success("✅ File successfully uploaded!")
-        st.write("Preview of uploaded data:")
-        st.dataframe(df.head())
-
-        # Preprocessing function to clean the text
-        def preprocess_text(text):
-            text = text.lower()  # Convert to lowercase
-            text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
-            text = re.sub(r'[^a-z\s]', '', text)  # Remove special characters & numbers
-            return text
-
-        # Apply preprocessing and classification
-        df['processed_content'] = df['content'].apply(preprocess_text)
-        df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
-
-        # Show results
-        st.markdown("### 🔹 Classification Results:")
-        st.dataframe(df[['content', 'class']])
-
-        # Provide CSV download
-        output = io.BytesIO()
-        df.to_csv(output, index=False, encoding="utf-8-sig")
-        st.download_button(label="⬇️ Download classified news", data=output.getvalue(), file_name="classified_news.csv", mime="text/csv")
-
-# ====================== Component 2: Q&A ====================== #
-st.markdown('<h2 class="subheader">❓ Ask a Question About the News</h2>', unsafe_allow_html=True)
-st.markdown("Enter a question and provide a news article to get an answer.")
-
-question = st.text_input("Ask a question:")
-context = st.text_area("Provide the news article or content:", height=150)
-
-if question and context.strip():
-    model_name_qa = "distilbert-base-uncased-distilled-squad"
-    qa_pipeline = pipeline("question-answering", model=model_name_qa, tokenizer=model_name_qa)
-    result = qa_pipeline(question=question, context=context)
-
-#
-
-
-
-
+qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
+
+
+# Streamlit App
+st.title("News Classification and Q&A")
+
+## ====================== Component 1: News Classification ====================== ##
+st.header("Classify News Articles")
+st.markdown("Upload a CSV file with a 'content' column to classify news into categories.")
+
+uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+
+if uploaded_file is not None:
+    try:
+        df = pd.read_csv(uploaded_file, encoding="utf-8")  # Handle encoding issues
+    except UnicodeDecodeError:
+        df = pd.read_csv(uploaded_file, encoding="ISO-8859-1")
+
+    if 'content' not in df.columns:
+        st.error("Error: The uploaded CSV must contain a 'content' column.")
+    else:
+        st.write("Preview of uploaded data:")
+        st.dataframe(df.head())
+
+        # Preprocessing function to clean the text
+        def preprocess_text(text):
+            text = text.lower()  # Convert to lowercase
+            text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
+            text = re.sub(r'[^a-z\s]', '', text)  # Remove special characters & numbers
+            # You don't need tokenization here, as the model tokenizer will handle it
+            return text
+
+
+        # Apply preprocessing and classification
+        df['processed_content'] = df['content'].apply(preprocess_text)
 
+        # Classify each record into one of the five classes
+        df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
+
+        # Show results
+        st.write("Classification Results:")
+        st.dataframe(df[['content', 'class']])
+
+        # Provide CSV download
+        output = io.BytesIO()
+        df.to_csv(output, index=False, encoding="utf-8-sig")
+        st.download_button(label="Download classified news", data=output.getvalue(), file_name="output.csv", mime="text/csv")
+
+## ====================== Component 2: Q&A ====================== ##
+st.header("Ask a Question About the News")
+st.markdown("Enter a question and provide a news article to get an answer.")
+
+question = st.text_input("Ask a question:")
+context = st.text_area("Provide the news article or content for the Q&A:", height=150)
+
+if question and context.strip():
+    model_name_qa = "distilbert-base-uncased-distilled-squad"  # Example of a common Q&A model
+    qa_pipeline = pipeline("question-answering", model=model_name_qa, tokenizer=model_name_qa)
+    result = qa_pipeline(question=question, context=context)
+
+    # Check if the result contains an answer
+    if 'answer' in result and result['answer']:
+        st.write("Answer:", result['answer'])
+    else:
+        st.write("No answer found in the provided content.")