Spaces:
Sleeping
Sleeping
File size: 4,998 Bytes
246133f 5cdc45c 237a63b 5cdc45c ec68c76 73c0f99 f72fd34 87e256d 0b9251c f72fd34 0b9251c f72fd34 0b9251c f72fd34 2e81854 f72fd34 a73b5ca 2e81854 f72fd34 2e81854 f72fd34 2e81854 276b437 3412bc8 73c0f99 3412bc8 9ff5a0e d033ce0 2ec8bb5 9ff5a0e 0b9251c 2ec8bb5 2e81854 f72fd34 a481c00 9ff5a0e f72fd34 9ff5a0e 2e81854 9ff5a0e f72fd34 9ff5a0e f72fd34 9ff5a0e 2e81854 9ff5a0e f72fd34 9ff5a0e a481c00 2e81854 a73b5ca 2e81854 a73b5ca 2e81854 a73b5ca 2e81854 d033ce0 2e81854 d033ce0 2d33e82 d033ce0 2e81854 f72fd34 9ff5a0e 2e81854 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import streamlit as st
import pandas as pd
import re
import io
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from transformers import AutoModelForQuestionAnswering
# Streamlit UI
# Custom CSS injected into the page: base body colours/font, a background
# image and padding on the app container, a centred red <h1>, and coloured,
# rounded regular and download buttons.
_PAGE_STYLE = """
<style>
body {
background-color: #f4f4f4;
color: #333333;
font-family: 'Arial', sans-serif;
}
.stApp {
background-image: url('https://i.pinimg.com/474x/9c/68/86/9c6886dd642a4869f3fa4578f9fe34ef.jpg');
background-size: cover;
background-position: center;
padding: 20px;
border-radius: 10px;
box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
}
h1 {
color: #ff4b4b;
text-align: center;
}
.stButton>button {
background-color: #088da5 !important;
color: white !important;
font-size: 18px !important;
border-radius: 10px !important;
width: 100%;
padding: 10px;
}
.stDownloadButton>button {
background-color: #28a745 !important;
color: white !important;
font-size: 16px !important;
border-radius: 10px !important;
}
</style>
"""

# Page configuration is issued before any other Streamlit call in the script.
st.set_page_config(
    page_title="News Classifier & Q&A",
    layout="wide",
)

# unsafe_allow_html lets the raw <style> element through to the page.
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)
# Load fine-tuned models
model_name_classification = "TAgroup5/daily-mirror-news-classifier"
model_name_qa = "distilbert-base-cased-distilled-squad"


@st.cache_resource
def _load_classification_components(model_name):
    """Load the sequence-classification model, its tokenizer and a pipeline.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_resource`` keeps these heavy objects alive across reruns so
    they are loaded once instead of on every click.

    Args:
        model_name: Hugging Face model identifier to load.

    Returns:
        Tuple ``(model, tokenizer, text-classification pipeline)``.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline("text-classification", model=loaded_model, tokenizer=loaded_tokenizer)
    return loaded_model, loaded_tokenizer, classifier


@st.cache_resource
def _load_qa_components(model_name):
    """Load the question-answering model, its tokenizer and a pipeline.

    Cached with ``st.cache_resource`` for the same reason as the classifier:
    the script reruns on every interaction.

    Args:
        model_name: Hugging Face model identifier to load.

    Returns:
        Tuple ``(model, tokenizer, question-answering pipeline)``.
    """
    loaded_model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    answerer = pipeline("question-answering", model=loaded_model, tokenizer=loaded_tokenizer)
    return loaded_model, loaded_tokenizer, answerer


# Initialize pipelines
# The original module-level names are kept so the rest of the script is unchanged.
model, tokenizer, text_classification_pipeline = _load_classification_components(
    model_name_classification
)
model_qa, tokenizer_qa, qa_pipeline = _load_qa_components(model_name_qa)
# Streamlit App
def preprocess_text(text):
    """Normalise one article body before it is sent to the classifier.

    The text is lower-cased, every whitespace run is collapsed to a single
    space, and every character other than a-z or whitespace is removed.

    Args:
        text: Cell value from the 'content' column.

    Returns:
        The cleaned string. Non-string values (for example the NaN that an
        empty CSV cell is read as) yield "" so the row is labelled "Unknown"
        instead of raising AttributeError on ``.lower()``.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^a-z\s]', '', text)
    return text


@st.cache_data
def _classify_texts(texts):
    """Return one predicted label per preprocessed text.

    Cached on the tuple of texts: Streamlit reruns the script on every widget
    interaction (including each filter button), and without the cache the
    whole file would be pushed through the model again each time.

    Args:
        texts: Tuple of preprocessed article strings.

    Returns:
        List of labels in input order; blank texts are labelled "Unknown".
    """
    return [
        # truncation=True keeps articles longer than the model's maximum
        # sequence length from raising inside the pipeline.
        text_classification_pipeline(text, truncation=True)[0]['label'] if text.strip() else "Unknown"
        for text in texts
    ]


st.title(" News Classification and Q&A ")

## ====================== News Classification ====================== ##
st.header("π Classify News Articles")
st.markdown("Upload a CSV file containing a **'Content'** column to classify news into pre-defined categories.")
uploaded_file = st.file_uploader("π Choose a CSV file", type="csv")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding="utf-8")

    # The instructions above name the column 'Content' while the rest of the
    # code uses 'content'; accept any casing (and stray spaces) of the header
    # and normalise it. An exact 'content' column always takes precedence.
    if 'content' not in df.columns:
        alias = next(
            (col for col in df.columns if str(col).strip().lower() == 'content'),
            None,
        )
        if alias is not None:
            df = df.rename(columns={alias: 'content'})

    if 'content' not in df.columns:
        st.error("β Error: The uploaded CSV must contain a 'content' column.")
    else:
        st.write("✅ Preview of uploaded data:")
        st.dataframe(df.head())

        df['processed_content'] = df['content'].apply(preprocess_text)
        df['class'] = _classify_texts(tuple(df['processed_content']))

        st.write("π Classification Results:")
        st.dataframe(df[['content', 'class']])

        # Serialise the full frame (including the helper columns) for download.
        output = io.BytesIO()
        df.to_csv(output, index=False, encoding="utf-8-sig")
        st.download_button("π₯ Download Classified News", data=output.getvalue(), file_name="output.csv", mime="text/csv")

        st.write("π **Filter by Category**")
        # (button label, value compared against the 'class' column)
        category_buttons = [
            ("All", 'All'),
            ("π Business", 'Business'),
            ("π£ Opinion", 'Opinion'),
            ("π Political Gossip", 'Political_gossip'),
            ("β½ Sports", 'Sports'),
            ("π World News", 'World_news'),
        ]
        categories = [category for _, category in category_buttons]

        # A button is only True on the rerun triggered by its click, so the
        # selection falls back to 'All' on any other rerun.
        selected_category = 'All'
        for column, (label, category) in zip(st.columns(len(category_buttons)), category_buttons):
            with column:
                if st.button(label):
                    selected_category = category

        if selected_category != 'All':
            filtered_df = df[df['class'] == selected_category]
        else:
            filtered_df = df

        st.write(f"π Showing news articles in category: {selected_category}")
        st.dataframe(filtered_df[['content', 'class']])
# Add a separator
st.markdown("---")

## ====================== Q&A ====================== ##
st.header("π¬ Ask a Question About the News")
question = st.text_input("β Ask a question:")
context = st.text_area("π° Provide the news article or content:", height=150)

# Run the model only when both fields contain real text. The question is
# checked the same way as the context so a whitespace-only question is not
# sent to the pipeline.
if question and question.strip() and context.strip():
    result = qa_pipeline(question=question, context=context)
    st.success(f"✅ Answer: {result['answer']}")
|