File size: 4,998 Bytes
246133f
 
5cdc45c
237a63b
5cdc45c
ec68c76
73c0f99
f72fd34
 
 
 
 
 
 
 
 
 
87e256d
0b9251c
 
f72fd34
 
0b9251c
f72fd34
0b9251c
f72fd34
2e81854
f72fd34
 
a73b5ca
2e81854
 
 
 
 
f72fd34
 
 
2e81854
 
 
f72fd34
 
 
 
2e81854
276b437
3412bc8
 
73c0f99
3412bc8
 
 
 
 
9ff5a0e
d033ce0
2ec8bb5
9ff5a0e
0b9251c
2ec8bb5
2e81854
f72fd34
a481c00
9ff5a0e
f72fd34
9ff5a0e
 
2e81854
9ff5a0e
f72fd34
9ff5a0e
f72fd34
9ff5a0e
 
 
2e81854
 
 
9ff5a0e
 
 
 
 
f72fd34
9ff5a0e
 
 
 
a481c00
2e81854
 
a73b5ca
2e81854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a73b5ca
2e81854
 
 
 
 
a73b5ca
2e81854
 
 
d033ce0
 
 
2e81854
d033ce0
2d33e82
 
 
d033ce0
2e81854
f72fd34
 
 
9ff5a0e
 
 
2e81854
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import streamlit as st
import pandas as pd
import re
import io
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from transformers import AutoModelForQuestionAnswering

# Streamlit UI
# Stylesheet injected into the page: base typography, the app background image,
# the title colour, and the look of regular and download buttons.
_PAGE_CSS = """
    <style>
        body {
            background-color: #f4f4f4;
            color: #333333;
            font-family: 'Arial', sans-serif;
        }
        .stApp {
            background-image: url('https://i.pinimg.com/474x/9c/68/86/9c6886dd642a4869f3fa4578f9fe34ef.jpg');
            background-size: cover;
            background-position: center;
            padding: 20px;
            border-radius: 10px;
            box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
        }
        h1 {
            color: #ff4b4b;
            text-align: center;
        }
        .stButton>button {
            background-color: #088da5 !important;
            color: white !important;
            font-size: 18px !important;
            border-radius: 10px !important;
            width: 100%;
            padding: 10px;
        }
        .stDownloadButton>button {
            background-color: #28a745 !important;
            color: white !important;
            font-size: 16px !important;
            border-radius: 10px !important;
        }
    </style>
"""

# The page configuration is issued before any other Streamlit call in the script.
st.set_page_config(page_title="News Classifier & Q&A", layout="wide")
# Raw HTML has to be explicitly allowed for the <style> tag to be emitted.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Load fine-tuned models
model_name_classification = "TAgroup5/daily-mirror-news-classifier"
model_name_qa = "distilbert-base-cased-distilled-squad"


@st.cache_resource
def _load_classifier(model_id):
    """Load the classification model, its tokenizer, and a ready-to-use pipeline.

    Streamlit re-executes the script on every widget interaction; caching the
    loaded objects keeps the weights from being reloaded on each rerun.

    Returns:
        A ``(model, tokenizer, pipeline)`` tuple for ``model_id``.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_id)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_id)
    classifier = pipeline("text-classification", model=loaded_model, tokenizer=loaded_tokenizer)
    return loaded_model, loaded_tokenizer, classifier


@st.cache_resource
def _load_question_answerer(model_id):
    """Load the question-answering model, its tokenizer, and a ready-to-use pipeline.

    Cached for the same reason as the classifier: the script reruns on every
    interaction and the model should only be loaded once per server process.

    Returns:
        A ``(model, tokenizer, pipeline)`` tuple for ``model_id``.
    """
    loaded_model = AutoModelForQuestionAnswering.from_pretrained(model_id)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_id)
    answerer = pipeline("question-answering", model=loaded_model, tokenizer=loaded_tokenizer)
    return loaded_model, loaded_tokenizer, answerer


# Initialize pipelines (the module-level names are kept so the rest of the
# script can keep referring to them).
model, tokenizer, text_classification_pipeline = _load_classifier(model_name_classification)
model_qa, tokenizer_qa, qa_pipeline = _load_question_answerer(model_name_qa)

# Streamlit App
st.title(" News Classification and Q&A ")

## ====================== News Classification ====================== ##
st.header("📌 Classify News Articles")
# The column name is matched case-sensitively further down, so the help text
# must spell it exactly as the code expects it: lowercase 'content'.
st.markdown("Upload a CSV file containing a **'content'** column to classify news into pre-defined categories.")

# Only CSV uploads are offered; the value is None until the user picks a file.
uploaded_file = st.file_uploader("📂 Choose a CSV file", type="csv")

def _read_uploaded_csv(uploaded):
    """Parse an uploaded CSV as UTF-8; return the DataFrame, or None if unreadable.

    Empty files, malformed CSV and non-UTF-8 bytes all yield None so the caller
    can show a message instead of letting the exception reach the user.
    """
    try:
        return pd.read_csv(uploaded, encoding="utf-8")
    except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError):
        return None


if uploaded_file is not None:
    df = _read_uploaded_csv(uploaded_file)
    if df is None:
        st.error("❌ Error: The uploaded file could not be read as a UTF-8 CSV.")
    elif 'content' not in df.columns:
        st.error("❌ Error: The uploaded CSV must contain a 'content' column.")
    else:
        st.write("✅ Preview of uploaded data:")
        st.dataframe(df.head())

        def preprocess_text(text):
            """Normalise one article body before classification.

            Lower-cases the text, collapses each whitespace run to a single
            space, then removes every character that is not a-z or whitespace.

            Args:
                text: The raw cell value from the 'content' column.

            Returns:
                The cleaned string, or "" when the value is not a string.
            """
            # Non-string cells (e.g. NaN from an empty CSV cell) have no
            # .lower(); map them to "" so the row is handled as blank input
            # instead of raising AttributeError.
            if not isinstance(text, str):
                return ""
            text = text.lower()
            text = re.sub(r'\s+', ' ', text)
            text = re.sub(r'[^a-z\s]', '', text)
            return text

        df['processed_content'] = df['content'].apply(preprocess_text)

        def _predict_label(text):
            """Return the predicted label for one cleaned article, or "Unknown" if blank."""
            if not text.strip():
                return "Unknown"
            # Full articles can exceed the model's maximum sequence length;
            # truncate them rather than letting the model call fail.
            return text_classification_pipeline(text, truncation=True)[0]['label']

        df['class'] = df['processed_content'].apply(_predict_label)

        st.write("🔍 Classification Results:")
        st.dataframe(df[['content', 'class']])

        # Serialise to an in-memory buffer; "utf-8-sig" writes a BOM at the
        # start of the downloaded file.
        output = io.BytesIO()
        df.to_csv(output, index=False, encoding="utf-8-sig")
        st.download_button("📥 Download Classified News", data=output.getvalue(), file_name="output.csv", mime="text/csv")

        st.write("🔍 **Filter by Category**")
        # (button label, value compared against the 'class' column)
        category_buttons = [
            ("All", 'All'),
            ("📈 Business", 'Business'),
            ("🗣 Opinion", 'Opinion'),
            ("🏛 Political Gossip", 'Political_gossip'),
            ("⚽ Sports", 'Sports'),
            ("🌎 World News", 'World_news'),
        ]

        # A button is True only on the rerun triggered by its click, so the
        # selection falls back to 'All' on every other rerun.
        selected_category = 'All'
        for column, (label, category) in zip(st.columns(len(category_buttons)), category_buttons):
            with column:
                if st.button(label):
                    selected_category = category

        if selected_category != 'All':
            filtered_df = df[df['class'] == selected_category]
        else:
            filtered_df = df

        st.write(f"🔎 Showing news articles in category: {selected_category}")
        st.dataframe(filtered_df[['content', 'class']])
        
# Add a separator
st.markdown("---")

## ====================== Q&A ====================== ##
st.header("💬 Ask a Question About the News")
question = st.text_input("❓ Ask a question:")
context = st.text_area("📰 Provide the news article or content:", height=150)

# Run the model only when both fields contain something other than whitespace;
# a blank question gives the QA pipeline nothing meaningful to answer.
if question.strip() and context.strip():
    result = qa_pipeline(question=question, context=context)
    st.success(f"✅ Answer: {result['answer']}")