TAgroup5 committed on
Commit
237a63b
·
verified ·
1 Parent(s): 1c450f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -65
app.py CHANGED
@@ -1,80 +1,75 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import time
4
- from transformers import pipeline
5
 
6
- # Set page title and layout
7
- st.set_page_config(page_title="CSV News Classifier", layout="wide")
 
 
8
 
9
- # Load the fine-tuned Hugging Face model
10
- @st.cache_resource
11
- def load_model():
12
- return pipeline("text-classification", model="TAgroup5/daily-mirror-news-classifier")
13
 
14
- classifier = load_model()
 
15
 
16
- # Custom CSS for Colors and Styling
17
- st.markdown(
18
- """
19
- <style>
20
- body { background-color: #f8f9fa; }
21
- .stApp { background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0px 0px 10px rgba(0,0,0,0.1); }
22
-
23
- .custom-box {
24
- background-color: #f0f0f0;
25
- padding: 15px;
26
- border-radius: 10px;
27
- border: 1px solid #ccc;
28
- margin-bottom: 15px;
29
- }
30
- h1 { color: #ff5733; text-align: center; }
31
- h2, h3 { color: #007bff; }
32
- </style>
33
- """,
34
- unsafe_allow_html=True
35
- )
36
-
37
- # Page Title
38
- st.title("📊 News Classification from CSV")
39
-
40
- # File Uploader
41
- st.markdown('<div class="custom-box"><h3>📂 Upload a CSV file</h3></div>', unsafe_allow_html=True)
42
- uploaded_file = st.file_uploader("", type=["csv"])
43
 
44
  if uploaded_file is not None:
45
- # Read CSV
46
  df = pd.read_csv(uploaded_file)
 
 
 
 
 
 
47
 
48
- # Show Preview
49
- st.markdown("### 🔍 **Preview of Uploaded File**", unsafe_allow_html=True)
50
- st.dataframe(df.head())
 
 
 
 
 
 
 
 
 
51
 
52
- # Assuming the column with news articles is named "news_text"
53
- if "news_text" not in df.columns:
54
- st.error("❌ The uploaded CSV must contain a 'news_text' column.")
55
- else:
56
- # Perform classification
57
- st.markdown("### 🏷️ **Classifying News Articles...**")
58
- with st.spinner("Processing..."):
59
- df["class"] = df["news_text"].apply(lambda text: classifier(text)[0]["label"])
60
 
61
- # Show Preview of Results
62
- st.markdown("### 📌 **Preview of Classified Data**", unsafe_allow_html=True)
63
- st.dataframe(df.head())
64
 
65
- # Download Button for Classified CSV
66
- csv = df.to_csv(index=False).encode("utf-8")
67
- st.markdown("### 📥 **Download Classified CSV**", unsafe_allow_html=True)
68
- st.download_button(
69
- label="⬇️ **Download Classified CSV**",
70
- data=csv,
71
- file_name="classified_news.csv",
72
- mime="text/csv",
73
- help="Click to download the classified news file"
74
- )
75
-
76
- # Footer
77
- st.markdown("---")
78
- st.markdown('<p style="text-align:center; font-size:14px; color:#6c757d;">👨‍💻 Developed by <b>Ridmi Navodya</b> | Powered by Streamlit 🚀</p>', unsafe_allow_html=True)
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
4
+ import io
5
 
6
+ # Load pre-trained model and tokenizer for text classification
7
+ model_name = "TAgroup5/news-classification-model"
8
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
9
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
10
 
11
+ # Initialize the text classification pipeline
12
+ text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
 
 
13
 
14
+ # Initialize the question answering pipeline
15
+ qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
16
 
17
+ # Streamlit App Layout
18
+ st.title("News Classification and Q&A")
19
+
20
+ # Component 1: Text Classification Pipeline
21
+ st.header("Classify News Articles")
22
+
23
+ st.markdown("""
24
+ Upload a CSV file containing news articles, and the model will classify each article
25
+ into one of the following categories: Business, Opinion, Political Gossip, Sports, or World News.
26
+ """)
27
+
28
+ uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  if uploaded_file is not None:
 
31
  df = pd.read_csv(uploaded_file)
32
+
33
+ if 'content' not in df.columns:
34
+ st.error("The uploaded CSV file must have a 'content' column containing news excerpts.")
35
+ else:
36
+ st.write("Preview of the data:")
37
+ st.dataframe(df.head())
38
 
39
+ # Preprocess the data and classify each article
40
+ def preprocess_text(text):
41
+ # Apply necessary preprocessing steps here (e.g., removing stopwords, special characters, etc.)
42
+ return text
43
+
44
+ # Apply preprocessing and classification
45
+ df['processed_content'] = df['content'].apply(preprocess_text)
46
+ df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'])
47
+
48
+ # Show the results
49
+ st.write("Classification Results:")
50
+ st.dataframe(df[['content', 'class']])
51
 
52
+ # Provide an option to download the output as CSV
53
+ output = io.StringIO()
54
+ df.to_csv(output, index=False)
55
+ st.download_button(label="Download classified news", data=output.getvalue(), file_name="output.csv", mime="text/csv")
 
 
 
 
56
 
 
 
 
57
 
58
+ # Component 2: Q&A Pipeline
59
+ st.header("Ask a Question About the News")
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ st.markdown("""
62
+ Type in a question, and the model will extract an answer from the provided news content.
63
+ """)
64
+
65
+ question = st.text_input("Ask a question:")
66
+
67
+ if question:
68
+ context = st.text_area("Provide the news article or content for the Q&A:", height=150)
69
+
70
+ if context:
71
+ # Perform the question-answering task
72
+ result = qa_pipeline(question=question, context=context)
73
+
74
+ st.write("Answer:", result['answer'])
75