TAgroup5 committed on
Commit
4a1b338
·
verified ·
1 Parent(s): 9bc7dbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -47
app.py CHANGED
@@ -15,40 +15,35 @@ st.markdown("""
15
  font-family: 'Arial', sans-serif;
16
  }
17
  .stApp {
18
- background-image: url('https://i.pinimg.com/736x/9f/07/01/9f070105a396cfe2dc0dc5d7771e61f6.jpg');
19
  background-size: cover;
20
  background-position: center;
21
  padding: 20px;
22
  border-radius: 10px;
23
  box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
24
  }
25
- h1, h2 {
26
  color: #ff4b4b;
 
27
  }
28
  .stButton>button {
29
- background-color: #ff4b4b !important;
30
- color: white;
31
- font-size: 16px;
32
- border-radius: 5px;
 
 
33
  }
34
  .stDownloadButton>button {
35
  background-color: #28a745 !important;
36
- color: white;
37
- font-size: 16px;
38
- border-radius: 5px;
39
- }
40
- .stTextInput>div>div>input {
41
- border-radius: 5px;
42
- border: 1px solid #ccc;
43
- }
44
- .stTextArea>div>textarea {
45
- border-radius: 5px;
46
- border: 1px solid #ccc;
47
  }
48
  </style>
49
  """, unsafe_allow_html=True)
50
 
51
- # Load fine-tuned models and tokenizers
52
  model_name_classification = "TAgroup5/news-classification-model"
53
  model = AutoModelForSequenceClassification.from_pretrained(model_name_classification)
54
  tokenizer = AutoTokenizer.from_pretrained(model_name_classification)
@@ -59,64 +54,82 @@ tokenizer_qa = AutoTokenizer.from_pretrained(model_name_qa)
59
 
60
  # Initialize pipelines
61
  text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
62
- qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
63
 
64
  # Streamlit App
65
  st.title(" News Classification and Q&A ")
66
 
67
- ## ====================== Component 1: News Classification ====================== ##
68
  st.header("📌 Classify News Articles")
69
- st.markdown("Upload a CSV file with a **'content'** column to classify news into categories.")
70
 
71
  uploaded_file = st.file_uploader("📂 Choose a CSV file", type="csv")
72
 
73
  if uploaded_file is not None:
74
- try:
75
- df = pd.read_csv(uploaded_file, encoding="utf-8")
76
- except UnicodeDecodeError:
77
- df = pd.read_csv(uploaded_file, encoding="ISO-8859-1")
78
-
79
  if 'content' not in df.columns:
80
  st.error("❌ Error: The uploaded CSV must contain a 'content' column.")
81
  else:
82
  st.write("✅ Preview of uploaded data:")
83
  st.dataframe(df.head())
84
 
85
- # Preprocessing function
86
  def preprocess_text(text):
87
- text = text.lower() # Convert to lowercase
88
- text = re.sub(r'\s+', ' ', text) # Remove extra spaces
89
- text = re.sub(r'[^a-z\s]', '', text) # Remove special characters & numbers
90
  return text
91
 
92
- # Apply preprocessing and classification
93
  df['processed_content'] = df['content'].apply(preprocess_text)
94
-
95
- # Classify each record into one of the five classes
96
  df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
97
 
98
- # Show results
99
  st.write("🔍 Classification Results:")
100
  st.dataframe(df[['content', 'class']])
101
 
102
- # Provide CSV download
103
  output = io.BytesIO()
104
  df.to_csv(output, index=False, encoding="utf-8-sig")
105
- st.download_button(label="📥 Download Classified News", data=output.getvalue(), file_name="classified_news.csv", mime="text/csv")
106
-
107
- ## ====================== Component 2: Q&A ====================== ##
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  st.header("💬 Ask a Question About the News")
109
- st.markdown("Enter a question and provide a news article to get an AI-generated answer.")
110
-
111
  question = st.text_input("❓ Ask a question:")
112
  context = st.text_area("📰 Provide the news article or content:", height=150)
113
 
114
  if question and context.strip():
115
- model_name_qa = "distilbert-base-uncased-distilled-squad"
116
- qa_pipeline = pipeline("question-answering", model=model_name_qa, tokenizer=model_name_qa)
117
  result = qa_pipeline(question=question, context=context)
118
-
119
- if 'answer' in result and result['answer']:
120
- st.success(f"✅ Answer: {result['answer']}")
121
- else:
122
- st.warning("⚠️ No answer found in the provided content.")
 
15
  font-family: 'Arial', sans-serif;
16
  }
17
  .stApp {
18
+ background-image: url('https://i.pinimg.com/474x/9c/68/86/9c6886dd642a4869f3fa4578f9fe34ef.jpg');
19
  background-size: cover;
20
  background-position: center;
21
  padding: 20px;
22
  border-radius: 10px;
23
  box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
24
  }
25
+ h1 {
26
  color: #ff4b4b;
27
+ text-align: center;
28
  }
29
  .stButton>button {
30
+ background-color: #088da5 !important;
31
+ color: white !important;
32
+ font-size: 18px !important;
33
+ border-radius: 10px !important;
34
+ width: 100%;
35
+ padding: 10px;
36
  }
37
  .stDownloadButton>button {
38
  background-color: #28a745 !important;
39
+ color: white !important;
40
+ font-size: 16px !important;
41
+ border-radius: 10px !important;
 
 
 
 
 
 
 
 
42
  }
43
  </style>
44
  """, unsafe_allow_html=True)
45
 
46
+ # Load fine-tuned models
47
  model_name_classification = "TAgroup5/news-classification-model"
48
  model = AutoModelForSequenceClassification.from_pretrained(model_name_classification)
49
  tokenizer = AutoTokenizer.from_pretrained(model_name_classification)
 
54
 
55
  # Initialize pipelines
56
  text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
57
+ qa_pipeline = pipeline("question-answering", model=model_qa, tokenizer=tokenizer_qa)
58
 
59
  # Streamlit App
60
  st.title(" News Classification and Q&A ")
61
 
62
+ ## ====================== News Classification ====================== ##
63
  st.header("📌 Classify News Articles")
64
+ st.markdown("Upload a CSV file containing a **'Content'** column to classify news into pre-defined categories.")
65
 
66
  uploaded_file = st.file_uploader("📂 Choose a CSV file", type="csv")
67
 
68
  if uploaded_file is not None:
69
+ df = pd.read_csv(uploaded_file, encoding="utf-8")
 
 
 
 
70
  if 'content' not in df.columns:
71
  st.error("❌ Error: The uploaded CSV must contain a 'content' column.")
72
  else:
73
  st.write("✅ Preview of uploaded data:")
74
  st.dataframe(df.head())
75
 
 
76
  def preprocess_text(text):
77
+ text = text.lower()
78
+ text = re.sub(r'\s+', ' ', text)
79
+ text = re.sub(r'[^a-z\s]', '', text)
80
  return text
81
 
 
82
  df['processed_content'] = df['content'].apply(preprocess_text)
 
 
83
  df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
84
 
 
85
  st.write("🔍 Classification Results:")
86
  st.dataframe(df[['content', 'class']])
87
 
 
88
  output = io.BytesIO()
89
  df.to_csv(output, index=False, encoding="utf-8-sig")
90
+ st.download_button("📥 Download Classified News", data=output.getvalue(), file_name="output.csv", mime="text/csv")
91
+
92
+ st.write("🔍 **Filter by Category**")
93
+ categories = ['All', 'Business', 'Opinion', 'Political_gossip', 'Sports', 'World_news']
94
+
95
+ col1, col2, col3, col4, col5, col6 = st.columns(6)
96
+ selected_category = 'All'
97
+
98
+ with col1:
99
+ if st.button("All"):
100
+ selected_category = 'All'
101
+ with col2:
102
+ if st.button("📈 Business"):
103
+ selected_category = 'Business'
104
+ with col3:
105
+ if st.button("🗣 Opinion"):
106
+ selected_category = 'Opinion'
107
+ with col4:
108
+ if st.button("🏛 Political Gossip"):
109
+ selected_category = 'Political_gossip'
110
+ with col5:
111
+ if st.button("⚽ Sports"):
112
+ selected_category = 'Sports'
113
+ with col6:
114
+ if st.button("🌎 World News"):
115
+ selected_category = 'World_news'
116
+
117
+ if selected_category != 'All':
118
+ filtered_df = df[df['class'] == selected_category]
119
+ else:
120
+ filtered_df = df
121
+
122
+ st.write(f"🔎 Showing news articles in category: {selected_category}")
123
+ st.dataframe(filtered_df[['content', 'class']])
124
+
125
+ # Add a separator
126
+ st.markdown("---")
127
+
128
+ ## ====================== Q&A ====================== ##
129
  st.header("💬 Ask a Question About the News")
 
 
130
  question = st.text_input("❓ Ask a question:")
131
  context = st.text_area("📰 Provide the news article or content:", height=150)
132
 
133
  if question and context.strip():
 
 
134
  result = qa_pipeline(question=question, context=context)
135
+ st.success(f"✅ Answer: {result['answer']}")