Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,5 @@
|
|
1 |
-
|
2 |
-
|
3 |
import streamlit as st
|
4 |
|
5 |
-
|
6 |
from selenium import webdriver
|
7 |
from selenium.webdriver.common.by import By
|
8 |
from selenium.webdriver.chrome.options import Options
|
@@ -141,4 +138,155 @@ def scrape_google_reviews(url):
|
|
141 |
df = pd.DataFrame(review_data)
|
142 |
df[df["review_text"].str.contains("No review text")==False]
|
143 |
st.dataframe(df)
|
|
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
|
|
|
3 |
from selenium import webdriver
|
4 |
from selenium.webdriver.common.by import By
|
5 |
from selenium.webdriver.chrome.options import Options
|
|
|
138 |
df = pd.DataFrame(review_data)
|
139 |
df[df["review_text"].str.contains("No review text")==False]
|
140 |
st.dataframe(df)
|
141 |
+
|
142 |
|
143 |
+
if tokenizer and model:
|
144 |
+
inputs = tokenizer(df['review_text'].tolist(), return_tensors="pt", padding=True, truncation=True)
|
145 |
+
with torch.no_grad():
|
146 |
+
logits = model(**inputs).logits
|
147 |
+
predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
|
148 |
+
predicted_labels = predicted_probabilities.argmax(dim=1)
|
149 |
+
results = []
|
150 |
+
for i, label in enumerate(predicted_labels):
|
151 |
+
results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
|
152 |
+
sentiment_df = pd.DataFrame(results)
|
153 |
+
value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
|
154 |
+
final_df = value_counts1
|
155 |
+
|
156 |
+
|
157 |
+
|
158 |
+
|
159 |
+
|
160 |
+
|
161 |
+
fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
|
162 |
+
fig1.update_traces(textposition='inside', textinfo='percent+label')
|
163 |
+
|
164 |
+
result = pd.concat([df, sentiment_df], axis=1)
|
165 |
+
result['rating'] = result['rating'].astype(int)
|
166 |
+
|
167 |
+
|
168 |
+
|
169 |
+
|
170 |
+
|
171 |
+
|
172 |
+
fig2 = px.bar(result, x='Sentiment', y='review_date',
|
173 |
+
hover_data=['rating', 'review_date'], color='rating',
|
174 |
+
labels={'Sentiment':'Sentiment'}, height=400)
|
175 |
+
|
176 |
+
|
177 |
+
|
178 |
+
fig3 = px.scatter(result, x=df["review_date"], y=df["rating"], color=df["rating"])
|
179 |
+
return sentiment_df, result, fig1, fig2, fig3
|
180 |
+
else:
|
181 |
+
return df, None, None, None, None
|
182 |
+
except Exception as e:
|
183 |
+
st.error(f"An error occurred: {e}")
|
184 |
+
if 'driver' in locals():
|
185 |
+
driver.quit()
|
186 |
+
return None, None, None, None, None
|
187 |
+
|
188 |
+
# --- Streamlit user interface -------------------------------------------
st.title("Google Maps Reviews Sentiment Analysis")

# Initialise the per-session counter of analysed URLs on first run only;
# st.session_state persists across script reruns within one browser session.
if 'url_count' not in st.session_state:
    st.session_state['url_count'] = 0

# Upper bound on how many URLs one session may analyse.
max_attempts = 2
|
200 |
+
|
201 |
+
def update_url_count():
    """Record one more analysed URL in the session-wide attempt counter."""
    st.session_state['url_count'] = st.session_state['url_count'] + 1
|
203 |
+
|
204 |
+
def clear_question():
    """Button callback: blank out the URL text-input widget (key ``url``)."""
    st.session_state["url"] = ""
|
206 |
+
|
207 |
+
# URL entry widget; key="url" lets clear_question() reset it via session state.
url = st.text_input("Enter Google Maps Reviews URL:", key="url")
# Button that clears the field through the on_click callback.
st.button("Clear question", on_click=clear_question)
|
209 |
+
|
210 |
+
|
211 |
+
|
212 |
+
|
213 |
+
|
214 |
+
|
215 |
+
|
216 |
+
# --- Main action: scrape the URL, run sentiment analysis, render results ---
if st.button("Sentiment Analysis"):
    placeholder = st.empty()
    progress_bar = st.progress(0)

    if st.session_state['url_count'] < max_attempts:
        if url:
            # scrape_google_reviews returns (sentiment_df, combined_df, pie_fig,
            # bar_fig, scatter_fig). On error it returns all Nones; when the
            # sentiment model is unavailable it returns (reviews_df, None, ...).
            sentiment_df, df, fig1, fig2, fig3 = scrape_google_reviews(url)
            # BUG FIX: also require df is not None — the model-unavailable path
            # returns a non-None first element with df=None, and the original
            # df[['review_text', ...]] then raised on None.
            if sentiment_df is not None and df is not None:
                st.success("Reviews scraped successfully!")
                df1 = df[['review_text', 'Sentiment', 'rating', 'review_date']]
                st.dataframe(df1)

                tab1, tab2, tab3 = st.tabs(["Pie Chart", "Bar Chart", "Scatter Plot"])
                # Render each chart in its tab; a figure may be None if the
                # scraper could not build it.
                for tab, fig in ((tab1, fig1), (tab2, fig2), (tab3, fig3)):
                    if fig is not None:
                        with tab:
                            st.plotly_chart(fig)

                placeholder.text("Scrolling complete.")
                progress_bar.empty()

                # Package the summary CSV into an in-memory zip for download.
                buf = io.BytesIO()
                with zipfile.ZipFile(buf, "w") as myzip:
                    myzip.writestr("Summary of the results.csv", df1.to_csv(index=False))
                with stylable_container(
                    key="download_button",
                    css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
                ):
                    st.download_button(
                        label="Download zip file",
                        data=buf.getvalue(),
                        file_name="zip file.zip",
                        mime="application/zip",
                    )
            else:
                st.warning("Failed to scrape reviews.")
            # Count this attempt whether or not scraping succeeded.
            update_url_count()
        else:
            st.warning("Please enter a URL.")
    else:
        # BUG FIX: the progress bar was created above but never cleared on
        # this branch, leaving a stuck empty progress widget on screen.
        progress_bar.empty()
        st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")

st.write(f"URL pasted {st.session_state['url_count']} times.")
|
264 |
+
|
265 |
+
|
266 |
+
|
267 |
+
|
268 |
+
|
269 |
+
|
270 |
+
|
271 |
+
|
272 |
+
|
273 |
+
|
274 |
+
|
275 |
+
|
276 |
+
|
277 |
+
|
278 |
+
|
279 |
+
|
280 |
+
|
281 |
+
|
282 |
+
|
283 |
+
|
284 |
+
|
285 |
+
|
286 |
+
|
287 |
+
|
288 |
+
|
289 |
+
|
290 |
+
|
291 |
+
|
292 |
+
|