nlpblogs committed on
Commit
f465edc
·
verified ·
1 Parent(s): 8377b91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -11
app.py CHANGED
@@ -70,6 +70,9 @@ with st.sidebar:
70
  ''')
71
 
72
 
 
 
 
73
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
74
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
75
  model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
@@ -107,13 +110,12 @@ if st.button("Sentiment Analysis", type="secondary"):
107
  placeholder = st.empty()
108
  progress_bar = st.progress(0)
109
 
110
- for item in range(150):
111
  try:
112
- body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
113
- body.send_keys(Keys.END)
114
- placeholder.text(f"Scrolled {item + 1} times")
115
- progress_bar.progress((item + 1) / 150)
116
- time.sleep(0.5)
117
  except Exception as e:
118
  st.error(f"Exception during scrolling: {e}")
119
  break
@@ -125,9 +127,16 @@ if st.button("Sentiment Analysis", type="secondary"):
125
  try:
126
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
127
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
 
 
128
  user_id = 1
129
- for comment in comments:
130
- timestamp = datetime.now().strftime("%Y-%m-%d")
 
 
 
 
 
131
  data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
132
  user_id += 1
133
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
@@ -170,16 +179,14 @@ if st.button("Sentiment Analysis", type="secondary"):
170
  file_name='Summary of the results.csv',
171
  mime='text/csv',
172
  )
173
-
174
  else:
175
  st.warning("Please enter a URL.")
176
  else:
177
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
178
 
179
- if 'url_count' in st.session_state: #added if statement.
180
  st.write(f"URL pasted {st.session_state['url_count']} times.")
181
 
182
-
183
 
184
 
185
 
 
70
  ''')
71
 
72
 
73
+
74
+
75
+
76
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
77
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
78
  model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
 
110
  placeholder = st.empty()
111
  progress_bar = st.progress(0)
112
 
113
+ for item in range(30): #Reduced to 30 scrolls.
114
  try:
115
+ driver.execute_script("window.scrollBy(0, 500);") #javascript scroll
116
+ WebDriverWait(driver, 10).until(
117
+ EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text"))
118
+ )
 
119
  except Exception as e:
120
  st.error(f"Exception during scrolling: {e}")
121
  break
 
127
  try:
128
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
129
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
130
+ timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a") #timestamp elements.
131
+
132
  user_id = 1
133
+ for comment, timestamp_element in zip(comments, timestamps):
134
+ timestamp_text = timestamp_element.get_attribute('href')
135
+ timestamp_text = timestamp_text.split("lc=")[-1]
136
+ try:
137
+ timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d %H:%M:%S")
138
+ except:
139
+ timestamp = "Timestamp not found"
140
  data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
141
  user_id += 1
142
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
 
179
  file_name='Summary of the results.csv',
180
  mime='text/csv',
181
  )
 
182
  else:
183
  st.warning("Please enter a URL.")
184
  else:
185
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
186
 
187
+ if 'url_count' in st.session_state:
188
  st.write(f"URL pasted {st.session_state['url_count']} times.")
189
 
 
190
 
191
 
192