|
import streamlit as st |
|
from googlesearch import search |
|
import requests |
|
from bs4 import BeautifulSoup |
|
import chunk |
|
|
|
|
|
def google_search(query):
    """Return up to two Medium links that Google finds for *query*.

    The user's terms are combined with a ``site:`` filter for Medium and
    passed to ``search``; only the first two results are kept.

    Args:
        query: Free-text search terms entered by the user.

    Returns:
        A list holding at most two result links, or ``None`` when the search
        raised an exception (the error is shown with ``st.error``).
    """
    from itertools import islice

    try:
        # A plain space separates the user's terms from the site filter.  The
        # previous "/t" separator was sent to Google as two literal
        # characters glued onto the last search term.
        site_query = f"{query} site:https://medium.com/"
        search_results = search(site_query, num_results=10)
        # islice stops after two items instead of pulling a third result
        # only to throw it away.
        return list(islice(search_results, 2))
    except Exception as e:
        # Best-effort boundary: any failure of the search is reported in the
        # UI rather than crashing the app.
        st.error(f"An error occurred: {e}")
        return None
|
|
|
|
|
def fetch_webpage_content(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection, which would freeze the app.

    Returns:
        The response text, or ``None`` when the request fails or the server
        answers with an error status (the error is shown with ``st.error``).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions so they are reported below.
        response.raise_for_status()
        return response.text
    except Exception as e:
        # Best-effort boundary: report the failure in the UI and let the
        # caller skip this link.
        st.error(f"Failed to fetch the webpage content: {e}")
        return None
|
|
|
|
|
def scrape_text(webpage_content):
    """Extract the visible text of an HTML document, one phrase per line.

    ``<script>`` and ``<style>`` elements are removed before the text is
    read.  Each text line is stripped, broken into phrases wherever two
    consecutive spaces occur, and empty phrases are dropped.

    Args:
        webpage_content: HTML markup to parse.

    Returns:
        The non-empty phrases joined with newlines, or ``None`` when parsing
        raised an exception (the error is shown with ``st.error``).
    """
    try:
        soup = BeautifulSoup(webpage_content, 'html.parser')

        # Script and style bodies are code, not readable page text.
        for tag in soup(["script", "style"]):
            tag.decompose()

        # Two spaces, not one: splitting on a single space would put every
        # word on its own line and destroy the sentences.
        phrase_gap = "  "
        phrases = (
            phrase.strip()
            for line in soup.get_text().splitlines()
            for phrase in line.strip().split(phrase_gap)
        )

        # The loop variable is deliberately not called "chunk", which is the
        # name of a module imported at the top of the file.
        return '\n'.join(phrase for phrase in phrases if phrase)
    except Exception as e:
        # Best-effort boundary: report the failure in the UI and let the
        # caller skip this page.
        st.error(f"Failed to scrape text from webpage content: {e}")
        return None
|
|
|
|
|
# --- Streamlit page -------------------------------------------------------
# Asks for a query, looks up links for it, and for every link that can be
# fetched and scraped shows the text plus a download button.
st.title("Search Link Finder")

query = st.text_input("Enter search query", "")

if st.button("Search"):
    if not query:
        st.error("Please enter a query")
    else:
        first_two_links = google_search(query)
        if not first_two_links:
            # Covers both an empty result list and a failed search (None).
            st.warning("No results found")
        else:
            for i, link in enumerate(first_two_links):
                st.success(f"Link {i+1}: [Click here]({link})")

                # Each helper reports its own error; a falsy result just
                # means there is nothing to show for this link.
                webpage_content = fetch_webpage_content(link)
                if not webpage_content:
                    continue

                scraped_text = scrape_text(webpage_content)
                if not scraped_text:
                    continue

                st.write(f"Scraped Content from Link {i+1} (Chunked):")
                chunk.display_chunks(scraped_text)

                st.download_button(
                    label=f"Download Full Webpage Content from Link {i+1}",
                    data=scraped_text,
                    file_name=f"webpage_content_{i+1}.txt",
                    mime="text/plain",
                )
|
|