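"""Search Link Finder: a small Streamlit app that finds the first Google
result on realtor.ca for a query, then fetches and displays the page's text.

Requires streamlit, requests, beautifulsoup4, and the googlesearch-python
package (which provides the `googlesearch` module imported below).
"""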
import streamlit as st
from googlesearch import search
import requests
from bs4 import BeautifulSoup


def google_search(query):
    """Return the first Google result for the query, restricted to realtor.ca."""
    try:
        # Scope the search to realtor.ca. The site: operator expects a bare
        # domain, and the query terms need a plain space separator.
        query = query + " site:realtor.ca"
        search_results = search(query, num_results=10)
        # search() yields result URLs lazily; take the first one, if any.
        first_link = next(search_results, None)
        return first_link
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None


def fetch_webpage_content(url):
    """Download the page at url and return its HTML, or None on failure."""
    try:
        # A timeout keeps the app from hanging on a slow server; many sites
        # also reject requests that lack a browser-like User-Agent.
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        return response.text
    except Exception as e:
        st.error(f"Failed to fetch the webpage content: {e}")
        return None


def scrape_text(webpage_content):
    """Extract readable text from raw HTML, dropping scripts and styles."""
    try:
        soup = BeautifulSoup(webpage_content, 'html.parser')

        # Remove non-visible elements before extracting text.
        for script in soup(["script", "style"]):
            script.decompose()

        text = soup.get_text()

        # Collapse whitespace: strip each line, break runs of multiple spaces
        # into phrases, and keep one non-empty chunk per output line.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = '\n'.join(chunk for chunk in chunks if chunk)
        return text
    except Exception as e:
        st.error(f"Failed to scrape text from webpage content: {e}")
        return None


st.title("Search Link Finder")

query = st.text_input("Enter search query", "")

if st.button("Search"):
    if query:
        first_link = google_search(query)
        if first_link:
            st.success(f"First link: [Click here]({first_link})")
            webpage_content = fetch_webpage_content(first_link)
            if webpage_content:
                scraped_text = scrape_text(webpage_content)
                if scraped_text:
                    st.write(scraped_text)
                # Offer the raw HTML for download alongside the scraped text.
                st.download_button(
                    label="Download Webpage Content",
                    data=webpage_content,
                    file_name="webpage_content.html",
                    mime="text/html"
                )
        else:
            st.warning("No results found")
    else:
        st.error("Please enter a query")