# query-app / app.py
# (Hugging Face Space file — commit e6312eb, "Update app.py", 3.29 kB)
import streamlit as st
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import chunk # Import the chunking functionality from app2.py
def google_search(query):
    """Search Google for *query*, restricted to medium.com, and return
    the first two result links.

    Parameters
    ----------
    query : str
        The user's raw search text.

    Returns
    -------
    list[str] | None
        Up to two result URLs, or None if the search raised an error
        (the error is reported in the Streamlit UI via st.error).
    """
    try:
        # Bug fix: the original appended "/t site:..." — "/t" is a typo
        # (literal slash-t, presumably meant as a separator) that ends up
        # inside the query string and corrupts the search. A plain space
        # is what the "site:" operator expects.
        site_query = query + " site:https://medium.com/"
        # Ask for up to 10 results; the generator is consumed lazily below.
        search_results = search(site_query, num_results=10)
        # Keep only the first two links.
        first_two_links = []
        for link in search_results:
            first_two_links.append(link)
            if len(first_two_links) == 2:
                break
        return first_two_links
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None
def fetch_webpage_content(url):
    """Download *url* and return the response body as text.

    Parameters
    ----------
    url : str
        The page to fetch.

    Returns
    -------
    str | None
        The decoded response body, or None on any failure (network
        error, timeout, or HTTP 4xx/5xx); failures are reported in the
        Streamlit UI via st.error.
    """
    try:
        # Bug fix: requests has NO default timeout, so a stalled server
        # would hang the Streamlit app indefinitely. Bound the wait.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise on 4xx/5xx status codes
        return response.text
    except Exception as e:
        st.error(f"Failed to fetch the webpage content: {e}")
        return None
def scrape_text(webpage_content):
    """Extract the visible text from raw HTML.

    Parameters
    ----------
    webpage_content : str
        Raw HTML markup.

    Returns
    -------
    str | None
        Cleaned text, one non-empty stripped line per row, or None if
        parsing failed (the error is shown via st.error).
    """
    try:
        soup = BeautifulSoup(webpage_content, 'html.parser')
        # Drop non-visible content before extracting text.
        for tag in soup(["script", "style"]):
            tag.decompose()
        raw_text = soup.get_text()
        # Strip every line, then split space-joined headline fragments
        # into separate pieces.
        stripped_lines = (ln.strip() for ln in raw_text.splitlines())
        pieces = (part.strip() for ln in stripped_lines for part in ln.split(" "))
        # Keep only non-empty pieces, one per output line.
        return '\n'.join(part for part in pieces if part)
    except Exception as e:
        st.error(f"Failed to scrape text from webpage content: {e}")
        return None
# ---------------- Streamlit UI ----------------
st.title("Search Link Finder")

# Search box for the user's query.
query = st.text_input("Enter search query", "")

if st.button("Search"):
    if not query:
        # Guard clause: nothing to search for.
        st.error("Please enter a query")
    else:
        first_two_links = google_search(query)
        if not first_two_links:
            st.warning("No results found")
        else:
            for i, link in enumerate(first_two_links):
                st.success(f"Link {i+1}: [Click here]({link})")
                # Fetch and scrape each result; skip on any failure
                # (the helpers already surfaced the error in the UI).
                webpage_content = fetch_webpage_content(link)
                if not webpage_content:
                    continue
                scraped_text = scrape_text(webpage_content)
                if not scraped_text:
                    continue
                st.write(f"Scraped Content from Link {i+1} (Chunked):")
                # Delegate chunked rendering to the helper module.
                chunk.display_chunks(scraped_text)
                # Let the user save the full scraped text.
                st.download_button(
                    label=f"Download Full Webpage Content from Link {i+1}",
                    data=scraped_text,
                    file_name=f"webpage_content_{i+1}.txt",
                    mime="text/plain"
                )