# query-app / app.py
# Source: Hugging Face Space by Makima57 (commit ca30905, "Update app.py", 3.25 kB)
import streamlit as st
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import chunk # Import the chunking function from chunk.py
# Function to perform Google search and return the first link
def google_search(query):
    """Return a list of up to the first two result URLs for *query*.

    Returns an empty list when the search yields no results, or None
    (after showing a Streamlit error) when the search itself fails.
    """
    try:
        # search() yields results lazily; collect at most the first two.
        # Calling next() directly on its return value (as the original did)
        # breaks if search() ever returns a plain list, and pads the result
        # with None when fewer than two hits exist — callers would then
        # pass None on to requests.get.
        links = []
        for url in search(query, num_results=2):
            links.append(url)
            if len(links) == 2:
                break
        return links
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None
# Function to fetch webpage content
def fetch_webpage_content(url):
    """Fetch *url* over HTTP and return the response body as text.

    Returns None (after showing a Streamlit error) if the request fails
    or the server responds with an error status.
    """
    try:
        # requests has no default timeout; without one an unresponsive
        # server would hang the Streamlit app indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise on 4xx/5xx responses.
        return response.text
    except Exception as e:
        st.error(f"Failed to fetch the webpage content: {e}")
        return None
# Function to scrape text from webpage content using Beautiful Soup
def scrape_text(webpage_content):
    """Extract readable text from raw HTML, one cleaned phrase per line.

    Script and style tags are removed, the remaining text is split into
    lines, lines are split into phrases, and blank fragments are dropped.

    Returns the cleaned text, or None (after a Streamlit error) on failure.
    """
    try:
        soup = BeautifulSoup(webpage_content, 'html.parser')
        # Drop non-visible content before extracting text.
        for tag in soup(["script", "style"]):
            tag.decompose()
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        # Split on a DOUBLE space (column-style layout gaps), per the
        # standard BeautifulSoup cleanup recipe. The original split on a
        # single space, which put every word on its own line and destroyed
        # sentence structure. (Also renamed from `chunk`, which shadowed
        # the imported chunk module name.)
        phrases = (phrase.strip() for line in lines for phrase in line.split("  "))
        return '\n'.join(phrase for phrase in phrases if phrase)
    except Exception as e:
        st.error(f"Failed to scrape text from webpage content: {e}")
        return None
# Streamlit app UI
# ---- Streamlit app UI ----
st.title("Search and Chunk Webpage Content")

# Free-text search query entered by the user.
query = st.text_input("Enter search query", "")

# Button to trigger search
if st.button("Search"):
    if not query:
        st.error("Please enter a query")
    else:
        first_two_links = google_search(query)
        if not first_two_links:
            st.warning("No results found")
        else:
            for i, link in enumerate(first_two_links, 1):
                if link is None:
                    # google_search may pad with None when fewer than two
                    # results exist; skip instead of crashing requests.get.
                    continue
                st.success(f"Link {i}: [Click here]({link})")
                # Fetch webpage content
                webpage_content = fetch_webpage_content(link)
                if not webpage_content:
                    continue
                # Scrape text from webpage content
                scraped_text = scrape_text(webpage_content)
                if not scraped_text:
                    continue
                # Chunk the scraped text using chunk.py
                chunked_text = chunk.chunk_text(scraped_text)
                joined_chunks = "\n---\n".join(chunked_text)  # delimiter between chunks
                # NOTE(review): each link overwrites this file, so only the
                # last processed link is "saved for later use". Filename kept
                # as-is in case another module reads "chunked_data.txt" —
                # confirm before changing.
                with open("chunked_data.txt", "w", encoding="utf-8") as f:
                    f.write(joined_chunks)
                st.write(f"Chunked Data for Link {i}:")
                for chunk_part in chunked_text:
                    st.write(chunk_part)
                # Per-link key and file_name: without a key, repeated
                # download buttons can collide on widget identity, and both
                # downloads previously used the same filename.
                st.download_button(
                    label="Download Chunked Webpage Content",
                    data=joined_chunks,
                    file_name=f"chunked_webpage_content_{i}.txt",
                    mime="text/plain",
                    key=f"download_chunks_{i}",
                )