Spaces:

Makima57
/

query-app

Sleeping

App Files Files Community

query-app / app.py

Makima57

Update app.py

e6312eb verified 8 months ago

raw

history blame

3.29 kB

	import streamlit as st
	from googlesearch import search
	import requests
	from bs4 import BeautifulSoup
	import chunk # Import the chunking functionality from app2.py

	# Function to perform Google search and return the first two links
	def google_search(query):
	    """Return up to the first two Google results for *query* on Medium.

	    The user's terms are combined with a ``site:`` filter for
	    ``https://medium.com/`` and passed to ``googlesearch.search``.

	    Args:
	        query: Free-text search terms entered by the user.

	    Returns:
	        A list holding at most two results, in the order the search
	        yielded them, or ``None`` if the search raised (the error is
	        reported through ``st.error``).
	    """
	    from itertools import islice

	    try:
	        # Separate the user's terms from the site filter with a plain space;
	        # the former "/t" suffix was sent to Google as literal query text.
	        site_query = query + " site:https://medium.com/"
	        search_results = search(site_query, num_results=10)  # Ask for up to 10 results
	        # Take only the first two results instead of looping with a counter.
	        return list(islice(search_results, 2))
	    except Exception as e:
	        st.error(f"An error occurred: {e}")
	        return None

	# Function to fetch webpage content
	def fetch_webpage_content(url, timeout=10):
	    """Download *url* and return the response body as text.

	    Args:
	        url: Address of the page to fetch.
	        timeout: Seconds to wait for the server before giving up; forwarded
	            to ``requests.get``. Without it a stalled server would block
	            the app indefinitely.

	    Returns:
	        The response text, or ``None`` if the request failed or returned an
	        HTTP error status (the error is reported through ``st.error``).
	    """
	    try:
	        response = requests.get(url, timeout=timeout)
	        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions
	    except requests.RequestException as e:
	        st.error(f"Failed to fetch the webpage content: {e}")
	        return None
	    return response.text

	# Function to scrape text from webpage content using BeautifulSoup
	def scrape_text(webpage_content):
	    """Extract the visible text from an HTML document.

	    ``<script>`` and ``<style>`` elements are removed, the remaining text is
	    split into stripped lines, lines containing several phrases separated
	    by a double space are split into one phrase per line, and empty
	    fragments are dropped.

	    Args:
	        webpage_content: HTML markup to parse.

	    Returns:
	        The cleaned text with one phrase per line, or ``None`` if parsing
	        raised (the error is reported through ``st.error``).
	    """
	    try:
	        soup = BeautifulSoup(webpage_content, 'html.parser')
	        # Remove all script and style elements so their source is not scraped
	        for tag in soup(["script", "style"]):
	            tag.decompose()
	        raw_text = soup.get_text()
	        # Break the text into lines and remove leading/trailing spaces
	        lines = (line.strip() for line in raw_text.splitlines())
	        # Break multi-headlines into a line each. The separator is a DOUBLE
	        # space: splitting on a single space would put every word on its own
	        # line. It is spelled as a product so the two spaces cannot be lost.
	        phrase_separator = " " * 2
	        phrases = (
	            phrase.strip()
	            for line in lines
	            for phrase in line.split(phrase_separator)
	        )
	        # Drop blank lines
	        return '\n'.join(phrase for phrase in phrases if phrase)
	    except Exception as e:
	        st.error(f"Failed to scrape text from webpage content: {e}")
	        return None

	# Streamlit app UI
	st.title("Search Link Finder")

	# Free-text box holding the user's search terms
	query = st.text_input("Enter search query", "")

	# Everything below runs only on the script run triggered by the button
	if st.button("Search"):
	    if not query:
	        st.error("Please enter a query")
	    else:
	        first_two_links = google_search(query)
	        if not first_two_links:
	            st.warning("No results found")
	        else:
	            for i, link in enumerate(first_two_links):
	                st.success(f"Link {i+1}: [Click here]({link})")

	                # Download the page; skip this link if nothing came back
	                webpage_content = fetch_webpage_content(link)
	                if not webpage_content:
	                    continue

	                # Reduce the HTML to plain text; skip this link if empty
	                scraped_text = scrape_text(webpage_content)
	                if not scraped_text:
	                    continue

	                st.write(f"Scraped Content from Link {i+1} (Chunked):")

	                # Hand the text to the imported `chunk` module for display
	                chunk.display_chunks(scraped_text)

	                # Offer the complete scraped text as a plain-text download
	                st.download_button(
	                    label=f"Download Full Webpage Content from Link {i+1}",
	                    data=scraped_text,
	                    file_name=f"webpage_content_{i+1}.txt",
	                    mime="text/plain",
	                )