Update app.py
app.py (changed)
@@ -2,16 +2,14 @@ import streamlit as st
 from googlesearch import search
 import requests
 from bs4 import BeautifulSoup
+import chunk  # Import the chunking functionality from app2.py
 
 # Function to perform Google search and return the first two links
 def google_search(query):
     try:
         query = query + "/t site:https://medium.com/"
-        # Perform the search and get an iterator of results
         search_results = search(query, num_results=10)  # Get up to 10 results
         first_two_links = []
-
-        # Get the first two results
         for i, link in enumerate(search_results):
             if i < 2:
                 first_two_links.append(link)
@@ -32,16 +30,15 @@ def fetch_webpage_content(url):
         st.error(f"Failed to fetch the webpage content: {e}")
         return None
 
-# Function to scrape text from webpage content using
+# Function to scrape text from webpage content using BeautifulSoup
 def scrape_text(webpage_content):
     try:
         soup = BeautifulSoup(webpage_content, 'html.parser')
         # Remove all script and style elements
         for script in soup(["script", "style"]):
             script.decompose()
-        # Get the text from the BeautifulSoup object
         text = soup.get_text()
-        # Break the text into lines and remove leading
+        # Break the text into lines and remove leading/trailing spaces
         lines = (line.strip() for line in text.splitlines())
         # Break multi-headlines into a line each
         chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
@@ -65,18 +62,21 @@ if st.button("Search"):
     if first_two_links:
         for i, link in enumerate(first_two_links):
             st.success(f"Link {i+1}: [Click here]({link})")
-
+
             # Fetch webpage content
             webpage_content = fetch_webpage_content(link)
             if webpage_content:
                 # Scrape text from webpage content
                 scraped_text = scrape_text(webpage_content)
                 if scraped_text:
-                    st.write(f"Scraped Content from Link {i+1}:")
-
-                    #
+                    st.write(f"Scraped Content from Link {i+1} (Chunked):")
+
+                    # Call the chunking function from app2.py
+                    chunk.display_chunks(scraped_text)
+
+                    # Option to download the entire scraped content
                     st.download_button(
-                        label=f"Download Webpage Content from Link {i+1}",
+                        label=f"Download Full Webpage Content from Link {i+1}",
                         data=scraped_text,
                         file_name=f"webpage_content_{i+1}.txt",
                         mime="text/plain"
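
Note: the new code imports `chunk` (the in-code comment points to app2.py) and calls `chunk.display_chunks(scraped_text)`, but that module is not part of this commit. A minimal sketch of what such a helper might look like, assuming it simply splits the scraped text into fixed-size character pieces and renders each one in Streamlit; the module contents, the `chunk_size` parameter, and the use of `st.expander` are assumptions for illustration, not the actual app2.py:

```python
# Hypothetical contents of the chunking module (chunk / app2.py) -- a sketch only.
import streamlit as st

def display_chunks(text, chunk_size=1000):
    """Split `text` into fixed-size pieces and render each piece in Streamlit."""
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    for idx, piece in enumerate(chunks, start=1):
        # One collapsible section per chunk keeps long scraped pages readable
        with st.expander(f"Chunk {idx} of {len(chunks)}"):
            st.write(piece)
```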