Files changed:
- Search/__pycache__/gettyimages.cpython-312.pyc +0 -0
- Search/__pycache__/main.cpython-312.pyc +0 -0
- Search/__pycache__/useragentka.cpython-312.pyc +0 -0
- Search/gettyimages.py +0 -21
- Search/main.py +0 -163
- Search/useragentka.py +0 -20
- __pycache__/app.cpython-312.pyc +0 -0
- app.py +0 -48
- requirements.txt +1 -3
- test.py +0 -18
Search/__pycache__/gettyimages.cpython-312.pyc
DELETED
Binary file (1.03 kB)
Search/__pycache__/main.cpython-312.pyc
DELETED
Binary file (6.96 kB)
Search/__pycache__/useragentka.cpython-312.pyc
DELETED
Binary file (1.68 kB)
Search/gettyimages.py
DELETED
@@ -1,21 +0,0 @@
-from curl_cffi import requests
-from bs4 import BeautifulSoup
-
-def get_images(query):
-    res = requests.get(f'https://www.gettyimages.in/search/2/image?phrase={query}=editorial', impersonate='chrome110')
-
-    soup = BeautifulSoup(res.text, 'html.parser')
-
-    images = soup.find_all('img')
-
-    results = []
-
-    for image in images:
-        print(image['src'])
-        if image['src'].startswith('https://media.gettyimages.com'):
-            results.append({'src': image['src'], 'alt': image['alt'], 'class': ''})
-        else:
-            continue
-
-    return results
-
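Note: the deleted Search/gettyimages.py returned image dicts with the same keys ("src", "alt", "class") that Search/main.py builds from Google results, and was used there as a fallback image source. A minimal sketch of how the helper was consumed before this commit (the query string is illustrative, not from the repo):

    from Search.gettyimages import get_images

    # Scrapes gettyimages.in and keeps only thumbnails hosted on media.gettyimages.com.
    images = get_images("northern lights")  # hypothetical query
    for img in images:
        print(img["src"], img["alt"])       # each entry also carries an empty "class" field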
Search/main.py
DELETED
@@ -1,163 +0,0 @@
-"""googlesearch is a Python library for searching Google, easily."""
-from time import sleep
-from bs4 import BeautifulSoup
-from requests import get
-from urllib.parse import unquote  # to decode the url
-from Search.useragentka import get_useragent
-from curl_cffi import requests as curlreq
-from Search.gettyimages import get_images
-
-def _req(term, results, lang, start, proxies, timeout, safe, ssl_verify, region):
-    resp = get(
-        url="https://www.google.com/search",
-        headers={
-            "User-Agent": get_useragent(),
-            "Accept": "*/*"
-        },
-        params={
-            "q": term,
-            "num": results + 2,  # Prevents multiple requests
-            "hl": lang,
-            "start": start,
-            "safe": safe,
-            "gl": region,
-        },
-        proxies=proxies,
-        timeout=timeout,
-        verify=ssl_verify,
-        cookies = {
-            'CONSENT': 'PENDING+987',  # Bypasses the consent page
-            'SOCS': 'CAESHAgBEhIaAB',
-        }
-    )
-    resp.raise_for_status()
-    return resp
-
-
-class SearchResult:
-    def __init__(self, url, title, description):
-        self.url = url
-        self.title = title
-        self.description = description
-
-    def __repr__(self):
-        return f"SearchResult(url={self.url}, title={self.title}, description={self.description})"
-
-
-def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_interval=0, timeout=5, safe="active", ssl_verify=None, region=None, start_num=0, unique=False):
-    """Search the Google search engine"""
-
-    # Proxy setup
-    proxies = {"https": proxy, "http": proxy} if proxy and (proxy.startswith("https") or proxy.startswith("http")) else None
-
-    start = start_num
-    fetched_results = 0
-    fetched_links = set()
-    results_list = []
-    image_results = []  # New list for image results
-
-    while fetched_results < num_results:
-        # Send request
-        resp = _req(term, num_results - start,
-                    lang, start, proxies, timeout, safe, ssl_verify, region)
-
-        # Parse
-        soup = BeautifulSoup(resp.text, "html.parser")
-        result_block = soup.find_all("div", class_="ezO2md")
-        new_results = 0
-
-        # Find all images on the page
-        try:
-            all_images = soup.find_all("img")  # Google's image class
-            for img in all_images:
-                img_src = img.get("src") or img.get("data-src")
-                if img_src:
-                    # Handle base64 images
-                    if img_src.startswith("data:image"):
-                        image_results.append({
-                            "src": img_src,  # Already base64 encoded
-                            "alt": img.get("alt", ""),
-                            "class": img.get("class", []),
-                        })
-                    # Handle regular image URLs
-                    elif img_src.startswith("http"):
-                        image_results.append({
-                            "src": img_src,
-                            "alt": img.get("alt", ""),
-                            "class": img.get("class", []),
-                        })
-        except Exception as e:
-            print(f"Error parsing images: {str(e)}")
-
-        for result in result_block:
-            link_tag = result.find("a", href=True)
-            title_tag = link_tag.find("span", class_="CVA68e") if link_tag else None
-            description_tag = result.find("span", class_="FrIlee")
-
-            if link_tag and title_tag and description_tag:
-                link = unquote(link_tag["href"].split("&")[0].replace("/url?q=", ""))
-                if link in fetched_links and unique:
-                    continue
-                fetched_links.add(link)
-                title = title_tag.text if title_tag else ""
-                description = description_tag.text if description_tag else ""
-
-                # Only get page_text if advanced mode and we haven't gotten any yet
-                if advanced and not any('page_text' in result for result in results_list):
-                    try:
-                        page_scrape = curlreq.get(link, impersonate='chrome110')
-                        page_scrape.encoding = 'utf-8'
-                        page_soup = BeautifulSoup(page_scrape.text, "html.parser")
-
-                        # Try multiple strategies to find main content
-                        main_content = (
-                            page_soup.find(['article', 'main']) or
-                            page_soup.find('div', {'id': ['content', 'main-content', 'body-content']}) or
-                            page_soup.find('div', {'class': ['content', 'main', 'article', 'post']}) or
-                            page_soup.find('div', {'role': 'main'}) or
-                            page_soup.body
-                        )
-                        if main_content:
-                            # Remove unwanted elements
-                            for element in main_content(['script', 'style', 'noscript', 'svg', 'header', 'footer', 'nav']):
-                                element.decompose()
-                            # Extract text with better cleaning
-                            text = main_content.get_text(separator=' ', strip=True)
-                            text = ' '.join(line.strip() for line in text.splitlines() if line.strip())
-                            page_text = ' '.join(word for word in text.split() if len(word) > 1)[:3000]
-                        else:
-                            page_text = ""
-                    except Exception as e:
-                        print(f"Error scraping {link}: {str(e)}")
-                        page_text = ""
-                else:
-                    page_text = ""
-
-
-                fetched_results += 1
-                new_results += 1
-
-                if advanced:
-                    results_list.append({
-                        "link": link,
-                        "title": title,
-                        "description": description,
-                        "page_text": page_text,
-                    })
-                else:
-                    results_list.append(link)
-
-                if fetched_results >= num_results:
-                    break
-
-        if new_results == 0:
-            break
-
-        start += 10
-        sleep(sleep_interval)
-
-    if image_results == []:
-        images = get_images(term)
-        return {"results": results_list, "images": images}
-    else:
-        return {"results": results_list, "images": image_results}
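Note: the three endpoints removed from app.py below (/fetch-images, /fetch-links, /fetch-google) all wrapped this search() function. A minimal sketch of the call they made, with parameter values borrowed from the deleted test.py:

    from Search.main import search

    # advanced=False yields bare result URLs; advanced=True yields dicts with
    # link, title, description and page_text (page_text is scraped for the first result only).
    results = search("sunset beach", num_results=5, lang="en", advanced=False)
    print(results["results"])  # list of result links
    print(results["images"])   # list of {"src", "alt", "class"} dicts; falls back to Getty Images if Google returned none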
Search/useragentka.py
DELETED
@@ -1,20 +0,0 @@
-import random
-
-def get_useragent():
-    """
-    Generates a random user agent string mimicking the format of various software versions.
-
-    The user agent string is composed of:
-    - Lynx version: Lynx/x.y.z where x is 2-3, y is 8-9, and z is 0-2
-    - libwww version: libwww-FM/x.y where x is 2-3 and y is 13-15
-    - SSL-MM version: SSL-MM/x.y where x is 1-2 and y is 3-5
-    - OpenSSL version: OpenSSL/x.y.z where x is 1-3, y is 0-4, and z is 0-9
-
-    Returns:
-        str: A randomly generated user agent string.
-    """
-    lynx_version = f"Lynx/{random.randint(2, 3)}.{random.randint(8, 9)}.{random.randint(0, 2)}"
-    libwww_version = f"libwww-FM/{random.randint(2, 3)}.{random.randint(13, 15)}"
-    ssl_mm_version = f"SSL-MM/{random.randint(1, 2)}.{random.randint(3, 5)}"
-    openssl_version = f"OpenSSL/{random.randint(1, 3)}.{random.randint(0, 4)}.{random.randint(0, 9)}"
-    return f"{lynx_version} {libwww_version} {ssl_mm_version} {openssl_version}"
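Note: the deleted get_useragent() produced Lynx-style user agent strings. A sample output (the numbers are random; this value is illustrative):

    from Search.useragentka import get_useragent

    print(get_useragent())
    # e.g. "Lynx/2.8.1 libwww-FM/2.14 SSL-MM/1.4 OpenSSL/1.2.5"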
__pycache__/app.cpython-312.pyc
CHANGED
Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ
app.py
CHANGED
@@ -15,7 +15,6 @@ from fastapi.templating import Jinja2Templates
 from pathlib import Path
 from collections import Counter, defaultdict
 from utils.logger import log_request
-from Search.main import search
 
 app = FastAPI()
 
@@ -223,50 +222,3 @@ async def chat(request: ChatRequest):
     log_request("/chat", selected_generator.__name__)
     return StreamingResponse(selected_generator(json_data), media_type='text/event-stream')
 
-@app.post("/fetch-images")
-async def fetch_images(request: Request):
-    data = await request.json()
-    query = data.get("query", "")
-    num_results = data.get("num_results", 5)
-    lang = data.get("lang", "en")
-    advanced = data.get("advanced", False)
-
-    # Call the search function
-    results = search(query, num_results=num_results, lang=lang, advanced=advanced)
-
-    # Log the request
-    log_request("/fetch-images", query)
-
-    return results['images']
-
-@app.post("/fetch-links")
-async def fetch_links(request: Request):
-    data = await request.json()
-    query = data.get("query", "")
-    num_results = data.get("num_results", 5)
-    lang = data.get("lang", "en")
-    advanced = data.get("advanced", False)
-
-    # Call the search function
-    results = search(query, num_results=num_results, lang=lang, advanced=advanced)
-
-    # Log the request
-    log_request("/fetch-links", query)
-
-    return results['results']
-
-@app.post("/fetch-google")
-async def fetch_google(request: Request):
-    data = await request.json()
-    query = data.get("query", "")
-    num_results = data.get("num_results", 5)
-    lang = data.get("lang", "en")
-    advanced = data.get("advanced", True)
-
-    # Call the search function
-    results = search(query, num_results=num_results, lang=lang, advanced=advanced)
-
-    # Log the request
-    log_request("/fetch-google", query)
-
-    return results
requirements.txt
CHANGED
@@ -7,6 +7,4 @@ asyncio
 groq
 jinja2
 aiofiles
-matplotlib
-bs4
-curl_cffi
+matplotlib
test.py
CHANGED
@@ -1,18 +0,0 @@
-import requests
-
-url = "http://localhost:8000/fetch-images"  # or your deployed URL
-
-payload = {
-    "query": "sunset beach",
-    "num_results": 5,
-    "lang": "en",
-    "advanced": False
-}
-
-response = requests.post(url, json=payload)
-
-if response.ok:
-    results = response.json()
-    print("Fetched Images:", results)
-else:
-    print("Error:", response.status_code, response.text)