Sreejan committed · Commit 12fdc0e
Parent(s): 66840e4
Initialized

Browse files
- Dockerfile +14 -0
- WebScout_Deep_DDC.py +56 -0
- app_fast_api.py +49 -0
- app_flask.py +54 -0
- requirements.txt +3 -0
Dockerfile
ADDED
@@ -0,0 +1,14 @@
+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+
+FROM python:3.9
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY . .
+
+CMD ["gunicorn", "-b", "0.0.0.0:7860", "app_flask:app"]
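The image installs the pinned requirements and launches the Flask entry point with gunicorn on port 7860, the port a Docker-based Hugging Face Space is expected to listen on. A minimal smoke test against a locally running container might look like the sketch below (it assumes the image has been built and started with port 7860 published on localhost, and it uses the requests library, which is not listed in requirements.txt):

# Minimal sketch: check that the containerised /search endpoint answers.
# Assumes the container is running locally with port 7860 published and
# that the requests library is installed (it is not in requirements.txt).
import requests

resp = requests.get(
    "http://localhost:7860/search",
    params={"query": "python tutorial", "result_num": 5},
    timeout=120,  # webpage extraction can be slow
)
resp.raise_for_status()
data = resp.json()
print(len(data["titles"]), "titles returned")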
WebScout_Deep_DDC.py
ADDED
@@ -0,0 +1,56 @@
+from webscout import DeepWEBS
+
+def perform_web_search(query, result_num=10, safe=True, extract_webpage=True, overwrite_query_html=False, overwrite_webpage_html=False):
+
+    # Initialize the DeepWEBS class
+    deep_search = DeepWEBS()
+
+    # Set up the search parameters
+    search_params = deep_search.DeepSearch(
+        queries=[query],  # Query to search
+        result_num=result_num,  # Number of search results
+        safe=safe,  # Enable SafeSearch
+        types=["web"],  # Search type: web
+        extract_webpage=extract_webpage,  # True for extracting webpages
+        overwrite_query_html=overwrite_query_html,
+        overwrite_webpage_html=overwrite_webpage_html,
+    )
+
+
+    # Execute the search and retrieve results
+    results = deep_search.queries_to_search_results(search_params)
+
+    return results
+
+def attributes_ext(raw_result):
+    titles_list = []
+    urls_list = []
+    text_list = []
+
+    for values in raw_result:
+        for title in values['query_results']:
+            titles_list.append(title['title'])
+
+        for url in values['query_results']:
+            urls_list.append(url['url'])
+
+        for body in values['query_results']:
+            text_list.append(body['text'])
+
+    return titles_list, urls_list, text_list
+
+def main(query, result_num=10, safe=True, extract_webpage=True, overwrite_query_html=False, overwrite_webpage_html=False):
+
+    raw_output = perform_web_search(query, result_num=result_num, safe=safe, extract_webpage=extract_webpage, overwrite_query_html=overwrite_query_html, overwrite_webpage_html=overwrite_webpage_html)
+
+    titles_list, urls_list, text_list = attributes_ext(raw_output)
+
+    return titles_list, urls_list, text_list
+
+if __name__ == "__main__":
+    # Prompt the user for a search query
+    query = "ipl 2p24"
+
+    # Perform the web search
+    results = perform_web_search(query)
+
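WebScout_Deep_DDC.main() wraps the DeepWEBS search and returns three parallel lists (titles, URLs, extracted text), which the Flask and FastAPI apps below consume. A small usage sketch, assuming webscout==1.2.3 as pinned in requirements.txt behaves as this module expects:

# Sketch: call the search module directly and pair up its parallel lists.
import WebScout_Deep_DDC

titles, urls, texts = WebScout_Deep_DDC.main("python tutorial", result_num=5)
for title, url, text in zip(titles, urls, texts):
    print(title)
    print(url)
    print(text[:120], "...")  # first 120 characters of the extracted page text
    print()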
app_fast_api.py
ADDED
@@ -0,0 +1,49 @@
+from fastapi import FastAPI, Query
+from typing import List
+import WebScout_Deep_DDC
+
+app = FastAPI()
+
+@app.get('/search')
+async def search(
+    query: str = Query(..., description="The search query"),
+    result_num: int = Query(10, description="Number of results to retrieve"),
+    safe: bool = Query(True, description="Enable safe search"),
+    extract_webpage: bool = Query(True, description="Extract webpage content"),
+    overwrite_query_html: bool = Query(False, description="Overwrite query HTML"),
+    overwrite_webpage_html: bool = Query(False, description="Overwrite webpage HTML")
+):
+    if query:
+        # Call the main function with the specified parameters
+        titles_list, urls_list, text_list = WebScout_Deep_DDC.main(
+            query,
+            result_num=result_num,
+            safe=safe,
+            extract_webpage=extract_webpage,
+            overwrite_query_html=overwrite_query_html,
+            overwrite_webpage_html=overwrite_webpage_html
+        )
+
+        # Create a dictionary containing the extracted attributes
+        response = {
+            'titles': titles_list,
+            'urls': urls_list,
+            'text': text_list
+        }
+
+        # Return the response as JSON
+        return response
+    else:
+        error_message = {
+            'developer_contact': {
+                'telegram': 'https://t.me/DevsDoCode',
+                'instagram': 'https://www.instagram.com/sree.shades_/',
+                'discord': 'https://discord.gg/ehwfVtsAts',
+                'linkedin': 'https://www.linkedin.com/in/developer-sreejan/',
+                'twitter': 'https://twitter.com/Anand_Sreejan'
+            },
+            'error': 'Oops! Something went wrong. Please contact the developer for assistance.'
+        }
+        return error_message, 400
+
+# Example Usage : http://127.0.0.1:8000/search?query=python&safe=true
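app_fast_api.py exposes the same /search endpoint with typed query parameters, but it is not the entry point the Dockerfile launches, and neither fastapi nor an ASGI server appears in requirements.txt. If you wanted to try it locally, one possible sketch (assuming fastapi and uvicorn are installed) is:

# Sketch: serve the FastAPI variant locally, then open
# http://127.0.0.1:8000/search?query=python&safe=true as in the example above.
# Assumes fastapi and uvicorn are installed; neither is in requirements.txt.
import uvicorn
from app_fast_api import app

if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)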
app_flask.py
ADDED
@@ -0,0 +1,54 @@
+from flask import Flask, request, jsonify
+import WebScout_Deep_DDC
+
+app = Flask(__name__)
+
+@app.route('/search', methods=['GET'])
+def search():
+
+    # Extract query parameter
+    query = request.args.get('query')
+
+    # Extract additional parameters
+    result_num = int(request.args.get('result_num', 10))
+    safe = request.args.get('safe', True)
+    extract_webpage = request.args.get('extract_webpage', True)
+    overwrite_query_html = request.args.get('overwrite_query_html', False)
+    overwrite_webpage_html = request.args.get('overwrite_webpage_html', False)
+
+    if query:
+        # Call the main function with the specified parameters
+        titles_list, urls_list, text_list = WebScout_Deep_DDC.main(
+            query,
+            result_num=result_num,
+            safe=safe,
+            extract_webpage=extract_webpage,
+            overwrite_query_html=overwrite_query_html,
+            overwrite_webpage_html=overwrite_webpage_html
+        )
+
+        # Create a dictionary containing the extracted attributes
+        response = {
+            'titles': titles_list,
+            'urls': urls_list,
+            'text': text_list
+        }
+
+        # Return the response as prettified JSON
+        return jsonify(response), 200, {'Content-Type': 'application/json; charset=utf-8'}
+    else:
+        error_message = {
+            'developer_contact': {
+                'telegram': 'https://t.me/DevsDoCode',
+                'instagram': 'https://www.instagram.com/sree.shades_/',
+                'discord': 'https://discord.gg/ehwfVtsAts',
+                'linkedin': 'https://www.linkedin.com/in/developer-sreejan/',
+                'twitter': 'https://twitter.com/Anand_Sreejan'
+            },
+            'error': 'Oops! Something went wrong. Please contact the developer for assistance.'
+        }
+        return jsonify(error_message), 400
+
+if __name__ == '__main__':
+    app.run(debug=True)
+# Example Usage : http://127.0.0.1:5000/search?query=python%20tutorial&result_num=20&safe=False&types=web&types=video&extract_webpage=False
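One caveat with the Flask version: query-string values always arrive as strings, so request.args.get('safe', True) only returns a boolean when the parameter is absent; a request with safe=False passes the truthy string "False" straight through to webscout. If strict boolean handling were wanted, a small helper along these lines could be used (a hypothetical addition, not part of the committed file):

# Sketch: parse boolean-like query strings explicitly.
# str_to_bool is a hypothetical helper, not part of the committed app_flask.py.
def str_to_bool(value, default):
    if value is None:
        return default
    return str(value).strip().lower() in ("1", "true", "yes", "on")

# e.g. safe = str_to_bool(request.args.get('safe'), True)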
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+webscout==1.2.3
+flask==3.0.0
+gunicorn