Sreejan committed on
Commit
12fdc0e
·
1 Parent(s): 66840e4

Initialized

Browse files
Files changed (5) hide show
  1. Dockerfile +14 -0
  2. WebScout_Deep_DDC.py +56 -0
  3. app_fast_api.py +49 -0
  4. app_flask.py +54 -0
  5. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

# Hugging Face Spaces runs the container as UID 1000. Create that user up
# front and hand it the working directory so the app can write files at
# runtime instead of failing on root-owned paths.
RUN useradd -m -u 1000 user \
    && mkdir -p /code \
    && chown user:user /code

USER user

# pip falls back to a per-user install for a non-root user; make the
# installed console scripts (gunicorn) resolvable.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /code

# Copy the requirements first so the dependency layer is cached across
# source-only changes.
COPY --chown=user ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY --chown=user . .

# Serve the Flask app on the port Spaces expects (7860).
CMD ["gunicorn", "-b", "0.0.0.0:7860", "app_flask:app"]
WebScout_Deep_DDC.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from webscout import DeepWEBS
2
+
3
def perform_web_search(query, result_num=10, safe=True, extract_webpage=True, overwrite_query_html=False, overwrite_webpage_html=False):
    """Run a web search for a single query through DeepWEBS.

    Args:
        query: Search query; it is sent as a one-element ``queries`` list.
        result_num: Number of search results to request.
        safe: Whether SafeSearch is enabled.
        extract_webpage: Whether webpages should be extracted.
        overwrite_query_html: Forwarded unchanged to ``DeepSearch``.
        overwrite_webpage_html: Forwarded unchanged to ``DeepSearch``.

    Returns:
        Whatever ``DeepWEBS.queries_to_search_results`` returns for the
        assembled search parameters.
    """
    searcher = DeepWEBS()

    # Gather the keyword arguments first; the search type is fixed to "web".
    request_options = {
        "queries": [query],
        "result_num": result_num,
        "safe": safe,
        "types": ["web"],
        "extract_webpage": extract_webpage,
        "overwrite_query_html": overwrite_query_html,
        "overwrite_webpage_html": overwrite_webpage_html,
    }
    search_request = searcher.DeepSearch(**request_options)

    return searcher.queries_to_search_results(search_request)
24
+
25
def attributes_ext(raw_result):
    """Split raw search output into parallel title, URL and text lists.

    Args:
        raw_result: Iterable of mappings, each with a ``'query_results'``
            entry holding mappings with ``'title'``, ``'url'`` and
            ``'text'`` keys.

    Returns:
        A ``(titles_list, urls_list, text_list)`` tuple. Entries keep the
        order in which they appear in ``raw_result``.

    Raises:
        KeyError: If a group or a result lacks one of the expected keys.
    """
    titles_list, urls_list, text_list = [], [], []

    for group in raw_result:
        # Titles, then URLs, then texts for each group, so the order of
        # lookups (and of any KeyError) matches a field-by-field walk.
        titles_list.extend(hit['title'] for hit in group['query_results'])
        urls_list.extend(hit['url'] for hit in group['query_results'])
        text_list.extend(hit['text'] for hit in group['query_results'])

    return titles_list, urls_list, text_list
41
+
42
def main(query, result_num=10, safe=True, extract_webpage=True, overwrite_query_html=False, overwrite_webpage_html=False):
    """Search for ``query`` and return the results as three parallel lists.

    All keyword options are passed straight through to
    ``perform_web_search``; its raw output is then flattened by
    ``attributes_ext``.

    Returns:
        A ``(titles, urls, texts)`` tuple of lists.
    """
    search_options = {
        "result_num": result_num,
        "safe": safe,
        "extract_webpage": extract_webpage,
        "overwrite_query_html": overwrite_query_html,
        "overwrite_webpage_html": overwrite_webpage_html,
    }
    raw_output = perform_web_search(query, **search_options)

    titles, urls, texts = attributes_ext(raw_output)
    return titles, urls, texts
49
+
50
if __name__ == "__main__":
    # Manual smoke test with a fixed sample query; nothing is read from
    # the user.
    query = "ipl 2024"

    # Perform the web search and print the raw results so the run has a
    # visible outcome instead of discarding them.
    results = perform_web_search(query)
    print(results)
56
+
app_fast_api.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Query
2
+ from typing import List
3
+ import WebScout_Deep_DDC
4
+
5
+ app = FastAPI()
6
+
7
@app.get('/search')
async def search(
    query: str = Query(..., description="The search query"),
    result_num: int = Query(10, description="Number of results to retrieve"),
    safe: bool = Query(True, description="Enable safe search"),
    extract_webpage: bool = Query(True, description="Extract webpage content"),
    overwrite_query_html: bool = Query(False, description="Overwrite query HTML"),
    overwrite_webpage_html: bool = Query(False, description="Overwrite webpage HTML")
):
    """Handle ``GET /search`` and return titles, URLs and texts as JSON.

    Returns:
        A dict with ``'titles'``, ``'urls'`` and ``'text'`` lists on
        success, or a ``JSONResponse`` with HTTP 400 and the developer
        contact payload when ``query`` is empty.
    """
    # Function-scope imports keep this handler self-contained.
    from fastapi.concurrency import run_in_threadpool
    from fastapi.responses import JSONResponse

    if not query:
        error_message = {
            'developer_contact': {
                'telegram': 'https://t.me/DevsDoCode',
                'instagram': 'https://www.instagram.com/sree.shades_/',
                'discord': 'https://discord.gg/ehwfVtsAts',
                'linkedin': 'https://www.linkedin.com/in/developer-sreejan/',
                'twitter': 'https://twitter.com/Anand_Sreejan'
            },
            'error': 'Oops! Something went wrong. Please contact the developer for assistance.'
        }
        # Returning ``(body, 400)`` is a Flask convention; FastAPI would
        # serialise that tuple as a JSON array with status 200. An explicit
        # response object is needed to actually send a 400.
        return JSONResponse(status_code=400, content=error_message)

    # The search is synchronous, so run it in the threadpool to avoid
    # stalling the event loop while it works.
    titles_list, urls_list, text_list = await run_in_threadpool(
        WebScout_Deep_DDC.main,
        query,
        result_num=result_num,
        safe=safe,
        extract_webpage=extract_webpage,
        overwrite_query_html=overwrite_query_html,
        overwrite_webpage_html=overwrite_webpage_html,
    )

    # FastAPI serialises the returned dict as the JSON response body.
    return {
        'titles': titles_list,
        'urls': urls_list,
        'text': text_list
    }
48
+
49
+ # Example Usage : http://127.0.0.1:8000/search?query=python&safe=true
app_flask.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import WebScout_Deep_DDC
3
+
4
+ app = Flask(__name__)
5
+
6
# Query-string spellings recognised for boolean flags (case-insensitive).
_TRUE_VALUES = frozenset({'1', 'true', 't', 'yes', 'y', 'on'})
_FALSE_VALUES = frozenset({'0', 'false', 'f', 'no', 'n', 'off'})


def _bool_arg(name, default):
    """Return the boolean query parameter ``name``, or ``default``.

    ``default`` is used when the parameter is absent or its value is not a
    recognised true/false spelling.
    """
    raw = request.args.get(name)
    if raw is None:
        return default
    token = raw.strip().lower()
    if token in _TRUE_VALUES:
        return True
    if token in _FALSE_VALUES:
        return False
    return default


@app.route('/search', methods=['GET'])
def search():
    """Handle ``GET /search`` and return titles, URLs and texts as JSON.

    Query parameters:
        query: Required search string.
        result_num: Integer, default 10; non-numeric input falls back to 10.
        safe, extract_webpage: Boolean flags, default True.
        overwrite_query_html, overwrite_webpage_html: Boolean flags,
            default False.

    Returns:
        A 200 JSON response with ``'titles'``, ``'urls'`` and ``'text'``
        lists, or a 400 JSON response with the developer contact payload
        when ``query`` is missing or empty.
    """
    # Extract query parameter
    query = request.args.get('query')

    # Reject the request before parsing anything else, so a bad optional
    # parameter cannot mask the missing-query error.
    if not query:
        error_message = {
            'developer_contact': {
                'telegram': 'https://t.me/DevsDoCode',
                'instagram': 'https://www.instagram.com/sree.shades_/',
                'discord': 'https://discord.gg/ehwfVtsAts',
                'linkedin': 'https://www.linkedin.com/in/developer-sreejan/',
                'twitter': 'https://twitter.com/Anand_Sreejan'
            },
            'error': 'Oops! Something went wrong. Please contact the developer for assistance.'
        }
        return jsonify(error_message), 400

    # ``type=int`` returns the default when conversion fails, where a bare
    # ``int(...)`` would raise ValueError and surface as a 500.
    result_num = request.args.get('result_num', 10, type=int)

    # Query-string values arrive as strings, and any non-empty string
    # (including "False") is truthy, so the flags are parsed explicitly.
    safe = _bool_arg('safe', True)
    extract_webpage = _bool_arg('extract_webpage', True)
    overwrite_query_html = _bool_arg('overwrite_query_html', False)
    overwrite_webpage_html = _bool_arg('overwrite_webpage_html', False)

    # Call the main function with the specified parameters
    titles_list, urls_list, text_list = WebScout_Deep_DDC.main(
        query,
        result_num=result_num,
        safe=safe,
        extract_webpage=extract_webpage,
        overwrite_query_html=overwrite_query_html,
        overwrite_webpage_html=overwrite_webpage_html
    )

    # Create a dictionary containing the extracted attributes
    response = {
        'titles': titles_list,
        'urls': urls_list,
        'text': text_list
    }

    # Return the response as JSON with an explicit UTF-8 content type
    return jsonify(response), 200, {'Content-Type': 'application/json; charset=utf-8'}
51
+
52
if __name__ == '__main__':
    # Local development entry point only; the Dockerfile's CMD starts the
    # app through gunicorn and never reaches this branch.
    import os

    # The Werkzeug interactive debugger lets a client execute arbitrary
    # code, so it is opt-in (FLASK_DEBUG=1) rather than always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true')
    app.run(debug=debug_enabled)
    # Example Usage : http://127.0.0.1:5000/search?query=python%20tutorial&result_num=20&safe=False&extract_webpage=False
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ webscout==1.2.3
2
+ flask==3.0.0
3
+ gunicorn