Spaces:
Sleeping
Sleeping
Update crawl_the_site.py
Browse files- crawl_the_site.py +1 -1
crawl_the_site.py
CHANGED
@@ -3,7 +3,7 @@ import requests as req
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
from urllib.parse import urljoin,urlparse
|
5 |
|
6 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=
|
7 |
|
8 |
def get_base(url):
|
9 |
parsed = urlparse(url)
|
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
from urllib.parse import urljoin,urlparse
|
5 |
|
6 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=300,length_function=len)
|
7 |
|
8 |
def get_base(url):
|
9 |
parsed = urlparse(url)
|