Update chunk_python_code.py
Browse files- chunk_python_code.py +11 -9
chunk_python_code.py
CHANGED
@@ -89,12 +89,12 @@ def _chunk_import_only_python_code(python_code, file_path):
|
|
89 |
def _handle_notdefined_case(python_code):
|
90 |
documents = []
|
91 |
documents.extend(
|
92 |
-
chunk_python_code_by_character)
|
93 |
-
|
94 |
return documents
|
95 |
|
96 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
97 |
|
|
|
98 |
def _chunk_python_code_by_character(python_code):
|
99 |
documents = []
|
100 |
text_splitter = RecursiveCharacterTextSplitter(
|
@@ -102,15 +102,17 @@ def _chunk_python_code_by_character(python_code):
|
|
102 |
chunk_overlap=128,
|
103 |
separators=[]
|
104 |
)
|
105 |
-
|
106 |
-
chunks = text_splitter.split_text(text)
|
107 |
-
doc = Document(
|
108 |
-
page_content=python_code
|
109 |
-
)
|
110 |
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
return documents
|
|
|
114 |
|
115 |
|
116 |
def _chunk_nodeless_python_code(python_code, file_path):
|
|
|
89 |
def _handle_notdefined_case(python_code):
    """Chunk Python source for the "not defined" case.

    Delegates to character-based splitting of the raw source text.

    Args:
        python_code: The Python source text to chunk.

    Returns:
        A new list holding the items returned by
        ``_chunk_python_code_by_character`` for ``python_code``.
    """
    documents = []
    # The helper must be *called* with the source text: extending a list with
    # the bare function object raises TypeError (a function is not iterable),
    # and the helper defined in this module carries a leading underscore.
    documents.extend(_chunk_python_code_by_character(python_code))
    return documents
|
94 |
|
95 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
96 |
|
97 |
+
|
98 |
def _chunk_python_code_by_character(python_code):
|
99 |
documents = []
|
100 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
102 |
chunk_overlap=128,
|
103 |
separators=[]
|
104 |
)
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
+
chunks = text_splitter.split_text(python_code)
|
107 |
+
|
108 |
+
for chunk in chunks:
|
109 |
+
doc = Document(
|
110 |
+
page_content=chunk
|
111 |
+
)
|
112 |
+
documents.append(doc)
|
113 |
+
|
114 |
return documents
|
115 |
+
|
116 |
|
117 |
|
118 |
def _chunk_nodeless_python_code(python_code, file_path):
|