broadfield-dev committed on
Commit
d033d1d
·
verified ·
1 Parent(s): d615ba9

Update parser.py

Browse files
Files changed (1) hide show
  1. parser.py +104 -122
parser.py CHANGED
@@ -1,11 +1,11 @@
1
- # parser.py
2
  import ast
 
3
 
4
  def get_category(node, parent=None):
5
  """Determine the category of an AST node or variable context, including variable roles."""
6
  if isinstance(node, (ast.Import, ast.ImportFrom)):
7
  return 'import'
8
- elif isinstance(node, ast.FunctionDef) or isinstance(node, ast.AsyncFunctionDef):
9
  return 'function'
10
  elif isinstance(node, ast.ClassDef):
11
  return 'class'
@@ -26,36 +26,63 @@ def get_category(node, parent=None):
26
  elif isinstance(node, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
27
  if parent and isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef, ast.If, ast.Try, ast.While, ast.For)):
28
  return 'assigned_variable'
29
- elif isinstance(node, ast.arg): # Input variables in function definitions
30
  if parent and isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef)):
31
  return 'input_variable'
32
- elif isinstance(node, ast.Name): # Returned variables in return statements
33
  if parent and isinstance(parent, ast.Return):
34
  return 'returned_variable'
35
- else:
36
- return 'other' # Default to 'other' for unrecognized nodes
37
 
38
def create_vector(category, level, location, total_lines, parent_path):
    """Build the six-element feature vector describing one parsed code part.

    Args:
        category: Category name of the part (e.g. 'import', 'if', 'spacer').
            Names missing from the category table map to id 0.
        level: Nesting level of the part; copied into the vector unchanged.
        location: ``(start_line, end_line)`` pair for the part.
        total_lines: Line count used to normalise position and span.
        parent_path: Sequence of ancestor node ids such as ``'Function[1]'``.

    Returns:
        ``[category_id, level, center_pos, span, parent_depth, parent_weight]``.
    """
    ordered_categories = (
        'import', 'function', 'async_function', 'class',
        'if', 'while', 'for', 'try', 'expression', 'spacer',
        'other', 'elif', 'else', 'except', 'finally', 'return',
        'assigned_variable', 'input_variable', 'returned_variable',
    )
    # Ids are simply the 1-based position in the tuple above.
    category_map = {name: idx for idx, name in enumerate(ordered_categories, start=1)}

    category_id = category_map.get(category, 0)
    first_line, last_line = location
    span = (last_line - first_line + 1) / total_lines
    center_pos = ((first_line + last_line) / 2) / total_lines
    parent_depth = len(parent_path)

    # Each ancestor contributes its category id scaled by 1/rank, where rank is
    # its 1-based position in parent_path; the id is looked up from the text
    # before '[' in the node id, lower-cased.
    ancestor_terms = [
        category_map.get(ancestor.split('[')[0].lower(), 0) * (1 / rank)
        for rank, ancestor in enumerate(parent_path, start=1)
    ]
    parent_weight = sum(ancestor_terms) / max(1, len(category_map))

    return [category_id, level, center_pos, span, parent_depth, parent_weight]
54
 
55
def is_blank_or_comment(line):
    """Return True when *line* is empty, whitespace-only, or starts with '#'."""
    content = line.strip()
    if not content:
        return True
    return content.startswith('#')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None, processed_lines=None):
61
  if total_lines is None:
@@ -63,7 +90,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
63
  if parent_path is None:
64
  parent_path = []
65
  if counters is None:
66
- counters = {cat: 0 for cat in ['import', 'function', 'async_function', 'class', 'if', 'while', 'for', 'try', 'return', 'expression', 'other', 'spacer', 'elif', 'else', 'except', 'finally', 'assigned_variable', 'input_variable', 'returned_variable']}
67
  if processed_lines is None:
68
  processed_lines = set()
69
 
@@ -71,41 +98,35 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
71
  start_line = getattr(node, 'lineno', prev_end + 1)
72
  end_line = getattr(node, 'end_lineno', start_line)
73
 
74
- # Skip if any lines are already processed
75
  if any(line in processed_lines for line in range(start_line, end_line + 1)):
76
  return parts, []
77
 
78
- # Get category, default to 'other' if None
79
  category = get_category(node, parent_path[-1] if parent_path else None) or 'other'
80
  if category not in counters:
81
  category = 'other'
82
  counters[category] += 1
83
  node_id = f"{category.capitalize()}[{counters[category]}]"
 
84
 
85
- # Spacer before node (only for blank lines or comments)
86
  if start_line > prev_end + 1:
87
- spacer_lines = lines[prev_end:start_line - 1]
88
- spacer_lines_set = set(range(prev_end + 1, start_line))
89
- if not spacer_lines_set.issubset(processed_lines):
90
- for i, line in enumerate(spacer_lines, prev_end + 1):
91
- if i not in processed_lines and is_blank_or_comment(line):
92
- counters['spacer'] += 1
93
- spacer_node_id = f"Spacer[{counters['spacer']}]"
94
- parts.append({
95
- 'category': 'spacer',
96
- 'source': line,
97
- 'location': (i, i),
98
- 'level': level,
99
- 'vector': create_vector('spacer', level, (i, i), total_lines, parent_path),
100
- 'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
101
- 'node_id': spacer_node_id
102
- })
103
- processed_lines.add(i)
104
 
105
- # Current node's header (e.g., 'def', 'if', 'try')
106
  current_path = parent_path + [node_id]
107
  if start_line not in processed_lines and not is_blank_or_comment(lines[start_line - 1]):
108
- parts.append({
109
  'category': category,
110
  'source': lines[start_line - 1],
111
  'location': (start_line, start_line),
@@ -113,23 +134,22 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
113
  'vector': create_vector(category, level, (start_line, start_line), total_lines, current_path),
114
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
115
  'node_id': node_id
116
- })
 
 
 
117
  processed_lines.add(start_line)
118
 
119
- # Handle variables in function definitions (input variables)
120
- category_sequence = [category]
121
  if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.args.args:
122
  for arg in node.args.args:
123
- var_start = start_line # Assume args are on the same line as function def for simplicity
124
  if var_start not in processed_lines:
125
- arg_category = get_category(arg, node) or 'input_variable'
126
- if arg_category not in counters:
127
- arg_category = 'input_variable'
128
  counters[arg_category] += 1
129
  var_node_id = f"InputVariable[{counters[arg_category]}]"
130
  parts.append({
131
  'category': arg_category,
132
- 'source': f" {arg.arg},", # Indented as part of function
133
  'location': (var_start, var_start),
134
  'level': level + 1,
135
  'vector': create_vector(arg_category, level + 1, (var_start, var_start), total_lines, current_path),
@@ -137,9 +157,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
137
  'node_id': var_node_id
138
  })
139
  processed_lines.add(var_start)
140
- category_sequence.append(arg_category)
141
 
142
- # Process nested bodies
143
  nested_prev_end = start_line
144
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
145
  if hasattr(node, attr) and getattr(node, attr):
@@ -162,10 +180,8 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
162
  'node_id': sub_node_id
163
  })
164
  processed_lines.add(child_start)
165
- category_sequence.append(sub_category)
166
  child_parts, child_seq = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
167
  parts.extend(child_parts)
168
- category_sequence.extend(child_seq)
169
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
170
  elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
171
  if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
@@ -181,10 +197,8 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
181
  'node_id': sub_node_id
182
  })
183
  processed_lines.add(child_start)
184
- category_sequence.append('except')
185
  child_parts, child_seq = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
186
  parts.extend(child_parts)
187
- category_sequence.extend(child_seq)
188
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
189
  elif attr == 'finalbody':
190
  if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
@@ -200,40 +214,18 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
200
  'node_id': sub_node_id
201
  })
202
  processed_lines.add(child_start)
203
- category_sequence.append('finally')
204
  child_parts, child_seq = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
205
  parts.extend(child_parts)
206
- category_sequence.extend(child_seq)
207
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
208
  else:
209
- # Handle assignments and returns for variable detection
210
  if isinstance(child, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
211
- # Handle different target structures
212
- if isinstance(child, ast.Assign):
213
- for target in child.targets:
214
- if isinstance(target, ast.Name):
215
- var_start = child.lineno
216
- if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
217
- counters['assigned_variable'] += 1
218
- var_node_id = f"AssignedVariable[{counters['assigned_variable']}]"
219
- parts.append({
220
- 'category': 'assigned_variable',
221
- 'source': lines[var_start - 1],
222
- 'location': (var_start, var_start),
223
- 'level': level + 1,
224
- 'vector': create_vector('assigned_variable', level + 1, (var_start, var_start), total_lines, current_path),
225
- 'parent_path': f"{current_path[0]} -> {var_node_id}",
226
- 'node_id': var_node_id
227
- })
228
- processed_lines.add(var_start)
229
- category_sequence.append('assigned_variable')
230
- else: # AnnAssign or AugAssign
231
- target = child.target
232
  if isinstance(target, ast.Name):
233
  var_start = child.lineno
234
  if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
235
  counters['assigned_variable'] += 1
236
  var_node_id = f"AssignedVariable[{counters['assigned_variable']}]"
 
237
  parts.append({
238
  'category': 'assigned_variable',
239
  'source': lines[var_start - 1],
@@ -241,10 +233,10 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
241
  'level': level + 1,
242
  'vector': create_vector('assigned_variable', level + 1, (var_start, var_start), total_lines, current_path),
243
  'parent_path': f"{current_path[0]} -> {var_node_id}",
244
- 'node_id': var_node_id
 
245
  })
246
  processed_lines.add(var_start)
247
- category_sequence.append('assigned_variable')
248
  elif isinstance(child, ast.Return):
249
  for value in ast.walk(child):
250
  if isinstance(value, ast.Name):
@@ -262,13 +254,10 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
262
  'node_id': var_node_id
263
  })
264
  processed_lines.add(var_start)
265
- category_sequence.append('returned_variable')
266
  child_parts, child_seq = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters, processed_lines)
267
  parts.extend(child_parts)
268
- category_sequence.extend(child_seq)
269
  nested_prev_end = child_parts[-1]['location'][1] if child_parts else nested_prev_end
270
 
271
- # Update end_line and source of the parent node if its body extends it
272
  if nested_prev_end > start_line and start_line not in processed_lines:
273
  final_end = nested_prev_end
274
  if start_line not in processed_lines:
@@ -277,7 +266,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
277
  parts[-1]['vector'] = create_vector(category, level, (start_line, final_end), total_lines, current_path)
278
  processed_lines.update(range(start_line, final_end + 1))
279
 
280
- return parts, category_sequence
281
 
282
  def parse_python_code(code):
283
  lines = code.splitlines(keepends=True)
@@ -290,39 +279,32 @@ def parse_python_code(code):
290
  parts = []
291
  prev_end = 0
292
  processed_lines = set()
293
- category_sequence = []
 
294
 
295
  for stmt in tree.body:
296
- stmt_parts, stmt_seq = parse_node(stmt, lines, prev_end, total_lines=total_lines, processed_lines=processed_lines)
 
 
 
297
  parts.extend(stmt_parts)
298
- category_sequence.extend(stmt_seq)
299
  prev_end = stmt_parts[-1]['location'][1] if stmt_parts else prev_end
300
 
301
  if prev_end < total_lines:
302
- remaining_lines = lines[prev_end:]
303
- remaining_lines_set = set(range(prev_end + 1, total_lines + 1))
304
- if not remaining_lines_set.issubset(processed_lines):
305
- for i, line in enumerate(remaining_lines, prev_end + 1):
306
- if i not in processed_lines:
307
- if is_blank_or_comment(line):
308
- counters = {'spacer': 0}
309
- counters['spacer'] += 1
310
- spacer_node_id = f"Spacer[{counters['spacer']}]"
311
- parts.append({
312
- 'category': 'spacer',
313
- 'source': line,
314
- 'location': (i, i),
315
- 'level': 0,
316
- 'vector': create_vector('spacer', 0, (i, i), total_lines, []),
317
- 'parent_path': 'Top-Level',
318
- 'node_id': spacer_node_id
319
- })
320
- processed_lines.add(i)
321
- category_sequence.append('spacer')
322
-
323
- return parts, category_sequence
324
 
325
def is_blank_or_comment(line):
    """Return True when *line* is empty, whitespace-only, or starts with '#'."""
    content = line.strip()
    if not content:
        return True
    return content.startswith('#')
 
 
1
  import ast
2
+ from collections import defaultdict
3
 
4
  def get_category(node, parent=None):
5
  """Determine the category of an AST node or variable context, including variable roles."""
6
  if isinstance(node, (ast.Import, ast.ImportFrom)):
7
  return 'import'
8
+ elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
9
  return 'function'
10
  elif isinstance(node, ast.ClassDef):
11
  return 'class'
 
26
  elif isinstance(node, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
27
  if parent and isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef, ast.If, ast.Try, ast.While, ast.For)):
28
  return 'assigned_variable'
29
+ elif isinstance(node, ast.arg):
30
  if parent and isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef)):
31
  return 'input_variable'
32
+ elif isinstance(node, ast.Name):
33
  if parent and isinstance(parent, ast.Return):
34
  return 'returned_variable'
35
+ return 'other'
 
36
 
37
def get_value(node):
    """Describe an AST value node as a short string (e.g. for an assignment).

    Constants yield ``str()`` of their value and names yield their identifier.
    Binary operations and calls yield fixed placeholders; every other input
    yields ``'<complex>'``.
    """
    if isinstance(node, ast.Constant):
        return str(node.value)
    if isinstance(node, ast.Name):
        return node.id

    # Node kinds that are summarised by a fixed placeholder.
    placeholders = (
        (ast.BinOp, '<expression>'),
        (ast.Call, '<function_call>'),
    )
    for node_type, label in placeholders:
        if isinstance(node, node_type):
            return label
    return '<complex>'
 
 
 
 
 
48
 
49
def collect_variable_usage(tree):
    """Collect definitions and uses of variables, respecting function scope.

    Args:
        tree: A parsed module (anything exposing a ``body`` list of AST
            statements, e.g. the result of ``ast.parse``).

    Returns:
        ``(var_defs, var_uses)``: two ``defaultdict(list)`` objects mapping a
        variable name to ``(node_id, scope)`` tuples. ``scope`` is ``'global'``
        or the name of the enclosing function. ``node_id`` is the ``node_id``
        attribute of the nearest enclosing AST node that carries one, or
        ``None`` when no such attribute has been attached. Function
        parameters are recorded as ``'InputVariable[<name>]'``.

    Notes:
        * Every name occurrence is recorded exactly once.
        * A bare annotation (``x: int``) has no value and is handled without
          error; the target is still recorded as a definition.
        * Decorators, parameter defaults and annotations are attributed to the
          enclosing scope, since that is where they are evaluated.
        * Only ``args.args`` parameters are recorded as definitions.
    """
    var_defs = defaultdict(list)  # {var_name: [(node_id, scope)]}
    var_uses = defaultdict(list)  # {var_name: [(node_id, scope)]}

    def record_names(expr, owner_id, scope, bind):
        """Record names inside *expr*: loads as uses and, if *bind*, stores as defs."""
        for sub in ast.walk(expr):
            if not isinstance(sub, ast.Name):
                continue
            if isinstance(sub.ctx, ast.Load):
                var_uses[sub.id].append((owner_id, scope))
            elif bind and isinstance(sub.ctx, ast.Store):
                var_defs[sub.id].append((owner_id, scope))

    def traverse(node, scope, owner_id=None):
        # Prefer the node's own id; otherwise inherit the enclosing one so
        # that names nested inside a statement are attributed to it.
        owner_id = getattr(node, 'node_id', None) or owner_id

        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # Evaluated where the function is defined, not inside it.
            for decorator in node.decorator_list:
                traverse(decorator, scope, owner_id)
            traverse(node.args, scope, owner_id)
            if node.returns is not None:
                traverse(node.returns, scope, owner_id)

            inner_scope = node.name
            for arg in node.args.args:
                var_defs[arg.arg].append((f"InputVariable[{arg.arg}]", inner_scope))
            for body_node in node.body:
                traverse(body_node, inner_scope, owner_id)
            return

        if isinstance(node, (ast.Assign, ast.AnnAssign)):
            targets = node.targets if isinstance(node, ast.Assign) else [node.target]
            for target in targets:
                # Walking the target also covers tuple unpacking (defs) and
                # attribute/subscript targets (whose base names are uses).
                record_names(target, owner_id, scope, bind=True)
            if isinstance(node, ast.AnnAssign):
                traverse(node.annotation, scope, owner_id)
            # AnnAssign.value is None for a bare annotation such as `x: int`.
            if node.value is not None:
                record_names(node.value, owner_id, scope, bind=False)
            return

        if isinstance(node, ast.Name):
            if isinstance(node.ctx, ast.Load):
                var_uses[node.id].append((owner_id, scope))
            return

        for child in ast.iter_child_nodes(node):
            traverse(child, scope, owner_id)

    for stmt in tree.body:
        traverse(stmt, 'global')

    return var_defs, var_uses
86
 
87
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None, processed_lines=None):
88
  if total_lines is None:
 
90
  if parent_path is None:
91
  parent_path = []
92
  if counters is None:
93
+ counters = {cat: 0 for cat in ['import', 'function', 'class', 'if', 'while', 'for', 'try', 'return', 'expression', 'other', 'spacer', 'elif', 'else', 'except', 'finally', 'assigned_variable', 'input_variable', 'returned_variable']}
94
  if processed_lines is None:
95
  processed_lines = set()
96
 
 
98
  start_line = getattr(node, 'lineno', prev_end + 1)
99
  end_line = getattr(node, 'end_lineno', start_line)
100
 
 
101
  if any(line in processed_lines for line in range(start_line, end_line + 1)):
102
  return parts, []
103
 
 
104
  category = get_category(node, parent_path[-1] if parent_path else None) or 'other'
105
  if category not in counters:
106
  category = 'other'
107
  counters[category] += 1
108
  node_id = f"{category.capitalize()}[{counters[category]}]"
109
+ setattr(node, 'node_id', node_id) # Attach node_id to AST node
110
 
 
111
  if start_line > prev_end + 1:
112
+ for i, line in enumerate(lines[prev_end:start_line - 1], prev_end + 1):
113
+ if i not in processed_lines and is_blank_or_comment(line):
114
+ counters['spacer'] += 1
115
+ spacer_node_id = f"Spacer[{counters['spacer']}]"
116
+ parts.append({
117
+ 'category': 'spacer',
118
+ 'source': line,
119
+ 'location': (i, i),
120
+ 'level': level,
121
+ 'vector': create_vector('spacer', level, (i, i), total_lines, parent_path),
122
+ 'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
123
+ 'node_id': spacer_node_id
124
+ })
125
+ processed_lines.add(i)
 
 
 
126
 
 
127
  current_path = parent_path + [node_id]
128
  if start_line not in processed_lines and not is_blank_or_comment(lines[start_line - 1]):
129
+ part = {
130
  'category': category,
131
  'source': lines[start_line - 1],
132
  'location': (start_line, start_line),
 
134
  'vector': create_vector(category, level, (start_line, start_line), total_lines, current_path),
135
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
136
  'node_id': node_id
137
+ }
138
+ if category == 'assigned_variable':
139
+ part['value'] = get_value(node.value) if hasattr(node, 'value') else '<unknown>'
140
+ parts.append(part)
141
  processed_lines.add(start_line)
142
 
 
 
143
  if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.args.args:
144
  for arg in node.args.args:
145
+ var_start = start_line
146
  if var_start not in processed_lines:
147
+ arg_category = 'input_variable'
 
 
148
  counters[arg_category] += 1
149
  var_node_id = f"InputVariable[{counters[arg_category]}]"
150
  parts.append({
151
  'category': arg_category,
152
+ 'source': f" {arg.arg},",
153
  'location': (var_start, var_start),
154
  'level': level + 1,
155
  'vector': create_vector(arg_category, level + 1, (var_start, var_start), total_lines, current_path),
 
157
  'node_id': var_node_id
158
  })
159
  processed_lines.add(var_start)
 
160
 
 
161
  nested_prev_end = start_line
162
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
163
  if hasattr(node, attr) and getattr(node, attr):
 
180
  'node_id': sub_node_id
181
  })
182
  processed_lines.add(child_start)
 
183
  child_parts, child_seq = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
184
  parts.extend(child_parts)
 
185
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
186
  elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
187
  if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
 
197
  'node_id': sub_node_id
198
  })
199
  processed_lines.add(child_start)
 
200
  child_parts, child_seq = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
201
  parts.extend(child_parts)
 
202
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
203
  elif attr == 'finalbody':
204
  if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
 
214
  'node_id': sub_node_id
215
  })
216
  processed_lines.add(child_start)
 
217
  child_parts, child_seq = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
218
  parts.extend(child_parts)
 
219
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
220
  else:
 
221
  if isinstance(child, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
222
+ for target in (child.targets if isinstance(child, ast.Assign) else [child.target]):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  if isinstance(target, ast.Name):
224
  var_start = child.lineno
225
  if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
226
  counters['assigned_variable'] += 1
227
  var_node_id = f"AssignedVariable[{counters['assigned_variable']}]"
228
+ value = get_value(child.value) if hasattr(child, 'value') else '<unknown>'
229
  parts.append({
230
  'category': 'assigned_variable',
231
  'source': lines[var_start - 1],
 
233
  'level': level + 1,
234
  'vector': create_vector('assigned_variable', level + 1, (var_start, var_start), total_lines, current_path),
235
  'parent_path': f"{current_path[0]} -> {var_node_id}",
236
+ 'node_id': var_node_id,
237
+ 'value': value
238
  })
239
  processed_lines.add(var_start)
 
240
  elif isinstance(child, ast.Return):
241
  for value in ast.walk(child):
242
  if isinstance(value, ast.Name):
 
254
  'node_id': var_node_id
255
  })
256
  processed_lines.add(var_start)
 
257
  child_parts, child_seq = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters, processed_lines)
258
  parts.extend(child_parts)
 
259
  nested_prev_end = child_parts[-1]['location'][1] if child_parts else nested_prev_end
260
 
 
261
  if nested_prev_end > start_line and start_line not in processed_lines:
262
  final_end = nested_prev_end
263
  if start_line not in processed_lines:
 
266
  parts[-1]['vector'] = create_vector(category, level, (start_line, final_end), total_lines, current_path)
267
  processed_lines.update(range(start_line, final_end + 1))
268
 
269
+ return parts, []
270
 
271
  def parse_python_code(code):
272
  lines = code.splitlines(keepends=True)
 
279
  parts = []
280
  prev_end = 0
281
  processed_lines = set()
282
+
283
+ var_defs, var_uses = collect_variable_usage(tree)
284
 
285
  for stmt in tree.body:
286
+ stmt_parts, _ = parse_node(stmt, lines, prev_end, total_lines=total_lines, processed_lines=processed_lines)
287
+ for part in stmt_parts:
288
+ part['var_defs'] = var_defs
289
+ part['var_uses'] = var_uses
290
  parts.extend(stmt_parts)
 
291
  prev_end = stmt_parts[-1]['location'][1] if stmt_parts else prev_end
292
 
293
  if prev_end < total_lines:
294
+ for i, line in enumerate(lines[prev_end:], prev_end + 1):
295
+ if i not in processed_lines and is_blank_or_comment(line):
296
+ counters = {'spacer': 0}
297
+ counters['spacer'] += 1
298
+ spacer_node_id = f"Spacer[{counters['spacer']}]"
299
+ parts.append({
300
+ 'category': 'spacer',
301
+ 'source': line,
302
+ 'location': (i, i),
303
+ 'level': 0,
304
+ 'vector': create_vector('spacer', 0, (i, i), total_lines, []),
305
+ 'parent_path': 'Top-Level',
306
+ 'node_id': spacer_node_id
307
+ })
308
+ processed_lines.add(i)
 
 
 
 
 
 
 
309
 
310
+ return parts, []