Srinivasan Iyer sviyer committed on
Commit
e299427
·
unverified ·
1 Parent(s): 138c2f3

Cast int sample id to str (#96)

Browse files

Co-authored-by: Srini Iyer <[email protected]>

bytelatent/data/iterators/arrow_iterator.py CHANGED
@@ -215,7 +215,7 @@ class ArrowFileIterator(StatefulIterator):
215
  raise ValueError(f"Unknown file format: {self.file_format}")
216
  for i in range(len(sample_ids)):
217
  out = BltExample(
218
- sample_id=sample_ids[i],
219
  entropies=entropies[i] if entropies is not None else None,
220
  text=texts[i],
221
  tokens=None,
@@ -249,7 +249,7 @@ class ArrowFileIterator(StatefulIterator):
249
  raise ValueError(f"Unknown file format: {self.file_format}")
250
  for i in range(len(sample_ids)):
251
  out = BltExample(
252
- sample_id=sample_ids[i],
253
  entropies=entropies[i] if entropies is not None else None,
254
  text=texts[i],
255
  tokens=None,
 
215
  raise ValueError(f"Unknown file format: {self.file_format}")
216
  for i in range(len(sample_ids)):
217
  out = BltExample(
218
+ sample_id=str(sample_ids[i]),
219
  entropies=entropies[i] if entropies is not None else None,
220
  text=texts[i],
221
  tokens=None,
 
249
  raise ValueError(f"Unknown file format: {self.file_format}")
250
  for i in range(len(sample_ids)):
251
  out = BltExample(
252
+ sample_id=str(sample_ids[i]),
253
  entropies=entropies[i] if entropies is not None else None,
254
  text=texts[i],
255
  tokens=None,