tiny_llm_transformer / generate_data.py
xcx0902's picture
Upload folder using huggingface_hub
9003bbb verified
raw
history blame contribute delete
244 Bytes
import pandas
# Export a small sample of the dataset as plain text for training:
# reads "data.parquet" and writes the "text" field of the leading rows to
# "train_data.txt", one record per line.

# Number of leading rows of the dataset to export.
MAX_ROWS = 25

df = pandas.read_parquet("data.parquet")

# The `with` block guarantees the output file is flushed and closed even if
# a write fails part-way (e.g. a non-string value in the "text" column).
with open("train_data.txt", "w", encoding="utf-8") as f:
    # Only the "text" column is needed, so iterate it directly instead of
    # materialising every full row; head() caps the export at MAX_ROWS and
    # simply yields fewer values when the dataset is shorter.
    for text in df["text"].head(MAX_ROWS):
        f.write(text + "\n")