"""Constant-zero baseline script.

Reads the train/test CSVs from ``./input``, holds out 10% of the training
rows, prints the accuracy obtained by predicting ``0`` for every held-out
row, and writes an all-zero submission to ``./working/submission.csv``.
"""

import os

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# --- Load inputs -----------------------------------------------------------
train_df = pd.read_csv("./input/train.csv")
test_df = pd.read_csv("./input/test.csv")
# NOTE(review): not referenced again in this script; the read is kept so a
# missing sample file still fails early. Drop it if that is not needed.
sample_submission = pd.read_csv("./input/sample_submission.csv")

# --- Hold-out split --------------------------------------------------------
# Fixed random_state keeps the 90/10 split reproducible between runs.
# train_texts / train_indices are unused by this baseline (nothing is fitted).
train_texts, val_texts, train_indices, val_indices = train_test_split(
    train_df["text"], train_df["index"], test_size=0.1, random_state=42
)

# --- Validation ------------------------------------------------------------
# Baseline "model": predict label 0 for every validation row.
val_predictions = [0] * len(val_texts)

accuracy = accuracy_score(val_indices, val_predictions)
print(f"Validation accuracy: {accuracy}")

# --- Submission ------------------------------------------------------------
# Same constant prediction for every test row.
test_predictions = [0] * len(test_df)
submission = pd.DataFrame(
    {"ciphertext_id": test_df["ciphertext_id"], "index": test_predictions}
)

# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing.
os.makedirs("./working", exist_ok=True)
submission.to_csv("./working/submission.csv", index=False)