Loading the training and validation data from NumPy's native binary files is much faster than re-parsing CSV on every run. This notebook reads the CSV files once and writes them back out as compressed `.npz` archives.


In [1]:
import numpy as np

Preliminary Data Files


In [5]:
# Archive key -> source CSV for the preliminary data set. Dict order is the
# order in which the files are parsed and the arrays are written.
prelim_sources = {
    "X": "train_X.csv",
    "Y": "train_Y.csv",
    "X_val": "val_X.csv",
    "Y_val": "val_Y.csv",
}

# Parse every comma-delimited file into a NumPy array.
prelim_arrays = {
    key: np.genfromtxt(csv_path, delimiter=',')
    for key, csv_path in prelim_sources.items()
}

# Keep the arrays available under their usual notebook-level names.
X, Y, X_val, Y_val = (prelim_arrays[key] for key in prelim_sources)

# Bundle all four arrays into one compressed archive, stored under the same
# key names as the variables above.
np.savez_compressed("prelim_files.npz", **prelim_arrays)

Final Data Files


In [6]:
# Archive key -> source CSV for the final data set. Dict order is the order
# in which the files are parsed and the arrays are written.
# NOTE(review): no validation-label file is loaded here, unlike the
# preliminary set — presumably intentional; confirm.
final_sources = {
    "X_final": "train_X_final.csv",
    "Y_final": "train_Y_final.csv",
    "X_val_final": "val_X_final.csv",
}

# Parse every comma-delimited file into a NumPy array.
final_arrays = {
    key: np.genfromtxt(csv_path, delimiter=',')
    for key, csv_path in final_sources.items()
}

# Keep the arrays available under their usual notebook-level names.
X_final, Y_final, X_val_final = (final_arrays[key] for key in final_sources)

# Bundle the three arrays into one compressed archive, stored under the same
# key names as the variables above.
np.savez_compressed("final_files.npz", **final_arrays)

In [ ]: