Example classifier on Numerai data using logistic regression. To get started, install the required packages: `pip install pandas numpy scikit-learn`
In [10]:
import os

import numpy as np
import pandas as pd
from sklearn import metrics, preprocessing, linear_model
Set the paths to the raw input data and to the directory where the predictions will be written
In [11]:
# Data locations. Each one can be overridden through an environment variable so
# the notebook can run on another machine without editing this cell; when the
# variable is unset, the original local path is used unchanged.
# os.path.join(path, '') appends a trailing separator only when one is missing:
# other cells build file names by plain string concatenation (directory + name).
dirRawData = os.path.join(
    os.environ.get(
        'NUMERAI_RAW_DATA_DIR',
        '/home/john/Projects/RepoNumerAI/data/raw/numerai_datasets/19_03_2017/'),
    '')
dirOutputData = os.path.join(
    os.environ.get(
        'NUMERAI_OUTPUT_DIR',
        '/home/john/Projects/RepoNumerAI/data/processed/'),
    '')
In [12]:
# Seed NumPy's global random generator so repeated runs are reproducible
np.random.seed(0)

print("Loading data...")
# Read both Numerai CSV files; the first row of each file holds the column names
training_csv = dirRawData + 'numerai_training_data.csv'
tournament_csv = dirRawData + 'numerai_tournament_data.csv'
training_data = pd.read_csv(training_csv, header=0)
prediction_data = pd.read_csv(tournament_csv, header=0)
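Split the loaded data: the training data into features (`X`) and target (`Y`), and the tournament data into ids (`t_id`) and features (`x_prediction`). These remain pandas objects, which scikit-learn accepts directly.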
In [13]:
# Training set: 'target' is the label; every other column stays in X
X = training_data.drop(labels='target', axis='columns')
Y = training_data.loc[:, 'target']
# Tournament set: keep 't_id' aside and use the remaining columns as model inputs
x_prediction = prediction_data.drop(labels='t_id', axis='columns')
t_id = prediction_data.loc[:, 't_id']
Define the model that will learn to predict the target, and fit it on the training data
In [14]:
print("Training...")
# Logistic-regression classifier; n_jobs=-1 asks scikit-learn to use all available processors
model = linear_model.LogisticRegression(n_jobs=-1)
# Fit on the numerai_training_data: features X, labels Y
model.fit(X=X, y=Y)
Out[14]:
In [15]:
print("Predicting...")
# Apply the trained model to the numerai_tournament_data features.
# predict_proba yields two columns: [probability of 0, probability of 1];
# only the second one (probability that the target is 1) is kept.
y_prediction = model.predict_proba(x_prediction)
results = y_prediction[:, 1]
# Pair each t_id with its probability; the join aligns both frames on their row index
results_df = pd.DataFrame({'probability': results})
joined = t_id.to_frame().join(results_df)
In [16]:
print("Writing predictions to predictions.csv")
# Make sure the output directory exists: to_csv does not create missing
# directories, and failing here would waste the training and prediction work.
# An empty dirOutputData means the current directory, which needs no creation.
if dirOutputData and not os.path.isdir(dirOutputData):
    os.makedirs(dirOutputData)
# Save the predictions out to a CSV file (index=False leaves out the row index)
joined.to_csv(dirOutputData + "predictions.csv", index=False)
# Now you can upload these predictions on numer.ai
In [ ]: