Very simple notebook that fits the restaurant revenue data set using a Random Forest Regressor (RFR) and a Gradient Boosting Regressor (GBR)

Import libraries and read training and testing data files

import pandas as pd
import csv as csv
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder  
from sklearn.ensemble import GradientBoostingRegressor  #GBM algorithm

# Load the training and testing CSVs with identical options: the first row is
# the header and column index 1 is parsed as dates.
trainData, testData = (
    pd.read_csv(csv_path, header=0, parse_dates=[1])
    for csv_path in ('data/train.csv', 'data/test.csv')
)

Convert 'Open Date', 'City', 'City Group', and 'Type' to numerical values

# Turn the non-numeric columns of both data sets into numbers, in place.
#
# 'Open Date' is replaced by the age of the restaurant in years, measured from
# the moment this cell runs. A single reference time is shared by the train
# and test frames so both ages are on the same scale.
# (Assumes 'Open Date' was parsed as datetimes when the CSVs were loaded.)
reference_time = datetime.now()
for frame in (trainData, testData):
    # .dt.days gives whole days; dividing by 365.0 keeps the result a float
    # regardless of the interpreter's integer-division rules.
    frame['Open Date'] = (reference_time - frame['Open Date']).dt.days / 365.0

# Replace 'Type', 'City Group' and 'City' by integer indicators.
# Each encoder is fitted once on the union of the train and test values so a
# given category maps to the same integer in both frames; fitting the two
# frames separately would assign unrelated codes and make the test features
# meaningless to a model trained on the training codes.
for column in ('Type', 'City Group', 'City'):
    encoder = LabelEncoder()
    encoder.fit(pd.concat([trainData[column], testData[column]]))
    trainData[column] = encoder.transform(trainData[column])
    testData[column] = encoder.transform(testData[column])

# Separate the Y array
Y_train = trainData['revenue']
# Drop the Id and Y variable to create the final X array to be fitted
X_train = trainData.drop(['Id', 'revenue'], axis=1)

# Keep the test Ids for the submission files, then drop them from the features
ids = testData['Id'].values
testData = testData.drop(['Id'], axis=1)

# Note: You need to restart the Kernel after any modifications to this cell
# (the cell overwrites trainData/testData in place, so running it a second
# time on the already-converted frames will fail).

Simple Random Forest Regressor fit and Submission file

# Creating a RFR with mostly default parameters
forest = RandomForestRegressor(n_estimators=1000, criterion='mse', max_depth=None, min_samples_split=2,
                               min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto',
                               max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=-1,
                               random_state=None, verbose=0, warm_start=False)
# Fit the training data; without this step predict() is called on an
# unfitted model and raises.
forest.fit(X_train, Y_train)
# Predict the testing data
output = forest.predict(testData)

# Write (Id, prediction) rows into the submission file. The with-block makes
# sure the file is flushed and closed even if writing fails.
# NOTE: "wb" is the mode the Python 2 csv module expects; under Python 3 the
# file would have to be opened with mode "w" and newline="" instead.
with open("simpleRFR.csv", "wb") as predictions_file:
    open_file_object = csv.writer(predictions_file)
    open_file_object.writerows(zip(ids, output))
print('Done.')


Simple Gradient Boosting Regressor fit and Submission file

# Creating a GBR with mostly default parameters
gbr = GradientBoostingRegressor(max_depth=None, max_features='auto', min_samples_leaf=1,
                                n_estimators=1000, learning_rate=0.01)
# Fit the training data; without this step predict() is called on an
# unfitted model and raises.
gbr.fit(X_train, Y_train)
# Predict the testing data
output = gbr.predict(testData)

# Write (Id, prediction) rows into the submission file. The with-block makes
# sure the file is flushed and closed even if writing fails.
# NOTE: "wb" is the mode the Python 2 csv module expects; under Python 3 the
# file would have to be opened with mode "w" and newline="" instead.
with open("simpleGBR.csv", "wb") as predictions_file:
    open_file_object = csv.writer(predictions_file)
    open_file_object.writerows(zip(ids, output))
print('Done.')


