In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [21]:
# Display config: print NumPy floats in fixed-point form (no scientific
# notation) with three digits after the decimal point.
np.set_printoptions(suppress=True, precision=3)

Data Preparation

Importing a dataset


In [22]:
# Load the raw table from a CSV file; the relative path is resolved against
# the notebook's current working directory.
dataset = pd.read_csv('Position_Salaries.csv')

In [23]:
# Feature matrix: the slice 1:2 selects only the column at position 1 but
# keeps the result two-dimensional, i.e. shape (n_rows, 1).
X = dataset.iloc[:, 1:2].values
# Target vector: the integer index 2 selects the column at position 2 as a
# one-dimensional array of shape (n_rows,).
y = dataset.iloc[:, 2].values

Regression Model

Fitting the model to the dataset


In [24]:
from sklearn.ensemble import RandomForestRegressor

In [25]:
# Random forest with 300 trees; random_state is pinned so repeated runs
# build the same forest.
regressor = RandomForestRegressor(n_estimators=300, random_state=0)
# fit() is left as the cell's last expression, so the notebook displays the
# fitted estimator that it returns.
regressor.fit(X, y)


Out[25]:
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=300, n_jobs=1, oob_score=False, random_state=0,
           verbose=0, warm_start=False)

Predicting a new result


In [26]:
# predict() expects a 2-D array-like of shape (n_samples, n_features); a bare
# scalar is rejected by current scikit-learn. Wrap the single query value 6.5
# as one sample with one feature, matching the (n_rows, 1) shape used to fit.
regressor.predict([[6.5]])


Out[26]:
array([ 160333.333])

Visualising the Regression results


In [27]:
# Dense grid (step 0.01) across the observed feature range so the step-wise
# shape of the forest's predictions is visible. X.min()/X.max() return true
# scalars; the built-in min()/max() on a 2-D array return 1-element arrays,
# and handing those to np.arange relies on implicit array-to-scalar
# conversion, which recent NumPy releases deprecate.
# Note: np.arange excludes the stop value, so the grid ends just below X.max().
X_grid = np.arange(X.min(), X.max(), 0.01).reshape(-1, 1)  # column vector for predict()

# Observed points in red, model predictions over the grid in blue.
plt.scatter(X, y, color='red')
plt.plot(X_grid, regressor.predict(X_grid), color='blue')
plt.title('Truth or Bluff (Random Forest Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()