notebook.community

Edit and run



In [1]:

    
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm

from sklearn.ensemble.forest import RandomForestRegressor

from sklearn.metrics import mean_squared_error, r2_score

import math
from dateutil.relativedelta import relativedelta
from datetime import datetime, date
%matplotlib inline
plt.rcParams['figure.figsize']=(20,10)
plt.style.use('ggplot')



In [ ]:



In [12]:

    
def prepareDataForClassificationRF(dataset, split_index=400):
    """
    Split the housing data into train and test partitions by index label.

    Despite the function name, no categorical column is generated: the
    ``lotsize`` column is used as the single feature and ``price`` as the
    target, and both are split at the same index label.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame containing at least the ``lotsize`` and ``price`` columns.
    split_index : int, optional
        Rows whose index label is strictly below this value form the
        training set; all remaining rows form the test set. Defaults to
        400, the split point this notebook has always used.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, dataset)`` where the first four
        items are pandas Series and ``dataset`` is the input frame itself,
        returned unchanged.
    """
    X = dataset['lotsize']
    y = dataset['price']

    # Both Series come from the same frame, so they share one index and a
    # single pair of boolean masks splits feature and target consistently.
    in_train = X.index < split_index
    in_test = X.index >= split_index

    X_train, y_train = X[in_train], y[in_train]
    X_test, y_test = X[in_test], y[in_test]

    return X_train, y_train, X_test, y_test, dataset



In [7]:

    
# Read the housing data and keep only the target (price) and the single
# feature (lotsize) used in this notebook.
df = pd.read_csv('../examples/Housing.csv')[['price', 'lotsize']]



In [8]:

    
# Show the last five rows of the loaded frame.
df.tail(5)



In [10]:

    
# Draw each column of df in its own subplot, plotted against the row index.
df.plot(subplots=True)









    Out[10]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7fe718e329d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe7191be550>], dtype=object)



In [13]:

    
# Split lotsize (X) and price (y) into train/test Series; `dataset` is the
# frame that was passed in, handed back by the helper.
X_train, y_train, X_test, y_test, dataset  = prepareDataForClassificationRF(df)



In [28]:

    
# Fit a random forest regressor that predicts price from lot size alone.
# random_state pins the forest so that a full re-run reproduces the same
# predictions and scores.
RF_Model = RandomForestRegressor(n_estimators=100,
                                 max_features=1, oob_score=True,
                                 random_state=42)
labels = y_train
# scikit-learn expects a 2-D (n_samples, n_features) array. Go through
# .values first: indexing a pandas Series directly with [:, None] is not
# supported by current pandas.
features = X_train.values[:, None]
rgr = RF_Model.fit(features, labels)

# Predictions are stored as frames indexed by `predicted_price` because the
# later cells call .reset_index() on them to get the column back.
X_test_predict = pd.DataFrame(
    {'predicted_price': rgr.predict(X_test.values[:, None])}
).set_index('predicted_price')
X_train_predict = pd.DataFrame(
    {'predicted_price': rgr.predict(features)}
).set_index('predicted_price')

# Train rows first, then test rows, matching the row order of df.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
RF_predict = pd.concat([X_train_predict, X_test_predict])



In [29]:

    
# Attach the predictions to df as a `predicted_price` column, aligned on the
# row index. Any existing copy of the column is dropped first so the cell can
# be re-run; without that, a second run fails in join() because the column
# names overlap.
df = df.drop('predicted_price', axis=1, errors='ignore').join(
    RF_predict.reset_index())



In [30]:

    
# Show the first five rows, now including the predicted price.
df.head(5)









    Out[30]:







  
    
      
      price
      lotsize
      RFpredict
      predicted_price
    
  
  
    
      0
      42000.0
      5850
      53971.142857
      51526.00000
    
    
      1
      38500.0
      4000
      58810.523006
      58179.58937
    
    
      2
      49500.0
      3060
      47540.000000
      47570.00000
    
    
      3
      60500.0
      6650
      67013.626984
      65406.50000
    
    
      4
      61000.0
      6360
      74531.902525
      72239.44816



In [31]:

    
# Overlay the actual and the predicted price on one set of axes.
df.loc[:, ['price', 'predicted_price']].plot()









    Out[31]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe74c400350>



In [32]:

    
# Signed prediction error per row: positive means the model over-predicted.
df['diff'] = df['predicted_price'] - df['price']



In [33]:

    
# Show the last five rows, including the prediction error column.
df.tail(5)









    Out[33]:







  
    
      
      price
      lotsize
      RFpredict
      predicted_price
      diff
    
  
  
    
      541
      91500.0
      4800
      77032.147619
      76441.033333
      -15058.966667
    
    
      542
      94000.0
      6000
      94827.960101
      88660.732215
      -5339.267785
    
    
      543
      103000.0
      6000
      94827.960101
      88660.732215
      -14339.267785
    
    
      544
      105000.0
      6000
      94827.960101
      88660.732215
      -16339.267785
    
    
      545
      105000.0
      6000
      94827.960101
      88660.732215
      -16339.267785



In [34]:

    
# One bar per row showing the signed prediction error.
df['diff'].plot.bar()









    Out[34]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe71501be90>



In [35]:

    
# Check R^2 on the training rows. This is an in-sample score (predictions
# for X_train against y_train), not a held-out test score.
# .values is taken before adding the column axis because indexing a pandas
# Series directly with [:, None] is not supported by current pandas.
r2 = r2_score(y_train.values[:, None], X_train_predict.reset_index().values)



In [39]:

    
# Display the training-set R^2 computed above.
r2









    Out[39]:





0.6976325043846785



In [40]:









    Out[40]:





226585971.8148737



In [ ]:

	price	lotsize
541	91500.0	4800
542	94000.0	6000
543	103000.0	6000
544	105000.0	6000
545	105000.0	6000

	price	lotsize	RFpredict	predicted_price
0	42000.0	5850	53971.142857	51526.00000
1	38500.0	4000	58810.523006	58179.58937
2	49500.0	3060	47540.000000	47570.00000
3	60500.0	6650	67013.626984	65406.50000
4	61000.0	6360	74531.902525	72239.44816

	price	lotsize	RFpredict	predicted_price	diff
541	91500.0	4800	77032.147619	76441.033333	-15058.966667
542	94000.0	6000	94827.960101	88660.732215	-5339.267785
543	103000.0	6000	94827.960101	88660.732215	-14339.267785
544	105000.0	6000	94827.960101	88660.732215	-16339.267785
545	105000.0	6000	94827.960101	88660.732215	-16339.267785