In [1]:

    
from datetime import datetime, timedelta
import itertools
from IPython.display import clear_output
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
import time
import numpy as np
import pandas as pd
pd.options.display.max_columns = None
pd.options.display.max_rows = None
import matplotlib.pyplot as plt
%matplotlib inline
from collections import Counter, defaultdict
import seaborn as sns
import re
import math
from datetime import datetime

load data



In [2]:

    
data = pd.read_csv("./data/clean/cleanData.csv")
print data.shape









    



(49550, 21)



In [3]:

    
data.built = 2017 - data.built
data.Lastremodel = 2017 - data.Lastremodel
data.head()









    Out[3]:







  
    
      
      city
      zipcode
      area
      bed
      bath
      sqft
      price
      Zestimate
      type
      built
      Lastremodel
      year1
      price1
      year2
      price2
      year3
      price3
      year4
      price4
      year5
      price5
    
  
  
    
      0
      Fremont
      94555.0
      Northgate
      4.0
      1.0
      1400.0
      140000.0
      986436.0
      Single Family
      47.0
      47.0
      1999-06-01
      292000.0
      NaN
      140000.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      Fremont
      94538.0
      Downtown
      2.0
      2.0
      1042.0
      160000.0
      535006.0
      Condo
      45.0
      45.0
      NaN
      225000.0
      2017-04-28
      160000.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      Fremont
      94539.0
      Mission Valley
      3.0
      2.0
      1434.0
      130000.0
      1320951.0
      Single Family
      56.0
      55.0
      2017-04-25
      130000.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      3
      Fremont
      94538.0
      Sundale
      3.0
      1.0
      948.0
      141500.0
      668271.0
      Single Family
      58.0
      56.0
      2000-06-26
      329000.0
      2017-04-21
      141500.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      4
      Fremont
      94538.0
      Blacow
      2.0
      2.5
      1157.0
      60000.0
      710981.0
      Condo
      23.0
      23.0
      1995-02-17
      173000.0
      2000-02-29
      244500.0
      2015-07-21
      605000.0
      2017-03-31
      60000.0
      NaN
      NaN

which y to use?



In [4]:

    
plt.scatter(x=data.price, y=data.Zestimate)
plt.xlabel("price")
plt.ylabel("Zestimate")
plt.scatter(x=data.price, y=data.price, marker=".")









    Out[4]:





<matplotlib.collections.PathCollection at 0x1146d8310>

remove columns



In [5]:

    
use_columns = ["city","zipcode","area","bed","bath","sqft","Zestimate","type","built"]
data = data.loc[:, use_columns]
sns.barplot(y=data.isnull().sum().index, x=data.isnull().sum().values*1.0/data.shape[0])









    Out[5]:





<matplotlib.axes._subplots.AxesSubplot at 0x1148bd710>

remove rows



In [6]:

    
print data.shape
ind = data.isnull().sum(1) == 0
data = data.loc[ind, :]
print data.shape









    



(49550, 9)
(45870, 9)

convert to DF



In [7]:

    
data.head()









    Out[7]:







  
    
      
      city
      zipcode
      area
      bed
      bath
      sqft
      Zestimate
      type
      built
    
  
  
    
      0
      Fremont
      94555.0
      Northgate
      4.0
      1.0
      1400.0
      986436.0
      Single Family
      47.0
    
    
      1
      Fremont
      94538.0
      Downtown
      2.0
      2.0
      1042.0
      535006.0
      Condo
      45.0
    
    
      2
      Fremont
      94539.0
      Mission Valley
      3.0
      2.0
      1434.0
      1320951.0
      Single Family
      56.0
    
    
      3
      Fremont
      94538.0
      Sundale
      3.0
      1.0
      948.0
      668271.0
      Single Family
      58.0
    
    
      4
      Fremont
      94538.0
      Blacow
      2.0
      2.5
      1157.0
      710981.0
      Condo
      23.0



In [8]:

    
cate_df = data.loc[:, ["city", "zipcode", "area", "bet", "bath", "type"]]
cont_df = data.loc[:, ["sqft", "Zestimate", "built"]]

cate_df = cate_df.astype("str")



In [9]:

    
dummy_df = pd.get_dummies(cate_df)
print dummy_df.shape









    



(45870, 206)



In [10]:

    
df_full = pd.concat([cont_df, dummy_df], axis=1)
print df_full.shape
df_full.head()









    



(45870, 209)






    Out[10]:







  
    
      
      sqft
      Zestimate
      built
      city_Fremont
      city_Mountain View
      city_Newark
      city_Redwood City
      city_San Jose
      city_San Mateo
      city_Santa Clara
      city_Sunnyvale
      city_Union City
      zipcode_94040.0
      zipcode_94041.0
      zipcode_94043.0
      zipcode_94061.0
      zipcode_94062.0
      zipcode_94063.0
      zipcode_94065.0
      zipcode_94085.0
      zipcode_94086.0
      zipcode_94087.0
      zipcode_94089.0
      zipcode_94401.0
      zipcode_94402.0
      zipcode_94403.0
      zipcode_94404.0
      zipcode_94536.0
      zipcode_94538.0
      zipcode_94539.0
      zipcode_94555.0
      zipcode_94560.0
      zipcode_94587.0
      zipcode_95050.0
      zipcode_95051.0
      zipcode_95054.0
      zipcode_95110.0
      zipcode_95111.0
      zipcode_95112.0
      zipcode_95113.0
      zipcode_95116.0
      zipcode_95117.0
      zipcode_95118.0
      zipcode_95119.0
      zipcode_95120.0
      zipcode_95121.0
      zipcode_95122.0
      zipcode_95123.0
      zipcode_95124.0
      zipcode_95125.0
      zipcode_95126.0
      zipcode_95127.0
      zipcode_95128.0
      zipcode_95129.0
      zipcode_95130.0
      zipcode_95131.0
      zipcode_95132.0
      zipcode_95133.0
      zipcode_95134.0
      zipcode_95135.0
      zipcode_95136.0
      zipcode_95138.0
      zipcode_95139.0
      zipcode_95148.0
      area_28 Palms
      area_94040
      area_94041
      area_94043
      area_94062
      area_94560
      area_94587
      area_95050
      area_95051
      area_95054
      area_Almaden Valley
      area_Alum Rock-East Foothills
      area_Ampex
      area_Aragon
      area_Ardenwood
      area_Arlington
      area_Bay Meadows
      area_Baywood
      area_Baywood Knolls
      area_Baywood Park
      area_Berryessa
      area_Blacow
      area_Blossom Valley
      area_Brookvale
      area_Cabrillo
      area_Cambrian Park
      area_Cameron Hills
      area_Canyon
      area_Canyon Heights
      area_Centennial
      area_Centerville
      area_Central
      area_Central Business District
      area_Cherry-Guardino
      area_College Heights
      area_Country Club Heights
      area_Cuesta Park
      area_De Anza
      area_Dolphin
      area_Downtown
      area_Eagle Hill
      area_East Industrial
      area_East Murphy
      area_East San Jose
      area_East San Mateo
      area_Eastern Varsity Park
      area_Edenvale - Seven Trees
      area_Edgewater Isle
      area_Evergreen
      area_Fairgrounds
      area_Farm Hill
      area_Fiesta Gardens
      area_Foothill Terrace
      area_Friendly Acres
      area_Gemello Park
      area_Glenmoor
      area_Greater San Antonio
      area_Grimmer
      area_Hayward Park
      area_Irvington
      area_Kimber-Gomes
      area_Lakes and Birds
      area_Lakewood
      area_Laurelwood
      area_Lauriedale
      area_Lido
      area_Los Prados
      area_Marina Park
      area_Mariners Island
      area_Marlin
      area_Martens-Carmelita
      area_Middlefield
      area_Mission Hills
      area_Mission San Jose
      area_Mission Valley
      area_Moffett Boulevard
      area_Monta Loma
      area_Niles
      area_Nineteenth Avenue
      area_North Central
      area_North San Jose
      area_North Shoreview
      area_North Valley
      area_North Wishman
      area_Northeast Hillsdale
      area_Northgate
      area_Northwest Hillsdale
      area_Oakwood
      area_Old Mountain View
      area_Ortega
      area_Palm
      area_Parkmont
      area_Parkside
      area_Ponderosa
      area_Raynor
      area_Rex Manor
      area_Roosevelt
      area_Rose Garden
      area_Saint Francis Acres
      area_San Mateo Heights
      area_San Mateo Park
      area_San Mateo Village
      area_Santa Teresa
      area_Sequoia
      area_Serra
      area_Shearwater
      area_Shoreline West
      area_Slater
      area_South Shoreview
      area_South Sundale
      area_Southeast Hillsdale
      area_Southwest Hillsdale
      area_Spring Meadows
      area_Steinberger
      area_Sundale
      area_Sunnybrae
      area_Sylvan Park
      area_Twenty-Fifth Ave
      area_Vineyards-Avalon
      area_Wagon Wheel
      area_Warm Springs
      area_Washington
      area_Waverly Park
      area_Weibel
      area_West Murphy
      area_West San Jose
      area_Westport
      area_Westshore
      area_Willow Glen
      area_Woodside Plaza
      bet_nan
      bath_1.0
      bath_1.5
      bath_2.0
      bath_2.5
      bath_3.0
      bath_3.5
      bath_4.0
      bath_4.5
      bath_5.0
      type_Condo
      type_Mobile / Manufactured
      type_Multi Family
      type_Single Family
      type_Townhouse
    
  
  
    
      0
      1400.0
      986436.0
      47.0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      1
      1042.0
      535006.0
      45.0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      1
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
    
    
      2
      1434.0
      1320951.0
      56.0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      3
      948.0
      668271.0
      58.0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      4
      1157.0
      710981.0
      23.0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0

model



In [11]:

    
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor



In [12]:

    
X = df_full.drop("Zestimate", 1)
y = df_full.Zestimate
print X.shape
print y.shape









    



(45870, 208)
(45870,)



In [13]:

    
rep = 10
mat = []
ip = np.zeros(X.shape[1])

for i in xrange(rep):
    RF = RandomForestRegressor()
    pred = cross_val_predict(RF, X, y, cv=5)
    mat.append(pred)
    RF.fit(X, y)
    ip += RF.feature_importances_
    print i
    
preds = np.array(mat).mean(0)
err = np.sqrt(mean_squared_error(y_pred=preds, y_true=y))
plt.scatter(x=y, y=preds)
plt.scatter(x=y, y=y, marker=".")
plt.xlabel("true")
plt.ylabel("prediction")
plt.title("RF error: " + str(err))









    



0
1
2
3
4
5
6
7
8
9






    Out[13]:





<matplotlib.text.Text at 0x116072490>



In [14]:

    
ipdf = pd.DataFrame(zip(X.columns, ip), columns=["feature", "importance"]).sort_values("importance", ascending=False)
ipdf.head(20)









    Out[14]:







  
    
      
      feature
      importance
    
  
  
    
      0
      sqft
      5.035661
    
    
      1
      built
      0.886621
    
    
      3
      city_Mountain View
      0.575613
    
    
      6
      city_San Jose
      0.414703
    
    
      20
      zipcode_94087.0
      0.293658
    
    
      206
      type_Single Family
      0.240179
    
    
      5
      city_Redwood City
      0.230413
    
    
      52
      zipcode_95129.0
      0.191835
    
    
      7
      city_San Mateo
      0.172116
    
    
      9
      city_Sunnyvale
      0.156342
    
    
      188
      area_West San Jose
      0.119842
    
    
      28
      zipcode_94539.0
      0.114076
    
    
      191
      area_Willow Glen
      0.107768
    
    
      23
      zipcode_94402.0
      0.106149
    
    
      106
      area_East San Jose
      0.089230
    
    
      8
      city_Santa Clara
      0.082126
    
    
      2
      city_Fremont
      0.076372
    
    
      31
      zipcode_94587.0
      0.074628
    
    
      43
      zipcode_95120.0
      0.073314
    
    
      19
      zipcode_94086.0
      0.056367



In [15]:

    
plt.plot(np.cumsum(sorted(ipdf.importance, reverse=True)))









    Out[15]:





[<matplotlib.lines.Line2D at 0x11ab313d0>]



In [16]:

    
top_features = ipdf.feature[:20]
X = X.loc[:, top_features]
y = df_full.Zestimate
print X.shape
print y.shape









    



(45870, 20)
(45870,)



In [17]:

    
rep = 10
mat = []
ip = np.zeros(X.shape[1])

for i in xrange(rep):
    RF = RandomForestRegressor()
    pred = cross_val_predict(RF, X, y, cv=5)
    mat.append(pred)
    RF.fit(X, y)
    ip += RF.feature_importances_
    print i
    
preds = np.array(mat).mean(0)
err = np.sqrt(mean_squared_error(y_pred=preds, y_true=y))
plt.scatter(x=y, y=preds)
plt.scatter(x=y, y=y, marker=".")
plt.xlabel("true")
plt.ylabel("prediction")
plt.title("RF error: " + str(err))









    



0
1
2
3
4
5
6
7
8
9






    Out[17]:





<matplotlib.text.Text at 0x11ac45c90>



In [18]:

    
pd.DataFrame(zip(X.columns, ip), columns=["feature", "importance"]).sort_values("importance", ascending=False).head(20)









    Out[18]:







  
    
      
      feature
      importance
    
  
  
    
      0
      sqft
      5.344135
    
    
      1
      built
      1.106410
    
    
      2
      city_Mountain View
      0.596193
    
    
      3
      city_San Jose
      0.425648
    
    
      4
      zipcode_94087.0
      0.303222
    
    
      5
      type_Single Family
      0.290352
    
    
      6
      city_Redwood City
      0.254234
    
    
      7
      zipcode_95129.0
      0.195000
    
    
      8
      city_San Mateo
      0.192148
    
    
      9
      city_Sunnyvale
      0.179715
    
    
      17
      zipcode_94587.0
      0.136951
    
    
      12
      area_Willow Glen
      0.136892
    
    
      13
      zipcode_94402.0
      0.132637
    
    
      10
      area_West San Jose
      0.128530
    
    
      11
      zipcode_94539.0
      0.122857
    
    
      15
      city_Santa Clara
      0.108916
    
    
      14
      area_East San Jose
      0.100851
    
    
      16
      city_Fremont
      0.087946
    
    
      18
      zipcode_95120.0
      0.087644
    
    
      19
      zipcode_94086.0
      0.069718

visualization



In [19]:

    
plt.scatter(x=data.sqft, y=data.Zestimate)
plt.xlabel("sqft")
plt.ylabel("Zestimate")









    Out[19]:





<matplotlib.text.Text at 0x11acd9550>



In [20]:

    
plt.figure(figsize=(40,10))
sns.boxplot(x=data.zipcode, y=data.Zestimate)
plt.yticks(rotation=90)









    Out[20]:





(array([ -500000.,        0.,   500000.,  1000000.,  1500000.,  2000000.,
         2500000.,  3000000.,  3500000.,  4000000.,  4500000.]),
 <a list of 11 Text yticklabel objects>)



In [25]:

    
plt.figure(figsize=(40,10))
sns.boxplot(x=data.zipcode, y=data.Zestimate / data.sqft)
plt.yticks(rotation=90) 
plt.ylim(0,2000)
plt.title("price per square by zip code")









    Out[25]:





<matplotlib.text.Text at 0x124925dd0>



In [21]:

    
sns.boxplot(x=data.bed, y=data.Zestimate)









    Out[21]:





<matplotlib.axes._subplots.AxesSubplot at 0x1178b0cd0>



In [22]:

    
sns.boxplot(x=data.bath, y=data.Zestimate)









    Out[22]:





<matplotlib.axes._subplots.AxesSubplot at 0x119c3b110>



In [ ]:

	city	zipcode	area	bed	bath	sqft	price	Zestimate	type	built	Lastremodel	year1	price1	year2	price2	year3	price3	year4	price4	year5	price5
0	Fremont	94555.0	Northgate	4.0	1.0	1400.0	140000.0	986436.0	Single Family	47.0	47.0	1999-06-01	292000.0	NaN	140000.0	NaN	NaN	NaN	NaN	NaN	NaN
1	Fremont	94538.0	Downtown	2.0	2.0	1042.0	160000.0	535006.0	Condo	45.0	45.0	NaN	225000.0	2017-04-28	160000.0	NaN	NaN	NaN	NaN	NaN	NaN
2	Fremont	94539.0	Mission Valley	3.0	2.0	1434.0	130000.0	1320951.0	Single Family	56.0	55.0	2017-04-25	130000.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	Fremont	94538.0	Sundale	3.0	1.0	948.0	141500.0	668271.0	Single Family	58.0	56.0	2000-06-26	329000.0	2017-04-21	141500.0	NaN	NaN	NaN	NaN	NaN	NaN
4	Fremont	94538.0	Blacow	2.0	2.5	1157.0	60000.0	710981.0	Condo	23.0	23.0	1995-02-17	173000.0	2000-02-29	244500.0	2015-07-21	605000.0	2017-03-31	60000.0	NaN	NaN

	feature	importance
0	sqft	5.035661
1	built	0.886621
3	city_Mountain View	0.575613
6	city_San Jose	0.414703
20	zipcode_94087.0	0.293658
206	type_Single Family	0.240179
5	city_Redwood City	0.230413
52	zipcode_95129.0	0.191835
7	city_San Mateo	0.172116
9	city_Sunnyvale	0.156342
188	area_West San Jose	0.119842
28	zipcode_94539.0	0.114076
191	area_Willow Glen	0.107768
23	zipcode_94402.0	0.106149
106	area_East San Jose	0.089230
8	city_Santa Clara	0.082126
2	city_Fremont	0.076372
31	zipcode_94587.0	0.074628
43	zipcode_95120.0	0.073314
19	zipcode_94086.0	0.056367

	feature	importance
0	sqft	5.344135
1	built	1.106410
2	city_Mountain View	0.596193
3	city_San Jose	0.425648
4	zipcode_94087.0	0.303222
5	type_Single Family	0.290352
6	city_Redwood City	0.254234
7	zipcode_95129.0	0.195000
8	city_San Mateo	0.192148
9	city_Sunnyvale	0.179715
17	zipcode_94587.0	0.136951
12	area_Willow Glen	0.136892
13	zipcode_94402.0	0.132637
10	area_West San Jose	0.128530
11	zipcode_94539.0	0.122857
15	city_Santa Clara	0.108916
14	area_East San Jose	0.100851
16	city_Fremont	0.087946
18	zipcode_95120.0	0.087644
19	zipcode_94086.0	0.069718