In [1]:

    
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import json

Load the data from our JSON file.

The data is stored as a dictionary of dictionaries in the json file. We store it that way beacause it's easy to add data to the existing master data file. Also, I haven't figured out how to get it in a database yet.



In [2]:

    
with open('../pipeline/data/Day90ApartmentData.json') as f:
    my_dict1 = json.load(f)



In [9]:

    
with open('../pipeline/data/ProcessedDay90ApartmentData.json') as g:
    my_dict2 = json.load(g)



In [4]:

    
dframe1 = DataFrame(my_dict1)
dframe1 = dframe1.T
dframe1 = dframe1[['content', 'laundry',  'price', 'dog', 'bed', 
'bath', 'feet', 'long', 'parking', 'lat', 'smoking', 'getphotos', 
'cat', 'hasmap', 'wheelchair', 'housingtype']]
dframe1.head(50)









    Out[4]:






  
    
      
      content
      laundry
      price
      dog
      bed
      bath
      feet
      long
      parking
      lat
      smoking
      getphotos
      cat
      hasmap
      wheelchair
      housingtype
    
  
  
    
      5466720487
      1451
      w/d hookups
      1295
      0
      2
      1.5
      1064
      -122.5
      attached garage
      45.5472
      NaN
      16
      0
      1
      NaN
      townhouse
    
    
      5466933774
      812
      w/d in unit
      1775
      0
      2
      1
      1000
      -122.585
      attached garage
      45.5129
      no smoking
      7
      0
      1
      NaN
      house
    
    
      5466959791
      802
      w/d in unit
      3038
      1
      1
      1
      1043
      -122.683
      off-street parking
      45.5237
      NaN
      9
      1
      1
      wheelchair accessible
      apartment
    
    
      5466972226
      1482
      w/d in unit
      2000
      0
      1
      1
      842
      -122.685
      attached garage
      45.5293
      no smoking
      10
      0
      1
      wheelchair accessible
      apartment
    
    
      5466974707
      720
      w/d hookups
      2250
      1
      2
      1.5
      1500
      -122.631
      attached garage
      45.4982
      no smoking
      0
      1
      1
      NaN
      townhouse
    
    
      5466980870
      466
      laundry on site
      1250
      0
      3
      1
      1070
      -122.504
      attached garage
      45.5169
      no smoking
      0
      0
      1
      NaN
      house
    
    
      5466984497
      934
      w/d hookups
      1550
      0
      2
      1
      720
      NaN
      off-street parking
      NaN
      no smoking
      6
      0
      0
      NaN
      house
    
    
      5466985663
      1167
      NaN
      1800
      0
      2
      1
      980
      NaN
      street parking
      NaN
      no smoking
      6
      0
      0
      NaN
      house
    
    
      5466992256
      1690
      laundry in bldg
      1545
      1
      2
      1
      1100
      -122.689
      street parking
      45.5198
      NaN
      22
      1
      1
      NaN
      apartment
    
    
      5467011165
      1913
      laundry in bldg
      2900
      0
      3
      2
      2000
      -122.644
      NaN
      45.4753
      NaN
      22
      0
      1
      NaN
      house
    
    
      5467015913
      967
      w/d in unit
      1550
      0
      2
      1
      1300
      -122.575
      attached garage
      45.4872
      NaN
      16
      1
      1
      NaN
      house
    
    
      5467017550
      802
      w/d in unit
      3038
      1
      1
      1
      1043
      -122.683
      off-street parking
      45.5237
      NaN
      9
      1
      1
      wheelchair accessible
      apartment
    
    
      5467030959
      967
      w/d in unit
      1350
      0
      2
      1
      999
      -122.568
      off-street parking
      45.4788
      no smoking
      13
      0
      1
      NaN
      condo
    
    
      5467035243
      966
      w/d in unit
      1100
      0
      1
      1
      633
      -122.568
      off-street parking
      45.4788
      no smoking
      10
      0
      1
      NaN
      condo
    
    
      5467036762
      926
      w/d in unit
      1100
      0
      1
      1
      633
      -122.568
      off-street parking
      45.4788
      no smoking
      10
      0
      1
      NaN
      apartment
    
    
      5467038342
      966
      w/d in unit
      1100
      0
      1
      1
      636
      -122.568
      off-street parking
      45.4788
      no smoking
      10
      0
      1
      NaN
      condo
    
    
      5467045929
      930
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      8
      1
      1
      wheelchair accessible
      apartment
    
    
      5467046684
      931
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      8
      1
      1
      wheelchair accessible
      apartment
    
    
      5467047748
      931
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      8
      1
      1
      wheelchair accessible
      apartment
    
    
      5467048989
      930
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      8
      1
      1
      wheelchair accessible
      apartment
    
    
      5467050450
      838
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      8
      1
      1
      wheelchair accessible
      apartment
    
    
      5467051170
      838
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      8
      1
      1
      wheelchair accessible
      apartment
    
    
      5467051853
      838
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      8
      1
      1
      wheelchair accessible
      apartment
    
    
      5467052685
      838
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      8
      1
      1
      wheelchair accessible
      apartment
    
    
      5467054294
      917
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      7
      1
      1
      NaN
      apartment
    
    
      5467055048
      917
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      7
      1
      1
      NaN
      apartment
    
    
      5467055808
      917
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      7
      1
      1
      NaN
      apartment
    
    
      5467056936
      917
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      7
      1
      1
      NaN
      apartment
    
    
      5467063250
      917
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      7
      1
      1
      NaN
      apartment
    
    
      5467064025
      917
      w/d in unit
      1600
      1
      1
      1
      700
      -122.703
      street parking
      45.5303
      no smoking
      7
      1
      1
      NaN
      apartment
    
    
      5467067307
      915
      laundry in bldg
      949
      0
      0
      1
      400
      -122.697
      street parking
      45.5248
      no smoking
      10
      1
      1
      NaN
      apartment
    
    
      5467076144
      1496
      w/d in unit
      1150
      0
      3
      2.5
      1350
      NaN
      NaN
      NaN
      NaN
      17
      0
      0
      NaN
      townhouse
    
    
      5467091987
      1258
      w/d in unit
      2295
      0
      2
      1
      1000
      -122.655
      off-street parking
      45.56
      NaN
      13
      0
      1
      NaN
      house
    
    
      5467111279
      2654
      w/d in unit
      2650
      1
      2
      2
      1226
      NaN
      NaN
      NaN
      no smoking
      10
      1
      0
      NaN
      apartment
    
    
      5467127612
      754
      w/d in unit
      2171
      1
      2
      2
      1272
      -122.678
      attached garage
      45.5142
      NaN
      0
      1
      1
      NaN
      apartment
    
    
      5467141009
      816
      w/d in unit
      1517
      1
      3
      2
      1178
      -122.401
      NaN
      45.5165
      NaN
      8
      1
      1
      NaN
      apartment
    
    
      5467143866
      698
      w/d in unit
      1251
      1
      2
      2
      1050
      -122.401
      NaN
      45.5165
      NaN
      8
      1
      1
      NaN
      apartment
    
    
      5467145676
      350
      w/d in unit
      1500
      0
      1
      1
      900
      -122.679
      street parking
      45.4915
      NaN
      11
      0
      1
      NaN
      house
    
    
      5467147852
      1930
      w/d in unit
      1895
      0
      3
      1.5
      1623
      -122.723
      detached garage
      45.4147
      NaN
      19
      0
      1
      NaN
      apartment
    
    
      5467156094
      1279
      w/d hookups
      995
      0
      2
      2
      890
      -122.568
      off-street parking
      45.499
      NaN
      8
      0
      1
      NaN
      apartment
    
    
      5467157129
      1279
      w/d hookups
      995
      0
      2
      2
      890
      -122.568
      off-street parking
      45.499
      NaN
      3
      0
      1
      NaN
      apartment
    
    
      5467157751
      1192
      w/d hookups
      995
      0
      2
      2
      890
      -122.568
      off-street parking
      45.499
      NaN
      8
      0
      1
      NaN
      apartment
    
    
      5467158644
      1192
      w/d hookups
      995
      0
      2
      2
      890
      -122.568
      off-street parking
      45.499
      NaN
      6
      0
      1
      NaN
      apartment
    
    
      5467159647
      1273
      w/d hookups
      995
      0
      2
      2
      890
      -122.568
      off-street parking
      45.499
      no smoking
      7
      0
      1
      NaN
      apartment
    
    
      5467161388
      717
      w/d in unit
      1525
      1
      1
      1
      900
      -122.616
      detached garage
      45.515
      no smoking
      0
      1
      1
      NaN
      duplex
    
    
      5467165113
      1298
      w/d hookups
      995
      0
      2
      2
      890
      -122.568
      off-street parking
      45.499
      NaN
      7
      0
      1
      NaN
      apartment
    
    
      5467172118
      1244
      w/d hookups
      995
      0
      2
      2
      890
      -122.568
      off-street parking
      45.499
      NaN
      7
      0
      1
      NaN
      apartment
    
    
      5467172887
      992
      w/d hookups
      995
      0
      2
      2
      890
      -122.568
      off-street parking
      45.499
      NaN
      0
      0
      1
      NaN
      apartment
    
    
      5467174019
      1279
      w/d hookups
      995
      0
      2
      2
      890
      -122.568
      off-street parking
      45.499
      NaN
      4
      0
      1
      NaN
      apartment
    
    
      5467175428
      802
      w/d in unit
      3038
      1
      1
      1
      1043
      -122.683
      off-street parking
      45.5237
      NaN
      9
      1
      1
      wheelchair accessible
      apartment



In [12]:

    
dframe2 = DataFrame(my_dict2)
dframe2 = dframe2.T
dframe2 = dframe2[['content', 'laundry',  'price', 'dog', 'bed', 
'bath', 'feet', 'long', 'parking', 'lat', 'smoking', 'getphotos', 
'cat', 'hasmap', 'wheelchair', 'housingtype']]
dframe2.describe()









    Out[12]:






  
    
      
      content
      laundry
      price
      dog
      bed
      bath
      feet
      long
      parking
      lat
      smoking
      getphotos
      cat
      hasmap
      wheelchair
      housingtype
    
  
  
    
      count
      32223
      32223
      32223
      32223
      32223
      32223
      32223.000000
      32223.000000
      32223
      32223.00000
      32223
      32223
      32223
      32223
      32223
      32223
    
    
      unique
      3873
      5
      2004
      2
      9
      17
      1461.000000
      6131.000000
      7
      6000.00000
      2
      24
      2
      2
      1
      11
    
    
      top
      967
      w/d in unit
      995
      1
      1
      1
      903.863061
      -122.631076
      off-street parking
      45.51843
      no smoking
      8
      1
      1
      no wheelchair access
      apartment
    
    
      freq
      174
      21669
      678
      20511
      12459
      22550
      3764.000000
      1829.000000
      16284
      1829.00000
      19621
      3224
      21850
      30434
      32223
      25285



In [16]:

    
dframe = pd.get_dummies(dframe2, columns = ['laundry', 'parking', 'smoking', 'wheelchair', 'housingtype'])



In [18]:

    
pd.set_option('display.max_columns', 500)
dframe









    Out[18]:






  
    
      
      content
      price
      dog
      bed
      bath
      feet
      long
      lat
      getphotos
      cat
      hasmap
      laundry_laundry in bldg
      laundry_laundry on site
      laundry_no laundry on site
      laundry_w/d hookups
      laundry_w/d in unit
      parking_attached garage
      parking_carport
      parking_detached garage
      parking_no parking
      parking_off-street parking
      parking_street parking
      parking_valet parking
      smoking_no smoking
      smoking_smoking
      wheelchair_no wheelchair access
      housingtype_apartment
      housingtype_condo
      housingtype_cottage/cabin
      housingtype_duplex
      housingtype_flat
      housingtype_house
      housingtype_in-law
      housingtype_land
      housingtype_loft
      housingtype_manufactured
      housingtype_townhouse
    
  
  
    
      5466720487
      1451
      1295
      0
      2
      1.5
      1064
      -122.5
      45.5472
      16
      0
      1
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
    
    
      5466933774
      812
      1775
      0
      2
      1
      1000
      -122.585
      45.5129
      7
      0
      1
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
    
    
      5466959791
      802
      3038
      1
      1
      1
      1043
      -122.683
      45.5237
      9
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5466972226
      1482
      2000
      0
      1
      1
      842
      -122.685
      45.5293
      10
      0
      1
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5466974707
      720
      2250
      1
      2
      1.5
      1500
      -122.631
      45.4982
      0
      1
      1
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
    
    
      5466980870
      466
      1250
      0
      3
      1
      1070
      -122.504
      45.5169
      0
      0
      1
      0
      1
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
    
    
      5466984497
      934
      1550
      0
      2
      1
      720
      -122.631
      45.5184
      6
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
    
    
      5466985663
      1167
      1800
      0
      2
      1
      980
      -122.631
      45.5184
      6
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
    
    
      5466992256
      1690
      1545
      1
      2
      1
      1100
      -122.689
      45.5198
      22
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467011165
      1913
      2900
      0
      3
      2
      2000
      -122.644
      45.4753
      22
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
    
    
      5467015913
      967
      1550
      0
      2
      1
      1300
      -122.575
      45.4872
      16
      1
      1
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
    
    
      5467017550
      802
      3038
      1
      1
      1
      1043
      -122.683
      45.5237
      9
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467030959
      967
      1350
      0
      2
      1
      999
      -122.568
      45.4788
      13
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467035243
      966
      1100
      0
      1
      1
      633
      -122.568
      45.4788
      10
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467036762
      926
      1100
      0
      1
      1
      633
      -122.568
      45.4788
      10
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467038342
      966
      1100
      0
      1
      1
      636
      -122.568
      45.4788
      10
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467045929
      930
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      8
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467046684
      931
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      8
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467047748
      931
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      8
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467048989
      930
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      8
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467050450
      838
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      8
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467051170
      838
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      8
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467051853
      838
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      8
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467052685
      838
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      8
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467054294
      917
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      7
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467055048
      917
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      7
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467055808
      917
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      7
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467056936
      917
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      7
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467063250
      917
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      7
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5467064025
      917
      1600
      1
      1
      1
      700
      -122.703
      45.5303
      7
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      5606880234
      3411
      1671
      1
      1
      1
      649
      -122.696980
      45.534812
      16
      1
      1
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606883048
      297
      1050
      1
      3
      2
      903.863
      -122.684200
      45.574200
      0
      1
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
    
    
      5606884655
      2279
      1870
      0
      1
      1
      810
      -122.685431
      45.530745
      18
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606886478
      3157
      1595
      0
      1
      1
      630
      -122.680849
      45.507958
      15
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606887612
      2889
      1575
      1
      1
      1
      655
      -122.673941
      45.484748
      5
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606889969
      4207
      1695
      0
      2
      1
      1165
      -122.707400
      45.458000
      11
      0
      1
      0
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
    
    
      5606890857
      2022
      1195
      1
      2
      1
      950
      -122.554904
      45.535897
      13
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606896951
      2716
      1768
      1
      2
      2
      1071
      -122.776460
      45.536405
      18
      1
      1
      0
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606906826
      1318
      1095
      0
      2
      1
      900
      -122.534763
      45.503154
      8
      0
      1
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
    
    
      5606908729
      3028
      1525
      0
      1
      1
      670
      -122.680232
      45.507955
      14
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606909576
      3405
      1495
      1
      1
      1
      631
      -122.696980
      45.534812
      18
      1
      1
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606913318
      3247
      1707
      1
      1
      1
      588
      -122.683523
      45.533256
      14
      1
      1
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606919189
      3024
      2995
      0
      2
      2
      1250
      -122.680232
      45.507955
      16
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606920335
      2330
      1295
      1
      2
      2
      951
      -122.564504
      45.466749
      6
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606920501
      2352
      1751
      1
      1
      1
      711
      -122.680495
      45.527268
      5
      1
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      0
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606923847
      575
      855
      0
      4
      2
      903.863
      -122.681964
      45.509291
      4
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606924783
      959
      1050
      1
      1
      1
      619
      -122.415265
      45.511248
      8
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606931168
      3071
      1630
      1
      2
      2
      1004
      -122.631
      45.5184
      13
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606931449
      1202
      1550
      1
      2
      1.5
      1000
      -122.693027
      45.569787
      12
      1
      1
      0
      0
      1
      0
      0
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606937122
      2481
      1350
      1
      1
      1
      638
      -122.682255
      45.559892
      0
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606937191
      3025
      1773
      1
      2
      2
      1071
      -122.631
      45.5184
      13
      1
      0
      0
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606949108
      2264
      1685
      1
      1
      1
      693
      -122.670905
      45.495471
      21
      1
      1
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606949496
      2856
      1714
      1
      2
      2
      1071
      -122.776460
      45.536405
      13
      1
      1
      0
      0
      0
      0
      1
      0
      1
      0
      0
      0
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606953826
      2059
      1532
      1
      1
      1
      665
      -122.684580
      45.516096
      13
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606955106
      4719
      1430
      1
      1
      1
      615
      -122.641516
      45.463959
      19
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606957466
      1978
      2120
      0
      2
      2
      865
      -122.683258
      45.514693
      9
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606959648
      1311
      1765
      1
      1
      1
      699
      -122.694711
      45.534141
      21
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606963450
      2144
      1095
      0
      0
      1
      367
      -122.688819
      45.510238
      14
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606967296
      2490
      1598
      1
      1
      1
      785
      -122.675541
      45.550538
      11
      1
      1
      0
      0
      0
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      5606978869
      2717
      2527
      0
      2
      2
      966
      -122.682824
      45.529644
      13
      0
      1
      0
      0
      0
      0
      1
      1
      0
      0
      0
      0
      0
      0
      0
      1
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

32223 rows × 37 columns



In [21]:

    
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
	dframe.drop('price', axis = 1), dframe.price, test_size=0.33)



In [ ]:



In [ ]:

    
def listing_cleaner(entry):
    print entry



In [13]:

    
df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
                    'C': [1, 2, 3]})
df



In [15]:

    
pd.get_dummies(df, columns=['A','C'])



In [ ]:

    
listing_cleaner(my_dict['5465197037'])



In [ ]:

    
type(dframe['bath']['5399866740'])

Clean up the data a bit

Right now the 'shared' and 'split' are included in number of bathrooms. If I were to convert that to a number I would consider a shared/split bathroom to be half or 0.5 of a bathroom.



In [ ]:

    
dframe.bath = dframe.bath.replace('shared',0.5)
dframe.bath = dframe.bath.replace('split',0.5)
dframe.smoking = dframe.smoking.replace(np.nan, 'smoking')
dframe.furnished = dframe.furnished.replace(np.nan,'not furnished')
dframe.wheelchair = dframe.wheelchair.replace(np.nan, 'not wheelchair accessible')



In [ ]:

    
dframe.describe()



In [ ]:

    
dframe.bed.unique()



In [ ]:

    
from sklearn.preprocessing import Imputer, LabelEncoder



In [ ]:

    
def meanimputer(column):
    imp = Imputer(missing_values='NaN', strategy='mean', axis=1)
    imp.fit(column)
    X = imp.transform(column)
    return X[0]



In [ ]:

    
arr = np.array([np.nan, 'house', 'boat', 'houseboat', 'house', np.nan, 'house','houseboat'])
prac_df = DataFrame()
prac_df['arr'] = arr
prac_df['arr']
modeimputer(prac_df['arr'])



In [ ]:

    
def modeimputer(column):

    le = LabelEncoder()
    column = le.fit_transform(column)
    print le.classes_
    print type(le.classes_[0])
    print column
    nan = le.transform([np.nan])[0]
    print nan
    print type(column)
    column = list(column)
    for _,i in enumerate(column):
        if i == nan:
            column[_] = np.nan
    
    imp = Imputer(missing_values='NaN', strategy='most_frequent', axis=1)
    imp.fit(column)

    X = imp.transform(column)
    
    for _,i in enumerate(X[0]):
        if np.isnan(i):
            X[_] = 0
    X = X.astype(int)


    Y = le.inverse_transform(X)

    return Y



In [ ]:

    
import pandas as pd
import numpy as np

from sklearn.base import TransformerMixin
class ModeImputer(TransformerMixin):

    def __init__(self):
        """Impute missing values.

        Columns of dtype object are imputed with the most frequent value 
        in column.

        Columns of other types are imputed with mean of column.
        
        Credit:http://stackoverflow.com/questions/25239958/
        impute-categorical-missing-values-in-scikit-learn

        """
    def fit(self, X, y=None):

        self.fill = pd.Series([X[c].value_counts().index[0]
            if X[c].dtype == np.dtype('O') else X[c].mean() for c in X],
            index=X.columns)

        return self

    def transform(self, X, y=None):
        return X.fillna(self.fill)



In [ ]:

    
data = [
    ['a', 1, 2],
    ['b', 1, 1],
    ['b', 2, 2],
    [np.nan, np.nan, np.nan]
]

X = pd.DataFrame(data)
xt = ModeImputer().fit_transform(X)

print('before...')
print(X)
print('after...')
print(xt)



In [ ]:

    
dframe = ModeImputer().fit_transform(dframe)



In [ ]:

    
dframe.head()



In [ ]:

    
dframe.describe(include = 'all')



In [ ]:

    
dframe.bed.mean()



In [ ]:

    
dframe.parking.unique()



In [ ]:

    
u_dframe = DataFrame()
dframe['bath'] = meanimputer(dframe['bath'])
dframe['bed'] = meanimputer(dframe['bed'])
dframe['feet'] = meanimputer(dframe['feet'])
dframe['lat'] = meanimputer(dframe['lat'])
dframe['long'] = meanimputer(dframe['long'])



In [ ]:

    
dframe.head()



In [ ]:

    
dframe.describe(include='all')



In [ ]:

    
data = dframe[dframe.lat > 45.4][dframe.lat < 45.6][dframe.long < -122.0][dframe.long > -123.5]
plt.figure(figsize=(15,10))
plt.scatter(data = data, x = 'long',y='lat')

It looks like Portland!!!

Let's cluster the data. Start by creating a list of [['lat','long'], ...]



In [ ]:

    
XYdf = dframe[dframe.lat > 45.4][dframe.lat < 45.6][dframe.long < -122.0][dframe.long > -123.5]
data = [[XYdf['lat'][i],XYdf['long'][i]] for i in XYdf.index]

We'll use K Means Clustering because that's the clustering method I recently learned in class! There may be others that work better, but this is the tool that I know



In [ ]:

    
from sklearn.cluster import KMeans
km = KMeans(n_clusters=40)
km.fit(data)
neighborhoods = km.cluster_centers_



In [ ]:

    
%pylab inline
figure(1,figsize=(20,12))
plot([row[1] for row in data],[row[0] for row in data],'b.')
for i in km.cluster_centers_:  
    plot(i[1],i[0], 'g*',ms=25)
'''Note to Riley: come back and make it look pretty'''

We chose our neighborhoods!

I've found that every once in a while the centers end up in different points, but are fairly consistant. Now let's process our data points and figure out where the closest neighborhood center is to it!



In [ ]:

    
neighborhoods = neighborhoods.tolist()
for i in enumerate(neighborhoods):
    i[1].append(i[0])
print neighborhoods

Create a function that will label each point with a number coresponding to it's neighborhood



In [ ]:

    
def clusterer(X, Y,neighborhoods):
    neighbors = []
    for i in neighborhoods:
        distance = ((i[0]-X)**2 + (i[1]-Y)**2)
        neighbors.append(distance)
    closest = min(neighbors)
    return neighbors.index(closest)



In [ ]:

    
neighborhoodlist = []
for i in dframe.index:
    neighborhoodlist.append(clusterer(dframe['lat'][i],dframe['long'][i],neighborhoods))
dframe['neighborhood'] = neighborhoodlist



In [ ]:

    
dframe

Here's the new Part. We're breaking out the neighborhood values into their own columns. Now the algorithms can read them as categorical data rather than continuous data.



In [ ]:



In [ ]:

    
from sklearn import preprocessing
def CategoricalToBinary(dframe,column_name):
    le = preprocessing.LabelEncoder()
    listy = le.fit_transform(dframe[column_name])
    dframe[column_name] = listy
    unique = dframe[column_name].unique()
    serieslist = [list() for _ in xrange(len(unique))]
    
    
    for column, _ in enumerate(serieslist):
        for i, item in enumerate(dframe[column_name]):
            if item == column:
                serieslist[column].append(1)
            else:
                serieslist[column].append(0)
        dframe[column_name+str(column)] = serieslist[column]

   
    return dframe



In [ ]:

    
pd.set_option('max_columns', 100)
dframe = CategoricalToBinary(dframe,'housingtype')
dframe = CategoricalToBinary(dframe,'parking')
dframe = CategoricalToBinary(dframe,'laundry')
dframe = CategoricalToBinary(dframe,'smoking')
dframe = CategoricalToBinary(dframe,'wheelchair')
dframe = CategoricalToBinary(dframe,'neighborhood')
dframe



In [ ]:

    
dframe = dframe.drop('date',1)
dframe = dframe.drop('housingtype',1)
dframe = dframe.drop('parking',1)
dframe = dframe.drop('laundry',1)
dframe = dframe.drop('smoking',1)
dframe = dframe.drop('wheelchair',1)
dframe = dframe.drop('neighborhood',1)
dframe = dframe.drop('time',1)



In [ ]:

    
columns=list(dframe.columns)



In [ ]:

    
from __future__ import division
print len(dframe)
df2 = dframe[dframe.price < 10000][columns].dropna()
print len(df2)
print len(df2)/len(dframe)

price = df2[['price']].values
columns.pop(columns.index('price'))
features = df2[columns].values

from sklearn.cross_validation import train_test_split
features_train, features_test, price_train, price_test = train_test_split(features, price, test_size=0.1, random_state=42)

Ok, lets put it through Decision Tree!

What about Random Forest?



In [ ]:

    
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
reg = RandomForestRegressor()
reg = reg.fit(features_train, price_train)



In [ ]:

    
forest_pred = reg.predict(features_test)
forest_pred = np.array([[item] for item in forest_pred])



In [ ]:

    
print r2_score(forest_pred, price_test)
plt.scatter(forest_pred,price_test)



In [ ]:

    
df2['predictions'] = reg.predict(df2[columns])



In [ ]:

    
df2['predictions_diff'] = df2['predictions']-df2['price']



In [ ]:

    
sd = np.std(df2['predictions_diff'])
sns.kdeplot(df2['predictions_diff'][df2['predictions_diff']>-150][df2['predictions_diff']<150])
sns.plt.xlim(-150,150)



In [ ]:

    
data = df2[dframe.lat > 45.45][df2.lat < 45.6][df2.long < -122.4][df2.long > -122.8][df2['predictions_diff']>-150][df2['predictions_diff']<150]
plt.figure(figsize=(15,10))
plt.scatter(data = data, x = 'long',y='lat', c = 'predictions_diff',s=10,cmap='coolwarm')



In [ ]:

    
dframe



In [ ]:

    
print np.mean([1,2,34,np.nan])



In [ ]:

    
def averager(dframe):
    dframe = dframe.T
    dframe.dropna()
    averages = {}
    for listing in dframe:
        try:
            key = str(dframe[listing]['bed'])+','+str(dframe[listing]['bath'])+','+str(dframe[listing]['neighborhood'])+','+str(dframe[listing]['feet']-dframe[listing]['feet']%50)
            if key not in averages:
                averages[key] = {'average_list':[dframe[listing]['price']], 'average':0}
            elif key in averages:
                averages[key]['average_list'].append(dframe[listing]['price'])
        except TypeError:
            continue
    for entry in averages:
        averages[entry]['average'] = np.mean(averages[entry]['average_list'])
    return averages



In [ ]:

    
averages = averager(dframe)
print averages



In [ ]:

    
dframe['averages']= averages[str(dframe['bed'])+','+str(dframe['bath'])+','+str(dframe['neighborhood'])+','+str(dframe['feet']-dframe['feet']%50)]



In [ ]:

    
dframe.T

Wow! up to .87! That's our best yet! What if we add more trees???



In [ ]:

    
reg = RandomForestRegressor(n_estimators = 100)
reg = reg.fit(features_train, price_train)



In [ ]:

    
forest_pred = reg.predict(features_test)
forest_pred = np.array([[item] for item in forest_pred])



In [ ]:

    
print r2_score(forest_pred, price_test)
print plt.scatter(pred,price_test)



In [ ]:



In [ ]:

    
from sklearn.tree import DecisionTreeRegressor
reg = DecisionTreeRegressor(max_depth = 5)
reg.fit(features_train, price_train)
print len(features_train[0])
columns = [str(x) for x in columns]
print columns
from sklearn.tree import export_graphviz
export_graphviz(reg,feature_names=columns)

Up to .88!

So what is our goal now? I'd like to see if adjusting the number of neighborhoods increases the accuracy. same for the affect with the number of trees



In [ ]:

    
def neighborhood_optimizer(dframe,neighborhood_number_range, counter_num):
    XYdf = dframe[dframe.lat > 45.4][dframe.lat < 45.6][dframe.long < -122.0][dframe.long > -123.5]
    data = [[XYdf['lat'][i],XYdf['long'][i]] for i in XYdf.index]
    r2_dict = []
    for i in neighborhood_number_range:
        counter = counter_num
        average_accuracy_list = []
        while counter > 0:
            km = KMeans(n_clusters=i)
            km.fit(data)
            neighborhoods = km.cluster_centers_
            neighborhoods = neighborhoods.tolist()
            for x in enumerate(neighborhoods):
                x[1].append(x[0])
            neighborhoodlist = []
            for z in dframe.index:
                neighborhoodlist.append(clusterer(dframe['lat'][z],dframe['long'][z],neighborhoods))
            dframecopy = dframe.copy()
            dframecopy['neighborhood'] = Series((neighborhoodlist), index=dframe.index)
            df2 = dframecopy[dframe.price < 10000][['bath','bed','feet','dog','cat','content','getphotos', 'hasmap', 'price','neighborhood']].dropna()
            features = df2[['bath','bed','feet','dog','cat','content','getphotos', 'hasmap', 'neighborhood']].values
            price = df2[['price']].values
            features_train, features_test, price_train, price_test = train_test_split(features, price, test_size=0.1)
            reg = RandomForestRegressor()
            reg = reg.fit(features_train, price_train)
            forest_pred = reg.predict(features_test)
            forest_pred = np.array([[item] for item in forest_pred])
            counter -= 1
            average_accuracy_list.append(r2_score(forest_pred, price_test))
        total = 0
        for entry in average_accuracy_list:
            total += entry
        r2_accuracy = total/len(average_accuracy_list)
        r2_dict.append((i,r2_accuracy))
    print r2_dict
    return r2_dict



In [ ]:

    
neighborhood_number_range = [i for _,i in enumerate(range(2,31,2))]
neighborhood_number_range



In [ ]:

    
r2_dict = neighborhood_optimizer(dframe,neighborhood_number_range,10)



In [ ]:

    
r2_dict[:][0]



In [ ]:

    
plt.scatter([x[0] for x in r2_dict],[x[1] for x in r2_dict])

Looks like the optimum is right around 10 or 11, and then starts to drop off. Let's get a little more granular and look at a smaller range



In [ ]:

    
neighborhood_number_range = [i for _,i in enumerate(range(7,15))]
neighborhood_number_range



In [ ]:

    
r2_dict = neighborhood_optimizer(dframe,neighborhood_number_range,10)



In [ ]:

    
print r2_dict
plt.scatter([x[0] for x in r2_dict],[x[1] for x in r2_dict])

Trying a few times, it looks like 10, 11 and 12 get the best results at ~.85. Of course, we'll need to redo some of these optomizations after we properly process our data. Hopefully we'll see some more consistency then too.



In [ ]:

    
r2_dict = neighborhood_optimizer(dframe,[10,11,12],25)

Note #1 to Riley: (From Last time) Perhaps look into another regressor? see if there's one that's inherantly better at this kind of thing.

Note #2 to Riley: Figure out how to process data so that you don't have to drop null values

Note #3 to Riley: convert categorical data into binary

Note #4 to Riley: I wonder if increasing the number of neighborhoods would become more accurate as we collect more data? like you could create a bunch of little accurate models instead of a bunch of bigger ones.

Learned: If you plan on using Decision Tree/Random Forest from SKLearn, make sure you collect your discrete variables in separate columns and make them binary yes or no(0 or 1).

	B	A_a	A_b	C_1	C_2	C_3
0	b	1	0	1	0	0
1	a	0	1	0	1	0
2	c	1	0	0	0	1

	content	laundry	price	dog	bed	bath	feet	long	parking	lat	smoking	getphotos	cat	hasmap	wheelchair	housingtype
5466720487	1451	w/d hookups	1295	0	2	1.5	1064	-122.5	attached garage	45.5472	NaN	16	0	1	NaN	townhouse
5466933774	812	w/d in unit	1775	0	2	1	1000	-122.585	attached garage	45.5129	no smoking	7	0	1	NaN	house
5466959791	802	w/d in unit	3038	1	1	1	1043	-122.683	off-street parking	45.5237	NaN	9	1	1	wheelchair accessible	apartment
5466972226	1482	w/d in unit	2000	0	1	1	842	-122.685	attached garage	45.5293	no smoking	10	0	1	wheelchair accessible	apartment
5466974707	720	w/d hookups	2250	1	2	1.5	1500	-122.631	attached garage	45.4982	no smoking	0	1	1	NaN	townhouse
5466980870	466	laundry on site	1250	0	3	1	1070	-122.504	attached garage	45.5169	no smoking	0	0	1	NaN	house
5466984497	934	w/d hookups	1550	0	2	1	720	NaN	off-street parking	NaN	no smoking	6	0	0	NaN	house
5466985663	1167	NaN	1800	0	2	1	980	NaN	street parking	NaN	no smoking	6	0	0	NaN	house
5466992256	1690	laundry in bldg	1545	1	2	1	1100	-122.689	street parking	45.5198	NaN	22	1	1	NaN	apartment
5467011165	1913	laundry in bldg	2900	0	3	2	2000	-122.644	NaN	45.4753	NaN	22	0	1	NaN	house
5467015913	967	w/d in unit	1550	0	2	1	1300	-122.575	attached garage	45.4872	NaN	16	1	1	NaN	house
5467017550	802	w/d in unit	3038	1	1	1	1043	-122.683	off-street parking	45.5237	NaN	9	1	1	wheelchair accessible	apartment
5467030959	967	w/d in unit	1350	0	2	1	999	-122.568	off-street parking	45.4788	no smoking	13	0	1	NaN	condo
5467035243	966	w/d in unit	1100	0	1	1	633	-122.568	off-street parking	45.4788	no smoking	10	0	1	NaN	condo
5467036762	926	w/d in unit	1100	0	1	1	633	-122.568	off-street parking	45.4788	no smoking	10	0	1	NaN	apartment
5467038342	966	w/d in unit	1100	0	1	1	636	-122.568	off-street parking	45.4788	no smoking	10	0	1	NaN	condo
5467045929	930	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	8	1	1	wheelchair accessible	apartment
5467046684	931	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	8	1	1	wheelchair accessible	apartment
5467047748	931	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	8	1	1	wheelchair accessible	apartment
5467048989	930	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	8	1	1	wheelchair accessible	apartment
5467050450	838	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	8	1	1	wheelchair accessible	apartment
5467051170	838	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	8	1	1	wheelchair accessible	apartment
5467051853	838	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	8	1	1	wheelchair accessible	apartment
5467052685	838	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	8	1	1	wheelchair accessible	apartment
5467054294	917	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	7	1	1	NaN	apartment
5467055048	917	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	7	1	1	NaN	apartment
5467055808	917	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	7	1	1	NaN	apartment
5467056936	917	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	7	1	1	NaN	apartment
5467063250	917	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	7	1	1	NaN	apartment
5467064025	917	w/d in unit	1600	1	1	1	700	-122.703	street parking	45.5303	no smoking	7	1	1	NaN	apartment
5467067307	915	laundry in bldg	949	0	0	1	400	-122.697	street parking	45.5248	no smoking	10	1	1	NaN	apartment
5467076144	1496	w/d in unit	1150	0	3	2.5	1350	NaN	NaN	NaN	NaN	17	0	0	NaN	townhouse
5467091987	1258	w/d in unit	2295	0	2	1	1000	-122.655	off-street parking	45.56	NaN	13	0	1	NaN	house
5467111279	2654	w/d in unit	2650	1	2	2	1226	NaN	NaN	NaN	no smoking	10	1	0	NaN	apartment
5467127612	754	w/d in unit	2171	1	2	2	1272	-122.678	attached garage	45.5142	NaN	0	1	1	NaN	apartment
5467141009	816	w/d in unit	1517	1	3	2	1178	-122.401	NaN	45.5165	NaN	8	1	1	NaN	apartment
5467143866	698	w/d in unit	1251	1	2	2	1050	-122.401	NaN	45.5165	NaN	8	1	1	NaN	apartment
5467145676	350	w/d in unit	1500	0	1	1	900	-122.679	street parking	45.4915	NaN	11	0	1	NaN	house
5467147852	1930	w/d in unit	1895	0	3	1.5	1623	-122.723	detached garage	45.4147	NaN	19	0	1	NaN	apartment
5467156094	1279	w/d hookups	995	0	2	2	890	-122.568	off-street parking	45.499	NaN	8	0	1	NaN	apartment
5467157129	1279	w/d hookups	995	0	2	2	890	-122.568	off-street parking	45.499	NaN	3	0	1	NaN	apartment
5467157751	1192	w/d hookups	995	0	2	2	890	-122.568	off-street parking	45.499	NaN	8	0	1	NaN	apartment
5467158644	1192	w/d hookups	995	0	2	2	890	-122.568	off-street parking	45.499	NaN	6	0	1	NaN	apartment
5467159647	1273	w/d hookups	995	0	2	2	890	-122.568	off-street parking	45.499	no smoking	7	0	1	NaN	apartment
5467161388	717	w/d in unit	1525	1	1	1	900	-122.616	detached garage	45.515	no smoking	0	1	1	NaN	duplex
5467165113	1298	w/d hookups	995	0	2	2	890	-122.568	off-street parking	45.499	NaN	7	0	1	NaN	apartment
5467172118	1244	w/d hookups	995	0	2	2	890	-122.568	off-street parking	45.499	NaN	7	0	1	NaN	apartment
5467172887	992	w/d hookups	995	0	2	2	890	-122.568	off-street parking	45.499	NaN	0	0	1	NaN	apartment
5467174019	1279	w/d hookups	995	0	2	2	890	-122.568	off-street parking	45.499	NaN	4	0	1	NaN	apartment
5467175428	802	w/d in unit	3038	1	1	1	1043	-122.683	off-street parking	45.5237	NaN	9	1	1	wheelchair accessible	apartment

	content	laundry	price	dog	bed	bath	feet	long	parking	lat	smoking	getphotos	cat	hasmap	wheelchair	housingtype
count	32223	32223	32223	32223	32223	32223	32223.000000	32223.000000	32223	32223.00000	32223	32223	32223	32223	32223	32223
unique	3873	5	2004	2	9	17	1461.000000	6131.000000	7	6000.00000	2	24	2	2	1	11
top	967	w/d in unit	995	1	1	1	903.863061	-122.631076	off-street parking	45.51843	no smoking	8	1	1	no wheelchair access	apartment
freq	174	21669	678	20511	12459	22550	3764.000000	1829.000000	16284	1829.00000	19621	3224	21850	30434	32223	25285