In [2]:

    
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import json

Load the data from our JSON file.

The data is stored as a dictionary of dictionaries in the json file. We store it that way beacause it's easy to add data to the existing master data file. Also, I haven't figured out how to get it in a database yet.



In [3]:

    
with open('../pdxapartmentfinder/data/MasterApartmentData.json') as f:
    my_dict = json.load(f)
dframe = DataFrame(my_dict)

dframe = dframe.T
dframe.shape









    Out[3]:





(36303, 20)

Clean up the data a bit

Right now the 'shared' and 'split' are included in number of bathrooms. If I were to convert that to a number I would consider a shared/split bathroom to be half or 0.5 of a bathroom.



In [3]:

    
dframe.bath = dframe.bath.replace('shared',0.5)
dframe.bath = dframe.bath.replace('split',0.5)
dframe.smoking = dframe.smoking.replace(np.nan,0)

Let's get a look at what the prices look like

To visualize it we need to get rid of null values. I haven't figured out the best way to clean this up yet. For now I'm going to drop any rows that have a null value, though I recognize that this is not a good analysis practice. We ended up dropping ~15% of data points.

😬

Also there were some CRAZY outliers, and this analysis is focused on finding a model for apartments for the 99% of us that can't afford crazy extravigant apartments



In [4]:

    
df = dframe[dframe.price < 10000][['bath','bed','feet','price']].dropna()
sns.distplot(df.price)









    Out[4]:





<matplotlib.axes._subplots.AxesSubplot at 0x115f24290>



In [5]:

    
dframe.head()









    Out[5]:






  
    
      
      bath
      bed
      cat
      content
      date
      dog
      feet
      getphotos
      hasmap
      housingtype
      lat
      laundry
      long
      parking
      price
      smoking
      time
      wheelchair
    
  
  
    
      5399866740
      1
      1
      0
      754
      2016-01-12
      0
      750
      8
      0
      apartment
      NaN
      w/d in unit
      NaN
      off-street parking
      1400
      0
      12:22:07
      NaN
    
    
      5401772970
      1
      1
      1
      2632
      2016-01-13
      1
      659
      7
      1
      apartment
      45.531
      w/d in unit
      -122.664
      attached garage
      1350
      no smoking
      16:24:11
      wheelchair accessible
    
    
      5402562933
      1.5
      NaN
      0
      1001
      2016-01-14
      0
      1
      2
      1
      apartment
      45.5333
      laundry on site
      -122.709
      carport
      1500
      no smoking
      09:12:40
      NaN
    
    
      5402607488
      1
      2
      0
      2259
      2016-01-14
      0
      936
      12
      1
      condo
      45.5328
      w/d in unit
      -122.699
      attached garage
      1995
      0
      09:36:16
      NaN
    
    
      5402822514
      1
      1
      0
      1110
      2016-01-14
      0
      624
      16
      1
      apartment
      45.5053
      w/d in unit
      -122.618
      street parking
      1495
      0
      11:31:03
      NaN



In [6]:

    
dframe.describe()









    Out[6]:






  
    
      
      bath
      bed
      cat
      content
      date
      dog
      feet
      getphotos
      hasmap
      housingtype
      lat
      laundry
      long
      parking
      price
      smoking
      time
      wheelchair
    
  
  
    
      count
      27929
      28712
      28910
      28910
      28910
      28910
      25349
      28910
      28910
      28894
      27600.0000
      27078
      27600.0000
      21598
      28673
      28910
      28910
      5789
    
    
      unique
      15
      9
      2
      3660
      109
      2
      1385
      24
      2
      11
      4741.0000
      5
      4845.0000
      7
      1908
      2
      20949
      1
    
    
      top
      1
      1
      1
      967
      2016-04-25
      1
      700
      8
      1
      apartment
      45.5142
      w/d in unit
      -122.6854
      off-street parking
      995
      no smoking
      10:05:48
      wheelchair accessible
    
    
      freq
      19375
      10832
      19847
      218
      508
      18652
      567
      2844
      27641
      22741
      391.0000
      17651
      366.0000
      7248
      652
      17888
      7
      5789



In [7]:

    
def meanimputer(column):
    imp = Imputer(missing_values='NaN', strategy='mean', axis=1)
    imp.fit(column)
    X = imp.transform(column)
    return X[0]



In [148]:

    
from sklearn import preprocessing
def modeimputer(column):

    le = preprocessing.LabelEncoder()
    column = le.fit_transform(column)
    print type(le.classes_[0])
    nan = le.transform([np.nan])[0]
    
    column = list(column)
    for _,i in enumerate(column):
        if i == nan:
            column[_] = np.nan
    
    imp = Imputer(missing_values='NaN', strategy='most_frequent', axis=1)
    imp.fit(column)

    X = imp.transform(column)
    
    for _,i in enumerate(X[0]):
        if np.isnan(i):
            X[_] = 0
    X = X.astype(int)


    Y = le.inverse_transform(X)

    return Y



In [159]:

    
arr = np.array([np.nan, 'house', 'boat', 'houseboat', 'house', np.nan, 'house','houseboat'])
prac_df = DataFrame()
prac_df['arr'] = arr
prac_df['arr']
modeimputer(prac_df['arr'])









    



<type 'str'>






    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-159-ccaf81b29b98> in <module>()
      3 prac_df['arr'] = arr
      4 prac_df['arr']
----> 5 modeimputer(prac_df['arr'])

<ipython-input-148-79c7c2143c47> in modeimputer(column)
      5     column = le.fit_transform(column)
      6     print type(le.classes_[0])
----> 7     nan = le.transform([np.nan])[0]
      8 
      9     column = list(column)

/Users/mac28/anaconda/lib/python2.7/site-packages/sklearn/preprocessing/label.pyc in transform(self, y)
    148         if len(np.intersect1d(classes, self.classes_)) < len(classes):
    149             diff = np.setdiff1d(classes, self.classes_)
--> 150             raise ValueError("y contains new labels: %s" % str(diff))
    151         return np.searchsorted(self.classes_, y)
    152 

ValueError: y contains new labels: [ nan]



In [ ]:



In [145]:

    
pd.isnull('nan')









    Out[145]:





False



In [158]:

    
u_dframe = DataFrame()
dframe['bath'] = meanimputer(dframe['bath'])
dframe['bed'] = meanimputer(dframe['bed'])
dframe['feet'] = meanimputer(dframe['feet'])
dframe['lat'] = meanimputer(dframe['lat'])
dframe['long'] = meanimputer(dframe['long'])


dframe['housingtype'] = modeimputer(dframe['housingtype'])
dframe['laundry'] = modeimputer(dframe['laundry'])
dframe['parking'] = modeimputer(dframe['parking'])
dframe['wheelchair'] = modeimputer(dframe['wheelchair'])









    



<type 'float'>






    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-158-4e52685b345d> in <module>()
      7 
      8 
----> 9 dframe['housingtype'] = modeimputer(dframe['housingtype'])
     10 dframe['laundry'] = modeimputer(dframe['laundry'])
     11 dframe['parking'] = modeimputer(dframe['parking'])

<ipython-input-148-79c7c2143c47> in modeimputer(column)
      5     column = le.fit_transform(column)
      6     print type(le.classes_[0])
----> 7     nan = le.transform([np.nan])[0]
      8 
      9     column = list(column)

/Users/mac28/anaconda/lib/python2.7/site-packages/sklearn/preprocessing/label.pyc in transform(self, y)
    148         if len(np.intersect1d(classes, self.classes_)) < len(classes):
    149             diff = np.setdiff1d(classes, self.classes_)
--> 150             raise ValueError("y contains new labels: %s" % str(diff))
    151         return np.searchsorted(self.classes_, y)
    152 

ValueError: y contains new labels: [ nan]



In [10]:

    
dframe.head()









    Out[10]:






  
    
      
      bath
      bed
      cat
      content
      date
      dog
      feet
      getphotos
      hasmap
      housingtype
      lat
      laundry
      long
      parking
      price
      smoking
      time
      wheelchair
    
  
  
    
      5399866740
      1.0
      1.00000
      0
      754
      2016-01-12
      0
      750
      8
      0
      apartment
      45.518067
      w/d in unit
      -122.629399
      NaN
      1400
      0
      12:22:07
      NaN
    
    
      5401772970
      1.0
      1.00000
      1
      2632
      2016-01-13
      1
      659
      7
      1
      apartment
      45.531019
      w/d in unit
      -122.664049
      attached garage
      1350
      no smoking
      16:24:11
      wheelchair accessible
    
    
      5402562933
      1.5
      1.43292
      0
      1001
      2016-01-14
      0
      1
      2
      1
      apartment
      45.533334
      laundry on site
      -122.708927
      carport
      1500
      no smoking
      09:12:40
      NaN
    
    
      5402607488
      1.0
      2.00000
      0
      2259
      2016-01-14
      0
      936
      12
      1
      condo
      45.532808
      w/d in unit
      -122.698992
      attached garage
      1995
      0
      09:36:16
      NaN
    
    
      5402822514
      1.0
      1.00000
      0
      1110
      2016-01-14
      0
      624
      16
      1
      apartment
      45.505277
      w/d in unit
      -122.617757
      street parking
      1495
      0
      11:31:03
      NaN



In [11]:

    
dframe.describe(include='all')









    Out[11]:






  
    
      
      bath
      bed
      cat
      content
      date
      dog
      feet
      getphotos
      hasmap
      housingtype
      lat
      laundry
      long
      parking
      price
      smoking
      time
      wheelchair
    
  
  
    
      count
      28910.000000
      28910.000000
      28910
      28910
      28910
      28910
      28910.000000
      28910
      28910
      28894
      28910.000000
      27078
      28910.000000
      21597
      28673
      28910
      28910
      5789
    
    
      unique
      NaN
      NaN
      2
      3660
      109
      2
      NaN
      24
      2
      11
      NaN
      5
      NaN
      7
      1908
      2
      20949
      1
    
    
      top
      NaN
      NaN
      1
      967
      2016-04-25
      1
      NaN
      8
      1
      apartment
      NaN
      w/d in unit
      NaN
      off-street parking
      995
      no smoking
      10:05:48
      wheelchair accessible
    
    
      freq
      NaN
      NaN
      19847
      218
      508
      18652
      NaN
      2844
      27641
      22741
      NaN
      17651
      NaN
      7247
      652
      17888
      7
      5789
    
    
      mean
      1.323159
      1.432920
      NaN
      NaN
      NaN
      NaN
      939.769616
      NaN
      NaN
      NaN
      45.518067
      NaN
      -122.629399
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      std
      0.523847
      0.974134
      NaN
      NaN
      NaN
      NaN
      4907.809014
      NaN
      NaN
      NaN
      0.126987
      NaN
      0.377066
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      min
      0.500000
      0.000000
      NaN
      NaN
      NaN
      NaN
      -600.000000
      NaN
      NaN
      NaN
      33.764300
      NaN
      -124.960021
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      25%
      1.000000
      1.000000
      NaN
      NaN
      NaN
      NaN
      644.000000
      NaN
      NaN
      NaN
      45.504795
      NaN
      -122.685400
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      50%
      1.000000
      1.000000
      NaN
      NaN
      NaN
      NaN
      888.000000
      NaN
      NaN
      NaN
      45.518067
      NaN
      -122.662622
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      75%
      2.000000
      2.000000
      NaN
      NaN
      NaN
      NaN
      1000.000000
      NaN
      NaN
      NaN
      45.532156
      NaN
      -122.612000
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      max
      6.000000
      8.000000
      NaN
      NaN
      NaN
      NaN
      825842.000000
      NaN
      NaN
      NaN
      47.680721
      NaN
      -76.484000
      NaN
      NaN
      NaN
      NaN
      NaN



In [ ]:

    
data = dframe[dframe.lat > 45.4][dframe.lat < 45.6][dframe.long < -122.0][dframe.long > -123.5]
plt.figure(figsize=(15,10))
plt.scatter(data = data, x = 'long',y='lat')

It looks like Portland!!!

Let's cluster the data. Start by creating a list of [['lat','long'], ...]



In [ ]:

    
XYdf = dframe[dframe.lat > 45.4][dframe.lat < 45.6][dframe.long < -122.0][dframe.long > -123.5]
data = [[XYdf['lat'][i],XYdf['long'][i]] for i in XYdf.index]

We'll use K Means Clustering because that's the clustering method I recently learned in class! There may be others that work better, but this is the tool that I know



In [ ]:

    
from sklearn.cluster import KMeans
km = KMeans(n_clusters=40)
km.fit(data)
neighborhoods = km.cluster_centers_



In [ ]:

    
%pylab inline
figure(1,figsize=(20,12))
plot([row[1] for row in data],[row[0] for row in data],'b.')
for i in km.cluster_centers_:  
    plot(i[1],i[0], 'g*',ms=25)
'''Note to Riley: come back and make it look pretty'''

We chose our neighborhoods!

I've found that every once in a while the centers end up in different points, but are fairly consistant. Now let's process our data points and figure out where the closest neighborhood center is to it!



In [ ]:

    
neighborhoods = neighborhoods.tolist()
for i in enumerate(neighborhoods):
    i[1].append(i[0])
print neighborhoods

Create a function that will label each point with a number coresponding to it's neighborhood



In [ ]:

    
def clusterer(X, Y,neighborhoods):
    neighbors = []
    for i in neighborhoods:
        distance = ((i[0]-X)**2 + (i[1]-Y)**2)
        neighbors.append(distance)
    closest = min(neighbors)
    return neighbors.index(closest)



In [ ]:

    
neighborhoodlist = []
for i in dframe.index:
    neighborhoodlist.append(clusterer(dframe['lat'][i],dframe['long'][i],neighborhoods))
dframe['neighborhood'] = neighborhoodlist



In [ ]:

    
dframe

Here's the new Part. We're breaking out the neighborhood values into their own columns. Now the algorithms can read them as categorical data rather than continuous data.



In [ ]:



In [ ]:

    
from sklearn import preprocessing
def CategoricalToBinary(dframe,column_name):
    le = preprocessing.LabelEncoder()
    listy = le.fit_transform(dframe[column_name])
    dframe[column_name] = listy
    unique = dframe[column_name].unique()
    serieslist = [list() for _ in xrange(len(unique))]
    
    
    for column, _ in enumerate(serieslist):
        for i, item in enumerate(dframe[column_name]):
            if item == column:
                serieslist[column].append(1)
            else:
                serieslist[column].append(0)
        dframe[column_name+str(column)] = serieslist[column]

   
    return dframe



In [ ]:

    
pd.set_option('max_columns', 100)
dframe = CategoricalToBinary(dframe,'housingtype')
dframe = CategoricalToBinary(dframe,'parking')
dframe = CategoricalToBinary(dframe,'laundry')
dframe = CategoricalToBinary(dframe,'smoking')
dframe = CategoricalToBinary(dframe,'wheelchair')
dframe = CategoricalToBinary(dframe,'neighborhood')
dframe



In [ ]:

    
dframe = dframe.drop('date',1)
dframe = dframe.drop('housingtype',1)
dframe = dframe.drop('parking',1)
dframe = dframe.drop('laundry',1)
dframe = dframe.drop('smoking',1)
dframe = dframe.drop('wheelchair',1)
dframe = dframe.drop('neighborhood',1)
dframe = dframe.drop('time',1)



In [ ]:

    
columns=list(dframe.columns)



In [ ]:

    
from __future__ import division
print len(dframe)
df2 = dframe[dframe.price < 10000][columns].dropna()
print len(df2)
print len(df2)/len(dframe)

price = df2[['price']].values
columns.pop(columns.index('price'))
features = df2[columns].values

from sklearn.cross_validation import train_test_split
features_train, features_test, price_train, price_test = train_test_split(features, price, test_size=0.1, random_state=42)

Ok, lets put it through Decision Tree!

What about Random Forest?



In [ ]:

    
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
reg = RandomForestRegressor()
reg = reg.fit(features_train, price_train)



In [ ]:

    
forest_pred = reg.predict(features_test)
forest_pred = np.array([[item] for item in forest_pred])



In [ ]:

    
print r2_score(forest_pred, price_test)
plt.scatter(forest_pred,price_test)



In [ ]:

    
df2['predictions'] = reg.predict(df2[columns])



In [ ]:

    
df2['predictions_diff'] = df2['predictions']-df2['price']



In [ ]:

    
sd = np.std(df2['predictions_diff'])
sns.kdeplot(df2['predictions_diff'][df2['predictions_diff']>-150][df2['predictions_diff']<150])
sns.plt.xlim(-150,150)



In [ ]:

    
data = df2[dframe.lat > 45.45][df2.lat < 45.6][df2.long < -122.4][df2.long > -122.8][df2['predictions_diff']>-150][df2['predictions_diff']<150]
plt.figure(figsize=(15,10))
plt.scatter(data = data, x = 'long',y='lat', c = 'predictions_diff',s=10,cmap='coolwarm')



In [ ]:

    
dframe



In [ ]:

    
print np.mean([1,2,34,np.nan])



In [ ]:

    
def averager(dframe):
    dframe = dframe.T
    dframe.dropna()
    averages = {}
    for listing in dframe:
        try:
            key = str(dframe[listing]['bed'])+','+str(dframe[listing]['bath'])+','+str(dframe[listing]['neighborhood'])+','+str(dframe[listing]['feet']-dframe[listing]['feet']%50)
            if key not in averages:
                averages[key] = {'average_list':[dframe[listing]['price']], 'average':0}
            elif key in averages:
                averages[key]['average_list'].append(dframe[listing]['price'])
        except TypeError:
            continue
    for entry in averages:
        averages[entry]['average'] = np.mean(averages[entry]['average_list'])
    return averages



In [ ]:

    
averages = averager(dframe)
print averages



In [ ]:

    
dframe['averages']= averages[str(dframe['bed'])+','+str(dframe['bath'])+','+str(dframe['neighborhood'])+','+str(dframe['feet']-dframe['feet']%50)]



In [ ]:

    
dframe.T

Wow! up to .87! That's our best yet! What if we add more trees???



In [ ]:

    
reg = RandomForestRegressor(n_estimators = 100)
reg = reg.fit(features_train, price_train)



In [ ]:

    
forest_pred = reg.predict(features_test)
forest_pred = np.array([[item] for item in forest_pred])



In [ ]:

    
print r2_score(forest_pred, price_test)
print plt.scatter(pred,price_test)



In [ ]:



In [ ]:

    
from sklearn.tree import DecisionTreeRegressor
reg = DecisionTreeRegressor(max_depth = 5)
reg.fit(features_train, price_train)
print len(features_train[0])
columns = [str(x) for x in columns]
print columns
from sklearn.tree import export_graphviz
export_graphviz(reg,feature_names=columns)

Up to .88!

So what is our goal now? I'd like to see if adjusting the number of neighborhoods increases the accuracy. same for the affect with the number of trees



In [ ]:

    
def neighborhood_optimizer(dframe,neighborhood_number_range, counter_num):
    XYdf = dframe[dframe.lat > 45.4][dframe.lat < 45.6][dframe.long < -122.0][dframe.long > -123.5]
    data = [[XYdf['lat'][i],XYdf['long'][i]] for i in XYdf.index]
    r2_dict = []
    for i in neighborhood_number_range:
        counter = counter_num
        average_accuracy_list = []
        while counter > 0:
            km = KMeans(n_clusters=i)
            km.fit(data)
            neighborhoods = km.cluster_centers_
            neighborhoods = neighborhoods.tolist()
            for x in enumerate(neighborhoods):
                x[1].append(x[0])
            neighborhoodlist = []
            for z in dframe.index:
                neighborhoodlist.append(clusterer(dframe['lat'][z],dframe['long'][z],neighborhoods))
            dframecopy = dframe.copy()
            dframecopy['neighborhood'] = Series((neighborhoodlist), index=dframe.index)
            df2 = dframecopy[dframe.price < 10000][['bath','bed','feet','dog','cat','content','getphotos', 'hasmap', 'price','neighborhood']].dropna()
            features = df2[['bath','bed','feet','dog','cat','content','getphotos', 'hasmap', 'neighborhood']].values
            price = df2[['price']].values
            features_train, features_test, price_train, price_test = train_test_split(features, price, test_size=0.1)
            reg = RandomForestRegressor()
            reg = reg.fit(features_train, price_train)
            forest_pred = reg.predict(features_test)
            forest_pred = np.array([[item] for item in forest_pred])
            counter -= 1
            average_accuracy_list.append(r2_score(forest_pred, price_test))
        total = 0
        for entry in average_accuracy_list:
            total += entry
        r2_accuracy = total/len(average_accuracy_list)
        r2_dict.append((i,r2_accuracy))
    print r2_dict
    return r2_dict



In [ ]:

    
neighborhood_number_range = [i for _,i in enumerate(range(2,31,2))]
neighborhood_number_range



In [ ]:

    
r2_dict = neighborhood_optimizer(dframe,neighborhood_number_range,10)



In [ ]:

    
r2_dict[:][0]



In [ ]:

    
plt.scatter([x[0] for x in r2_dict],[x[1] for x in r2_dict])

Looks like the optimum is right around 10 or 11, and then starts to drop off. Let's get a little more granular and look at a smaller range



In [ ]:

    
neighborhood_number_range = [i for _,i in enumerate(range(7,15))]
neighborhood_number_range



In [ ]:

    
r2_dict = neighborhood_optimizer(dframe,neighborhood_number_range,10)



In [ ]:

    
print r2_dict
plt.scatter([x[0] for x in r2_dict],[x[1] for x in r2_dict])

Trying a few times, it looks like 10, 11 and 12 get the best results at ~.85. Of course, we'll need to redo some of these optomizations after we properly process our data. Hopefully we'll see some more consistency then too.



In [ ]:

    
r2_dict = neighborhood_optimizer(dframe,[10,11,12],25)

Note #1 to Riley: (From Last time) Perhaps look into another regressor? see if there's one that's inherantly better at this kind of thing.

Note #2 to Riley: Figure out how to process data so that you don't have to drop null values

Note #3 to Riley: convert categorical data into binary

Note #4 to Riley: I wonder if increasing the number of neighborhoods would become more accurate as we collect more data? like you could create a bunch of little accurate models instead of a bunch of bigger ones.

Learned: If you plan on using Decision Tree/Random Forest from SKLearn, make sure you collect your discrete variables in separate columns and make them binary yes or no(0 or 1).

	bath	bed	cat	content	date	dog	feet	getphotos	hasmap	housingtype	lat	laundry	long	parking	price	smoking	time	wheelchair
5399866740	1	1	0	754	2016-01-12	0	750	8	0	apartment	NaN	w/d in unit	NaN	off-street parking	1400	0	12:22:07	NaN
5401772970	1	1	1	2632	2016-01-13	1	659	7	1	apartment	45.531	w/d in unit	-122.664	attached garage	1350	no smoking	16:24:11	wheelchair accessible
5402562933	1.5	NaN	0	1001	2016-01-14	0	1	2	1	apartment	45.5333	laundry on site	-122.709	carport	1500	no smoking	09:12:40	NaN
5402607488	1	2	0	2259	2016-01-14	0	936	12	1	condo	45.5328	w/d in unit	-122.699	attached garage	1995	0	09:36:16	NaN
5402822514	1	1	0	1110	2016-01-14	0	624	16	1	apartment	45.5053	w/d in unit	-122.618	street parking	1495	0	11:31:03	NaN

	bath	bed	cat	content	date	dog	feet	getphotos	hasmap	housingtype	lat	laundry	long	parking	price	smoking	time	wheelchair
count	27929	28712	28910	28910	28910	28910	25349	28910	28910	28894	27600.0000	27078	27600.0000	21598	28673	28910	28910	5789
unique	15	9	2	3660	109	2	1385	24	2	11	4741.0000	5	4845.0000	7	1908	2	20949	1
top	1	1	1	967	2016-04-25	1	700	8	1	apartment	45.5142	w/d in unit	-122.6854	off-street parking	995	no smoking	10:05:48	wheelchair accessible
freq	19375	10832	19847	218	508	18652	567	2844	27641	22741	391.0000	17651	366.0000	7248	652	17888	7	5789

	bath	bed	cat	content	date	dog	feet	getphotos	hasmap	housingtype	lat	laundry	long	parking	price	smoking	time	wheelchair
5399866740	1.0	1.00000	0	754	2016-01-12	0	750	8	0	apartment	45.518067	w/d in unit	-122.629399	NaN	1400	0	12:22:07	NaN
5401772970	1.0	1.00000	1	2632	2016-01-13	1	659	7	1	apartment	45.531019	w/d in unit	-122.664049	attached garage	1350	no smoking	16:24:11	wheelchair accessible
5402562933	1.5	1.43292	0	1001	2016-01-14	0	1	2	1	apartment	45.533334	laundry on site	-122.708927	carport	1500	no smoking	09:12:40	NaN
5402607488	1.0	2.00000	0	2259	2016-01-14	0	936	12	1	condo	45.532808	w/d in unit	-122.698992	attached garage	1995	0	09:36:16	NaN
5402822514	1.0	1.00000	0	1110	2016-01-14	0	624	16	1	apartment	45.505277	w/d in unit	-122.617757	street parking	1495	0	11:31:03	NaN

	bath	bed	cat	content	date	dog	feet	getphotos	hasmap	housingtype	lat	laundry	long	parking	price	smoking	time	wheelchair
count	28910.000000	28910.000000	28910	28910	28910	28910	28910.000000	28910	28910	28894	28910.000000	27078	28910.000000	21597	28673	28910	28910	5789
unique	NaN	NaN	2	3660	109	2	NaN	24	2	11	NaN	5	NaN	7	1908	2	20949	1
top	NaN	NaN	1	967	2016-04-25	1	NaN	8	1	apartment	NaN	w/d in unit	NaN	off-street parking	995	no smoking	10:05:48	wheelchair accessible
freq	NaN	NaN	19847	218	508	18652	NaN	2844	27641	22741	NaN	17651	NaN	7247	652	17888	7	5789
mean	1.323159	1.432920	NaN	NaN	NaN	NaN	939.769616	NaN	NaN	NaN	45.518067	NaN	-122.629399	NaN	NaN	NaN	NaN	NaN
std	0.523847	0.974134	NaN	NaN	NaN	NaN	4907.809014	NaN	NaN	NaN	0.126987	NaN	0.377066	NaN	NaN	NaN	NaN	NaN
min	0.500000	0.000000	NaN	NaN	NaN	NaN	-600.000000	NaN	NaN	NaN	33.764300	NaN	-124.960021	NaN	NaN	NaN	NaN	NaN
25%	1.000000	1.000000	NaN	NaN	NaN	NaN	644.000000	NaN	NaN	NaN	45.504795	NaN	-122.685400	NaN	NaN	NaN	NaN	NaN
50%	1.000000	1.000000	NaN	NaN	NaN	NaN	888.000000	NaN	NaN	NaN	45.518067	NaN	-122.662622	NaN	NaN	NaN	NaN	NaN
75%	2.000000	2.000000	NaN	NaN	NaN	NaN	1000.000000	NaN	NaN	NaN	45.532156	NaN	-122.612000	NaN	NaN	NaN	NaN	NaN
max	6.000000	8.000000	NaN	NaN	NaN	NaN	825842.000000	NaN	NaN	NaN	47.680721	NaN	-76.484000	NaN	NaN	NaN	NaN	NaN