In [38]:
import pandas as pd
import h5py
import numpy as np
import matplotlib.pyplot as plt
In [39]:
# Read the listings CSV into a DataFrame and preview its first rows.
# Forward slashes are used because they resolve as directory separators on
# both Windows and POSIX; the previous backslash form only worked on Windows.
filename = '../data/sfbay_listings_03172017.csv'
data = pd.read_csv(filename)
data.head()
Out[39]:
listing_id
date
rent
bedrooms
bathrooms
sqft
rent_sqft
fips_block
state
region
...
race_of_head_4
race_of_head_5
race_of_head_6
race_of_head_7
race_of_head_8
race_of_head_9
recent_mover_0
recent_mover_1
tenure_1
tenure_2
0
5915417519
2016-12-11
2586.0
2.0
NaN
1039.0
2.488932
60971530013001
CA
sfbay
...
NaN
2.0
23.0
NaN
115.0
29.0
1031.0
145.0
560.0
616.0
1
5915416703
2016-12-11
3178.0
1.0
NaN
574.0
5.536585
60750615003012
CA
sfbay
...
NaN
NaN
472.0
NaN
5.0
2.0
697.0
513.0
613.0
597.0
2
5915414697
2016-12-11
3029.0
1.0
NaN
771.0
3.928664
60816080021000
CA
sfbay
...
NaN
NaN
293.0
3.0
9.0
16.0
636.0
70.0
526.0
180.0
3
5915384383
2016-12-11
2395.0
1.0
NaN
770.0
3.110390
60855081021004
CA
sfbay
...
NaN
NaN
704.0
NaN
1.0
21.0
1005.0
265.0
660.0
610.0
4
5915414020
2016-12-11
2735.0
2.0
NaN
922.0
2.966377
60411042002000
CA
sfbay
...
NaN
8.0
50.0
2.0
75.0
47.0
898.0
113.0
504.0
507.0
5 rows × 33 columns
In [41]:
# Keep only the rows that have no missing value in any column
# (axis=0 / how='any' are the dropna defaults, spelled out for the reader).
sub = data.dropna(axis=0, how='any')
In [42]:
# Number of rows left after dropping incomplete records.
sub.shape[0]
Out[42]:
1300
In [44]:
# Write the NaN-free subset to disk. The DataFrame index is written as the
# first column (to_csv default), so the original row labels are preserved.
# Forward slashes are used because they resolve as directory separators on
# both Windows and POSIX; the previous backslash form only worked on Windows.
sub.to_csv('../data/sfbay_listings_03172017_sub.csv')
In [6]:
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-6-6471456e2c1e> in <module>()
1 import sys
----> 2 import chilkat
3
4 sftp = chilkat.CkSFtp()
5
ImportError: No module named 'chilkat'
In [9]:
# Read the census CSV into a DataFrame and preview its first rows.
# NOTE: this rebinds `data`, replacing whatever frame the name held before.
# Forward slashes are used because they resolve as directory separators on
# both Windows and POSIX; the previous backslash form only worked on Windows.
data = pd.read_csv('../data/cl_census_CA.csv')
data.head()
Out[9]:
listing_id
date
rent
bedrooms
sqft
rent_sqft
fips_block
state
mpo_id
cars_tot
...
race_of_head_4
race_of_head_5
race_of_head_6
race_of_head_7
race_of_head_8
race_of_head_9
recent_mover_0
recent_mover_1
tenure_1
tenure_2
0
5873877617
2016-11-13
925.0
2.0
874.0
1.058352
60470010021060
CA
6197202.0
4603.0
...
NaN
8.0
527.0
1.0
198.0
42.0
1516.0
628.0
1310.0
834.0
1
5873876292
2016-11-13
735.0
0.0
650.0
1.130769
60070012001017
CA
6198000.0
646.0
...
NaN
NaN
6.0
1.0
3.0
16.0
371.0
44.0
98.0
317.0
2
5873889346
2016-11-13
1675.0
3.0
1000.0
1.675000
60790125023017
CA
6199200.0
1902.0
...
NaN
NaN
10.0
NaN
30.0
21.0
712.0
257.0
477.0
492.0
3
5873893871
2016-11-13
1818.0
2.0
1084.0
1.677122
60830020061208
CA
6196600.0
1058.0
...
NaN
2.0
21.0
1.0
8.0
8.0
427.0
41.0
420.0
48.0
4
5849643168
2016-11-13
1050.0
2.0
1100.0
0.954545
60890113004008
CA
6198100.0
786.0
...
NaN
3.0
NaN
2.0
26.0
23.0
287.0
205.0
202.0
290.0
5 rows × 29 columns
In [8]:
# Show every column name as an array (head() elides the middle columns).
column_index = data.columns
column_index.values
Out[8]:
array(['listing_id', 'date', 'rent', 'bedrooms', 'sqft', 'rent_sqft',
'fips_block', 'state', 'mpo_id', 'cars_tot', 'children_tot',
'persons_tot', 'workers_tot', 'age_of_head_med', 'income_med',
'hhs_tot', 'race_of_head_1', 'race_of_head_2', 'race_of_head_3',
'race_of_head_4', 'race_of_head_5', 'race_of_head_6',
'race_of_head_7', 'race_of_head_8', 'race_of_head_9',
'recent_mover_0', 'recent_mover_1', 'tenure_1', 'tenure_2'], dtype=object)
Content source: lrayle/rental-listings-census
Similar notebooks: