notebook.community

Edit and run



In [9]:

    
import pandas as pd
data_311 = pd.read_csv('df_311_ready_to_join.csv', na_values=['-'])
data_311 = data_311.where((pd.notnull(data_311)), None)
data_311.head(15)









    Out[9]:






  
    
      
      feature-311
      tuple
    
  
  
    
      0 
       [0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...
       (6274149.810962193, -122.42479733333332, 37.77...
    
    
      1 
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, ...
       (68448950.51830366, -122.45177022222221, 37.76...
    
    
      2 
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...
       (180695514.55571115, -122.50571599999999, 37.7...
    
    
      3 
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...
       (198060742.09701943, -122.50571599999999, 37.7...
    
    
      4 
       [0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...
       (166811428.19983998, -122.42479733333332, 37.7...
    
    
      5 
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
       (125240125.90398079, -122.49222955555555, 37.7...
    
    
      6 
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
       (164827987.29185838, -122.41131088888888, 37.7...
    
    
      7 
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...
       (101115007.92118424, -122.42479733333332, 37.7...
    
    
      8 
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...
       (80592466.28145629, -122.43828377777778, 37.78...
    
    
      9 
       [0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, ...
       (168268650.0914183, -122.43828377777778, 37.77...
    
    
      10
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...
       (16110397.579115825, -122.43828377777778, 37.7...
    
    
      11
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
       (194822471.22684535, -122.42479733333332, 37.7...
    
    
      12
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
       (166163774.02580518, -122.41131088888888, 37.7...
    
    
      13
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...
       (135157330.44388878, -122.46525666666668, 37.7...
    
    
      14
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
       (115444356.52170436, -122.39782444444445, 37.7...



In [20]:

    
len(eval(data_311.iloc[0][0]))









    Out[20]:





39



In [10]:

    
data_911 = pd.read_csv('data_911_ready_to_join.csv', na_values=['-'])
data_911 = data_911.where((pd.notnull(data_911)), None)
data_911.head(15)









    Out[10]:






  
    
      
      feature-911
      tuple
    
  
  
    
      0 
       5
       (68448950.51830366, -122.45177022222221, 37.76...
    
    
      1 
       1
       (89821538.2614523, -122.47874311111113, 37.710...
    
    
      2 
       2
       (198060742.09701943, -122.50571599999999, 37.7...
    
    
      3 
       8
       (166811428.19983998, -122.42479733333332, 37.7...
    
    
      4 
       1
       (125240125.90398079, -122.49222955555555, 37.7...
    
    
      5 
       4
       (164827987.29185838, -122.41131088888888, 37.7...
    
    
      6 
       1
       (69987129.18163633, -122.46525666666668, 37.73...
    
    
      7 
       1
       (34447106.3814763, -122.42479733333332, 37.710...
    
    
      8 
       2
       (16110397.579115825, -122.43828377777778, 37.7...
    
    
      9 
       3
       (194822471.22684535, -122.42479733333332, 37.7...
    
    
      10
       2
       (100507832.1330266, -122.43828377777778, 37.78...
    
    
      11
       4
       (135157330.44388878, -122.46525666666668, 37.7...
    
    
      12
       1
       (52176639.39567914, -122.47874311111113, 37.76...
    
    
      13
       1
       (115444356.52170436, -122.39782444444445, 37.7...
    
    
      14
       3
       (77718500.88417684, -122.47874311111113, 37.77...



In [24]:

    
print data_311.head(2)
print len(data_311)
print data_911.head(2)
print len(data_911)









    



                                         feature-311  \
0  [0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...   
1  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, ...   

                                               tuple  
0  (6274149.810962193, -122.42479733333332, 37.77...  
1  (68448950.51830366, -122.45177022222221, 37.76...  
249147
   feature-911                                              tuple
0            5  (68448950.51830366, -122.45177022222221, 37.76...
1            1  (89821538.2614523, -122.47874311111113, 37.710...
216286



In [11]:

    
joined_df = data_311.merge(data_911, how='left', on='tuple')
joined_df = joined_df.where((pd.notnull(joined_df)), 0)
print "done joining"









    



done joining



In [25]:

    
joined_df.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 249147 entries, 0 to 249146
Data columns (total 3 columns):
feature-311    249147 non-null object
tuple          249147 non-null object
feature-911    249147 non-null float64
dtypes: float64(1), object(2)



In [31]:

    
joined_df.head(15)
print (eval(joined_df.iloc[0][0]))









    



[0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0]



In [14]:

    
joined_df.to_csv("joined_data.csv", index_label=False)



In [15]:

    
# use the joined table to create a matrix.  
# The matrix will be Nx(D+1), where N is the number of unique tuples, and D is the number of features in 
# our feature vector.  The last column of the matrix corresponds to the feature-911, i.e. the number
# of 911 reports in the given location/time.
import numpy as np
N = len(joined_df)
D = len(eval(joined_df['feature-311'][0]))
mat = np.zeros([N, D+1])
for i in xrange(N):
    #print i
    mat[i,] = eval(joined_df['feature-311'][i]) + [joined_df['feature-911'][i]]



In [22]:

    
print len(mat)
print mat[0]









    



249147
[ 0.  0.  0.  0.  2.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  2.  2.  0.  0.
  0.  0.  0.  0.]



In [23]:

    
np.savetxt("joined_matrix.txt", mat)

	feature-311	tuple
0	[0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...	(6274149.810962193, -122.42479733333332, 37.77...
1	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, ...	(68448950.51830366, -122.45177022222221, 37.76...
2	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...	(180695514.55571115, -122.50571599999999, 37.7...
3	[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...	(198060742.09701943, -122.50571599999999, 37.7...
4	[0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...	(166811428.19983998, -122.42479733333332, 37.7...
5	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...	(125240125.90398079, -122.49222955555555, 37.7...
6	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...	(164827987.29185838, -122.41131088888888, 37.7...
7	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...	(101115007.92118424, -122.42479733333332, 37.7...
8	[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...	(80592466.28145629, -122.43828377777778, 37.78...
9	[0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, ...	(168268650.0914183, -122.43828377777778, 37.77...
10	[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...	(16110397.579115825, -122.43828377777778, 37.7...
11	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...	(194822471.22684535, -122.42479733333332, 37.7...
12	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...	(166163774.02580518, -122.41131088888888, 37.7...
13	[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...	(135157330.44388878, -122.46525666666668, 37.7...
14	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...	(115444356.52170436, -122.39782444444445, 37.7...

	feature-911	tuple
0	5	(68448950.51830366, -122.45177022222221, 37.76...
1	1	(89821538.2614523, -122.47874311111113, 37.710...
2	2	(198060742.09701943, -122.50571599999999, 37.7...
3	8	(166811428.19983998, -122.42479733333332, 37.7...
4	1	(125240125.90398079, -122.49222955555555, 37.7...
5	4	(164827987.29185838, -122.41131088888888, 37.7...
6	1	(69987129.18163633, -122.46525666666668, 37.73...
7	1	(34447106.3814763, -122.42479733333332, 37.710...
8	2	(16110397.579115825, -122.43828377777778, 37.7...
9	3	(194822471.22684535, -122.42479733333332, 37.7...
10	2	(100507832.1330266, -122.43828377777778, 37.78...
11	4	(135157330.44388878, -122.46525666666668, 37.7...
12	1	(52176639.39567914, -122.47874311111113, 37.76...
13	1	(115444356.52170436, -122.39782444444445, 37.7...
14	3	(77718500.88417684, -122.47874311111113, 37.77...