The code below is from the Socrata developer blog post at https://dev.socrata.com/blog/2016/02/01/pandas-and-jupyter-notebook.html



In [1]:

    
import pandas as pd
# Print the installed pandas version string.
print(pd.__version__)



In [2]:

    
# Show the documentation for pd.read_json. help() is plain Python, so this
# cell also runs when the notebook is exported and executed as a script,
# unlike the IPython-only `obj?` syntax.
help(pd.read_json)



In [3]:

    
# Fetch the JSON resource from data.smcgov.org and parse it into a DataFrame.
df = pd.read_json(path_or_buf="https://data.smcgov.org/resource/mb6a-xn89.json")

# Preview the first five rows (head() defaults to n=5).
df.head()









    Out[3]:







  
    
      
      geography
      geography_type
      year
      less_than_high_school_graduate
      high_school_graduate
      some_college_or_associate_s_degree
      bachelor_s_degree_or_higher
      location_1
      :@computed_region_uph5_8hpn
      :@computed_region_i2t2_cryp
    
  
  
    
      0
      Atherton
      Town
      2014-01-01T00:00:00.000
      13.6
      12.3
      2.7
      3.5
      {'type': 'Point', 'coordinates': [-122.2, 37.4...
      2.0
      28596
    
    
      1
      Colma
      Town
      2014-01-01T00:00:00.000
      6.3
      6.4
      10.4
      2.4
      {'type': 'Point', 'coordinates': [-122.455556,...
      4.0
      28588
    
    
      2
      Foster City
      City
      2014-01-01T00:00:00.000
      11.9
      9.7
      2.0
      2.9
      {'type': 'Point', 'coordinates': [-122.266389,...
      6.0
      319
    
    
      3
      Portola Valley
      Town
      2014-01-01T00:00:00.000
      48.1
      0.0
      0.0
      1.8
      {'type': 'Point', 'coordinates': [-122.218611,...
      14.0
      28597
    
    
      4
      Redwood City
      City
      2014-01-01T00:00:00.000
      16.4
      10.6
      6.6
      3.0
      {'type': 'Point', 'coordinates': [-122.236111,...
      21.0
      28607



In [4]:

    
# List the names currently defined in the interactive namespace.
dir()









    Out[4]:





['In',
 'Out',
 '_',
 '_3',
 '__',
 '___',
 '__builtin__',
 '__builtins__',
 '__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_dh',
 '_i',
 '_i1',
 '_i2',
 '_i3',
 '_i4',
 '_ih',
 '_ii',
 '_iii',
 '_oh',
 'df',
 'exit',
 'get_ipython',
 'pd',
 'quit']



In [5]:

    
# Dimensions of the frame as a (rows, columns) tuple.
df.shape









    Out[5]:





(32, 10)



In [6]:

    
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()









    Out[6]:







  
    
      
      less_than_high_school_graduate
      high_school_graduate
      some_college_or_associate_s_degree
      bachelor_s_degree_or_higher
      :@computed_region_uph5_8hpn
      :@computed_region_i2t2_cryp
    
  
  
    
      count
      32.00000
      32.000000
      32.000000
      32.000000
      30.000000
      32.000000
    
    
      mean
      17.80000
      6.462500
      5.946875
      2.856250
      17.733333
      25062.093750
    
    
      std
      19.29944
      4.693905
      4.728430
      1.873919
      9.762466
      9502.711577
    
    
      min
      0.00000
      0.000000
      0.000000
      0.000000
      1.000000
      312.000000
    
    
      25%
      6.82500
      1.925000
      2.525000
      2.100000
      9.500000
      28587.750000
    
    
      50%
      13.90000
      7.750000
      5.500000
      3.000000
      18.500000
      28595.000000
    
    
      75%
      20.97500
      9.450000
      8.800000
      3.600000
      25.750000
      28604.250000
    
    
      max
      100.00000
      16.400000
      18.500000
      9.100000
      34.000000
      28613.000000



In [7]:

    
# Describe every remaining column, numeric or not, after leaving out location_1.
# NOTE(review): location_1 holds dict values; presumably it is dropped because
# describe(include="all") cannot summarise them - confirm.
df.drop(columns="location_1").describe(include="all")









    Out[7]:







  
    
      
      geography
      geography_type
      year
      less_than_high_school_graduate
      high_school_graduate
      some_college_or_associate_s_degree
      bachelor_s_degree_or_higher
      :@computed_region_uph5_8hpn
      :@computed_region_i2t2_cryp
    
  
  
    
      count
      32
      32
      32
      32.00000
      32.000000
      32.000000
      32.000000
      30.000000
      32.000000
    
    
      unique
      32
      3
      1
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      top
      San Carlos
      City
      2014-01-01T00:00:00.000
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      freq
      1
      15
      32
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      mean
      NaN
      NaN
      NaN
      17.80000
      6.462500
      5.946875
      2.856250
      17.733333
      25062.093750
    
    
      std
      NaN
      NaN
      NaN
      19.29944
      4.693905
      4.728430
      1.873919
      9.762466
      9502.711577
    
    
      min
      NaN
      NaN
      NaN
      0.00000
      0.000000
      0.000000
      0.000000
      1.000000
      312.000000
    
    
      25%
      NaN
      NaN
      NaN
      6.82500
      1.925000
      2.525000
      2.100000
      9.500000
      28587.750000
    
    
      50%
      NaN
      NaN
      NaN
      13.90000
      7.750000
      5.500000
      3.000000
      18.500000
      28595.000000
    
    
      75%
      NaN
      NaN
      NaN
      20.97500
      9.450000
      8.800000
      3.600000
      25.750000
      28604.250000
    
    
      max
      NaN
      NaN
      NaN
      100.00000
      16.400000
      18.500000
      9.100000
      34.000000
      28613.000000



In [8]:

    
# Data type of each column.
df.dtypes









    Out[8]:





geography                              object
geography_type                         object
year                                   object
less_than_high_school_graduate        float64
high_school_graduate                  float64
some_college_or_associate_s_degree    float64
bachelor_s_degree_or_higher           float64
location_1                             object
:@computed_region_uph5_8hpn           float64
:@computed_region_i2t2_cryp             int64
dtype: object



In [9]:

    
# Mean of the bachelor_s_degree_or_higher column.
df["bachelor_s_degree_or_higher"].mean()









    Out[9]:





2.8562500000000006



In [10]:

    
# Number of non-null entries in the geography column.
df["geography"].count()









    Out[10]:





32



In [11]:

    
# Distinct values that appear in the geography_type column.
df["geography_type"].unique()









    Out[11]:





array(['Town', 'City', 'CDP'], dtype=object)



In [12]:

    
# How many rows share each distinct value of less_than_high_school_graduate.
df["less_than_high_school_graduate"].value_counts()









    Out[12]:





0.0      4
14.2     1
8.5      1
7.0      1
100.0    1
9.5      1
11.9     1
4.8      1
31.1     1
26.7     1
6.2      1
15.7     1
22.1     1
16.4     1
6.3      1
44.4     1
20.9     1
7.7      1
9.2      1
37.8     1
3.3      1
15.1     1
48.1     1
18.3     1
21.2     1
16.1     1
13.6     1
13.4     1
20.1     1
Name: less_than_high_school_graduate, dtype: int64



In [13]:

    
def mapGeography(x):
    """Encode a geography type as a binary flag.

    Returns 1 when ``x`` equals the string "City" and 0 for any other value.
    """
    return 1 if x == "City" else 0



In [14]:

    
# Add a 0/1 column by running mapGeography over every geography_type value.
df["geography_mapped_value"] = df["geography_type"].apply(mapGeography)



In [15]:

    
# Tally how many rows were mapped to each flag value.
df["geography_mapped_value"].value_counts()









    Out[15]:





0    17
1    15
Name: geography_mapped_value, dtype: int64



In [16]:

    
# Build the same kind of 0/1 "City" flag column, this time with an inline lambda.
df["geography_mapped_value_lambda"] = df["geography_type"].apply(
    lambda kind: 1 if kind == "City" else 0
)



In [17]:

    
# Tally how many rows the lambda mapped to each flag value.
df["geography_mapped_value_lambda"].value_counts()









    Out[17]:





0    17
1    15
Name: geography_mapped_value_lambda, dtype: int64



In [ ]:

	geography	geography_type	year	less_than_high_school_graduate	high_school_graduate	some_college_or_associate_s_degree	bachelor_s_degree_or_higher	location_1	:@computed_region_uph5_8hpn	:@computed_region_i2t2_cryp
0	Atherton	Town	2014-01-01T00:00:00.000	13.6	12.3	2.7	3.5	{'type': 'Point', 'coordinates': [-122.2, 37.4...	2.0	28596
1	Colma	Town	2014-01-01T00:00:00.000	6.3	6.4	10.4	2.4	{'type': 'Point', 'coordinates': [-122.455556,...	4.0	28588
2	Foster City	City	2014-01-01T00:00:00.000	11.9	9.7	2.0	2.9	{'type': 'Point', 'coordinates': [-122.266389,...	6.0	319
3	Portola Valley	Town	2014-01-01T00:00:00.000	48.1	0.0	0.0	1.8	{'type': 'Point', 'coordinates': [-122.218611,...	14.0	28597
4	Redwood City	City	2014-01-01T00:00:00.000	16.4	10.6	6.6	3.0	{'type': 'Point', 'coordinates': [-122.236111,...	21.0	28607

	less_than_high_school_graduate	high_school_graduate	some_college_or_associate_s_degree	bachelor_s_degree_or_higher	:@computed_region_uph5_8hpn	:@computed_region_i2t2_cryp
count	32.00000	32.000000	32.000000	32.000000	30.000000	32.000000
mean	17.80000	6.462500	5.946875	2.856250	17.733333	25062.093750
std	19.29944	4.693905	4.728430	1.873919	9.762466	9502.711577
min	0.00000	0.000000	0.000000	0.000000	1.000000	312.000000
25%	6.82500	1.925000	2.525000	2.100000	9.500000	28587.750000
50%	13.90000	7.750000	5.500000	3.000000	18.500000	28595.000000
75%	20.97500	9.450000	8.800000	3.600000	25.750000	28604.250000
max	100.00000	16.400000	18.500000	9.100000	34.000000	28613.000000