In [6]:
from __future__ import print_function

import pandas as pd

# Two three-element Series built from plain Python lists: population figures
# and city names. Neither passes an explicit index.
population = pd.Series(
    [852469, 1015785, 485199]
)
city_names = pd.Series(
    ['San Francisco', 'San Jose', 'Sacramento']
)

Now play with `city_names`: combine it with `population` into a DataFrame and look at the summary statistics.


In [7]:
# Assemble the two Series into a single table, one column per Series.
cities = pd.DataFrame({
    'City name': city_names,
    'Population': population,
})
# Summary statistics; as the output shows, only the numeric column is reported.
cities.describe()


Out[7]:
Population
count 3.000000e+00
mean 7.844843e+05
std 2.717477e+05
min 4.851990e+05
25% 6.688340e+05
50% 8.524690e+05
75% 9.341270e+05
max 1.015785e+06

In [8]:
# Try a larger dataset: read the California housing training CSV directly
# from its URL (comma-separated), then show summary statistics for its columns.
california_housing_dataframe = pd.read_csv(
    "https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv",
    sep=","
)
california_housing_dataframe.describe()


Out[8]:
longitude latitude housing_median_age total_rooms total_bedrooms population households median_income median_house_value
count 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000
mean -119.562108 35.625225 28.589353 2643.664412 539.410824 1429.573941 501.221941 3.883578 207300.912353
std 2.005166 2.137340 12.586937 2179.947071 421.499452 1147.852959 384.520841 1.908157 115983.764387
min -124.350000 32.540000 1.000000 2.000000 1.000000 3.000000 1.000000 0.499900 14999.000000
25% -121.790000 33.930000 18.000000 1462.000000 297.000000 790.000000 282.000000 2.566375 119400.000000
50% -118.490000 34.250000 29.000000 2127.000000 434.000000 1167.000000 409.000000 3.544600 180400.000000
75% -118.000000 37.720000 37.000000 3151.250000 648.250000 1721.000000 605.250000 4.767000 265000.000000
max -114.310000 41.950000 52.000000 37937.000000 6445.000000 35682.000000 6082.000000 15.000100 500001.000000

In [9]:
# Peek at the top of the table: the first five rows (head's default count).
california_housing_dataframe.head(n=5)


Out[9]:
longitude latitude housing_median_age total_rooms total_bedrooms population households median_income median_house_value
0 -114.31 34.19 15.0 5612.0 1283.0 1015.0 472.0 1.4936 66900.0
1 -114.47 34.40 19.0 7650.0 1901.0 1129.0 463.0 1.8200 80100.0
2 -114.56 33.69 17.0 720.0 174.0 333.0 117.0 1.6509 85700.0
3 -114.57 33.64 14.0 1501.0 337.0 515.0 226.0 3.1917 73400.0
4 -114.57 33.57 20.0 1454.0 326.0 624.0 262.0 1.9250 65500.0

In [10]:
# List the column labels of the housing table (same Index that keys() returns).
california_housing_dataframe.columns


Out[10]:
Index([u'longitude', u'latitude', u'housing_median_age', u'total_rooms',
       u'total_bedrooms', u'population', u'households', u'median_income',
       u'median_house_value'],
      dtype='object')