In [19]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [1]:
import numpy as np

In [3]:
z = np.zeros((8,8), dtype=int)

In [4]:
np.tile(np.array([[0, 1], [1, 0]]), (4, 4))


Out[4]:
array([[0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0]])

In [6]:
np.__version__


Out[6]:
'1.13.1'

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [13]:
import numpy as np

In [1]:
from urllib.parse import urlparse

In [4]:
def read_csv():
    with open("USlocalopendataportals.csv", "r") as inp:
        next(inp)  # skipping header
        reader = (l.strip().split(",") for l in inp)  # reading csv (csv module would help, too)
        for line in (
            "\t".join(f[:5] + f[6:-1] + [urlparse(f[5]).netloc])
            for f in reader
        ):
            yield line

In [5]:
for line in read_csv():
    print(line)


Alabama	AL	4802740	Government	No	open.alabama.gov
Alaska	AK	722718	Government	No	dof.doa.alaska.gov
Albuquerque	NM	539000	Government	No	www.cabq.gov
Ann Arbor	MI	114925	Government	No	www.a2gov.org
Arizona	AZ	6482505	Government	No	openbooks.az.gov
Arkansas	AR	2937979	Government	No	transparency.arkansas.gov
Arvada	CO	83433	Government	No	arvada.org
Asheville	NC	84458	Government	No	opendatacatalog.ashevillenc.gov
Atlanta	GA	419250	Government	No	gis.atlantaga.gov
Atlanta Regional Commission	GA	4142300	Government	No	www.atlantaregional.com
Austin	TX	820611	Government	Yes	data.austintexas.gov
Baltimore	MD	620210	Government	No	data.baltimorecity.gov
Belleville	IL	3951	Government	No	data.illinois.gov
Bonner County	ID	40877	Government	No	ftp.co.bonner.id.us
Boston	MA	609942	Government	No	www.cityofboston.gov
Boston GIS Data Hub	MA	609942	Government	No	data.cityofboston.gov
California	CA	37691912	Government	No	data.ca.gov
Cambridge GIS	MA	105162	Government	No	cambridgegis.github.io
Champaign	IL	39795	Government	No	data.illinois.gov
Charleston	SC	125583	Government	No	gis.charleston-sc.gov
Charlottesville	VA	43956	Government	No	www.charlottesville.org
Chattanooga	TN	170136	Government	Yes	data.chattlibrary.org
Chattanooga	TN	170136	Community	Yes	chattanooga.demo.socrata.com
Chicago	IL	2707120	Government	Yes	data.cityofchicago.org
Chicago [Metro]	IL	9729825	Government	Yes	www.metrochicagodata.org
Cincinnati	OH	296223	Community	Yes	www.opendatacincy.org
Cobb County	GA	9815210	Government	Yes	www.cobbcounty.org
Colorado	CO	5116796	Government	No	data.colorado.gov
Colorado	CO	5116796	Government	No	tops.state.co.us
Colorado	CO	5116796	Community	No	data.opencolorado.org
Connecticut	CT	3580709	Government	No	transparency.ct.gov
Connecticut	CT	3580709	Government	No	www.osc.ct.gov
Cook County	IL	5217080	Government	Yes	cookcounty.socrata.com
De Leon	TX	2233	Government	No	deleon.socrata.com
Delaware	DE	907135	Government	No	www.delaware.gov
Delaware	DE	907135	Government	No	transparency.delaware.gov
Delaware	DE	907135	Government	Yes	dataexchange.gis.delaware.gov
Denver	CO	600024	Government	No	data.denvergov.org
Denver Regional Council	CO	3157520	Regional	No	data.opencolorado.org
Florida	FL	19057542	Government	No	www.floridahasarighttoknow.com
Florida	FL	19057542	Government	No	www.myfloridacfo.com
Gainesville	FL	125326	Government	No	data.cityofgainesville.org
Georgia	GA	9815210	Government	No	www.open.georgia.gov
Gilpin County	CO	5467	Regional	No	data.opencolorado.org
Glynn County	GA	79626	Government	No	glynncounty.org
Hartford	CT	124893	Government	Yes	data.hartford.gov
Hawaii	HI	1374810	Government	Yes	hawaii.gov
Hawaii	HI	1374810	Government	Yes	data.hawaii.gov
Hawaii	HI	1374810	Government	Yes	planning.hawaii.gov
Hawaii GIS	HI	1374810	Government	Yes	gis.hicentral.com
Honolulu	HI	944287	Government	Yes	data.honolulu.gov
Houston	TX	2089090	Government	Yes	data.ohouston.org
Idaho	ID	1584985	Government	No	transparent.idaho.gov
Idaho	ID	1584985	Government	No	www.insideidaho.org
Illinois	IL	12869257	Government	Yes	data.illinois.gov
Illinois	IL	12869257	Government	Yes	accountability.illinois.gov
Illinois South Suburban Mayors and Managers	IL	12869257	Government	Yes	data.illinois.gov
Indiana	IN	6516922	Government	No	inmap.indiana.edu
Indiana	IN	6516922	Government	No	www.in.gov
Iowa	IA	3062309	Government	No	data.iowa.gov
Kansas	KS	2871238	Government	No	kanview.ks.gov
Kansas City	MO	457551	Government	No	data.kcmo.org
Kentucky [Open Door]	KY	4369356	Government	No	opendoor.ky.gov
King County	WA	1969722	Government	No	data.kingcounty.gov
King County Election Data	WA	1969722	Government	No	electionsdata.kingcounty.gov
Lexington	KY	457551	Government	Yes	data.lexingtonky.gov
Los Angeles [Controller]	CA	3819702	Government	Yes	controllerdata.lacity.org
Louisiana	LA	4574836	Government	No	wwwprd.doa.louisiana.gov
Louisville	KY	592529	Government	Yes	portal.louisvilleky.gov
Lynchburg	VA	77113	Government	No	mapviewer.lynchburgva.gov
Madison	WI	236901	Government	Yes	data.cityofmadison.com
Maine	ME	462257	Government	No	www.maine.gov
Maine	ME	462257	Government	No	opencheckbook.maine.gov
Marietta	GA	56602		No	www.arcgis.com
Maryland	MD	3792647	Government	Yes	spending.dbm.maryland.gov
Maryland	MD	3792647	Government	Yes	data.maryland.gov
Massachusetts	MA	6587536	Government	No	www.mass.gov
Massachusetts	MA	6587536	Government	No	www.mass.gov
Michigan	MI	9876187	Government	No	www.michigan.gov
Minnesota	MN	5344861	Government	No	mn.gov
Minnesota	MN	5344861	Government	No	www.mmb.state.mn.us
Mississippi	MS	2978512	Government	No	www.transparency.mississippi.gov
Missouri	MS	6010688	Government	No	data.mo.gov
Missouri [Accountability Portal]	MS	6010688	Government	No	mapyourtaxes.mo.gov
Montana	MT	998199	Government	No	transparency.mt.gov
Montgomery County	MD	455761	Government	Yes	data.montgomerycountymd.gov
Nebraska	NB	1842641	Government	No	www.nebraska.gov
Nebraska	NE	1842641	Government	No	nebraskaspending.gov
Nevada	NV	2723322	Government	No	open.nv.gov
New Hampshire	NH	1318194	Community	Yes	nhopengovt.org
New Hampshire	NH	1318194	Government	Yes	www.nh.gov
New Jersey	NJ	8821155	Community	No	data.codefornewark.org
New Jersey [GIS Clearinghouse]	NJ	8791909	Government	No	njgin.state.nj.us
New Jersey [Newspaper Transparency Project]	NJ	8791909	Private	No	datauniverse.com
New Jersey [Transparency Portal]	NJ	8821155	Government	No	yourmoney.nj.gov
New Mexico	NM	2082224	Government	No	www.sunshineportalnm.com
New Orleans	LA	321409	Government	No	data.nola.gov
New York	NY	19465197	Government	Yes	data.ny.gov
New York	NY	19465197	Government	Yes	www.openbooknewyork.com
New York [Department of Health]	NY	19465197	Government	Yes	health.data.ny.gov
New York [Health Data]	NY	19465197	Government	Yes	health.data.ny.gov
New York [State Data Center]	NY	19465197	Government	Yes	esd.ny.gov
New York [State Senate]	NY	19465197	Government	Yes	www.nysenate.gov
New York City	NY	8244910	Government	Yes	www.opendatanyc.com
Newark	NJ	277540	Goverment	No	newarknj.patch.com
Norfolk	VA	245782	Public	No	data.codeforhamptonroads.org
North Carolina	NC	9656401	Government	No	www.ncopenbook.gov
North Carolina [OpenBook]	NC	9656401	Government	No	www.ncopenbook.gov
North Dakota	ND	683932	Government	No	data.share.nd.gov
North Dakota GIS Hub	ND	683932	Government	No	www.nd.gov
Oakland	CA	389397	Government	Yes	data.oaklandnet.com
Oakland	CA	389397	Community	Yes	data.openoakland.org
Ohio	OH	11544951	Government	No	www.sos.state.oh.us
Ohio	OH	11544951	Government	No	transparency.ohio.gov
Oklahoma	OK	3791508	Government	No	www.ok.gov
Oklahoma	OK	3791508	Government	No	data.ok.gov
Oregon	OR	3871859	Government	No	data.oregon.gov
Oregon	OR	3871859	Government	No	www.oregon.gov
Palo Alto	CA	64408	Government	No	data.cityofpaloalto.org
Pennsylvania	PA	12742886	Government	No	www.pennwatch.pa.gov
Pennsylvania [Geodata]	PA	12702379	University	No	www.pasda.psu.edu
Philadelphia	PA	1514456	Community	Yes	www.opendataphilly.org
Portland	OR	583778	Government	Yes	civicapps.org
Providence	RI	583778	Government	Yes	data.providenceri.gov
Raleigh	NC	395091	Government	Yes	data.raleighnc.gov
Redmond	WA	26646	Government	No	data.redmond.gov
Reno	NV	"233	294"	Government	http://opendatareno.org/	
Rhode Island	RI	1051302	Government	No	www.ri.gov
Rhode Island	RI	1051302	Government	No	www.transparency.ri.gov
Richmond	VA	210309	Government	No	ftp.ci.richmond.va.us
Rockford	IL	152222	Government	No	data.illinois.gov
Sacramento	CA	472178	Government	Yes	portal.cityofsacramento.org
Salt Lake City	UT	186443	Government	No	data.slcgov.com
San Diego	CA	1307402	Community	No	catalog.opensandiego.org
San Francisco	CA	797983	Government	Yes	data.sfgov.org
San Mateo County Open Checkbook	WA	727209	Government	Yes	data.smcgov.org
Santa Cruz	CA	60342	Government	No	data.cityofsantacruz.com
Scottsdale	AZ	221020	Government	No	data.scottsdaleaz.gov
Seattle	WA	620778	Government	No	data.seattle.gov
Snohomish County	WA	722400	Government	No	data.snostat.org
Somerville	MA	76519	Government	No	data.somervillema.gov
South Bend	IN	101000	Government	Yes	data.southbendin.gov
South Carolina	SC	4679230	Government	No	www.cg.sc.gov
South Carolina [GIS]	SC	4679230	Government	No	US State"	
South Dakota	SD	824082	Government	No	open.sd.gov
Tennessee	TN	6403353	Government	No	www.tn.gov
Texas	TX	25674681	Government	Yes	www.texas.gov
Texas	TX	25674681	Government	Yes	www.texastransparency.org
Texas [Comptroller Transparency]	TX	25674681	Government	Yes	www.texastransparency.org
Tulsa	OK	14741	Government	Yes	www.cityoftulsa.org
Utah	UT	2817222	Government	Yes	www.utah.gov
Utah	UT	2817222	Government	Yes	www.utah.gov
Vermont	VT	626431	Government	No	spotlight.vermont.gov
Virginia	VA	8096604	Government	No	data.openva.com
Virginia Beach	VA	447021	Public	No	data.codeforhamptonroads.org
Virginia Data Point	VA	8096604	Government	No	datapoint.apa.virginia.gov
Wake County	NC	929780	Government	No	www.wakegov.com
Washington	WA	6830038	Government	No	fiscal.wa.gov
Washington	WA	6830038	Government	No	data.wa.gov
Washington D.C.	DC	797983	Government	Yes	data.octo.dc.gov
Washington D.C.	DC	797983	Community	Yes	www.opendatadc.org
Washington D.C.[GIS]	DC	797983	Government	Yes	opendata.dc.gov
Weatherford	TX	25557	Government	No	tx-weatherford2.civicplus.com
Wellington	FL	57163	Government	No	data.wellingtonfl.gov
West Virginia	WV	1855364	Government	No	transparencywv.org
Williamsburg	VA	15167	Government	No	www.williamsburgva.gov
Wisconsin	WI	5711767	Government	No	sunshine.wi.gov
Wyoming	WY	568158	Government	No	www.wyoming.gov

In [7]:
import numpy as np

In [10]:
n = np.random.random((5, 2)).dot(np.random.random((3, 2)).T)

In [14]:
np.corrcoef(n[:,0], n[:,1])


Out[14]:
array([[ 1.        ,  0.85030387],
       [ 0.85030387,  1.        ]])

In [7]:
a = np.random.random((10, 10))
a[:,0] = 0
a = a.reshape((20, 5))
print(a[1:-1, 1:-1].shape)
tile_size = ((a.shape[0] - 2) // 2, (a.shape[1] - 2) // 3)
print(tile_size)
a[1:-1, 1:-1] = np.tile(np.arange(1, 7).reshape(2,3), tile_size)
a


(18, 3)
(9, 1)
Out[7]:
array([[ 0.        ,  0.46625328,  0.37736699,  0.08833636,  0.85770443],
       [ 0.69775697,  1.        ,  2.        ,  3.        ,  0.63264207],
       [ 0.        ,  4.        ,  5.        ,  6.        ,  0.39879536],
       [ 0.67104688,  1.        ,  2.        ,  3.        ,  0.25933516],
       [ 0.        ,  4.        ,  5.        ,  6.        ,  0.7128125 ],
       [ 0.86760974,  1.        ,  2.        ,  3.        ,  0.16979598],
       [ 0.        ,  4.        ,  5.        ,  6.        ,  0.98746773],
       [ 0.20166951,  1.        ,  2.        ,  3.        ,  0.04663976],
       [ 0.        ,  4.        ,  5.        ,  6.        ,  0.13801307],
       [ 0.97293067,  1.        ,  2.        ,  3.        ,  0.94733115],
       [ 0.        ,  4.        ,  5.        ,  6.        ,  0.14276034],
       [ 0.42257557,  1.        ,  2.        ,  3.        ,  0.58226644],
       [ 0.        ,  4.        ,  5.        ,  6.        ,  0.5968977 ],
       [ 0.48194228,  1.        ,  2.        ,  3.        ,  0.55751248],
       [ 0.        ,  4.        ,  5.        ,  6.        ,  0.20394115],
       [ 0.65702803,  1.        ,  2.        ,  3.        ,  0.32740624],
       [ 0.        ,  4.        ,  5.        ,  6.        ,  0.63645551],
       [ 0.40484853,  1.        ,  2.        ,  3.        ,  0.63060941],
       [ 0.        ,  4.        ,  5.        ,  6.        ,  0.78848634],
       [ 0.08515988,  0.48388195,  0.34931021,  0.4914376 ,  0.38716937]])

In [2]:
import pandas as pd

In [16]:
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv("/Users/enchantner/Dev/NPL/USDataLocalSites.csv")

In [4]:
df["Ownership?"].value_counts()


Out[4]:
Government    151
Community       9
Regional        2
Public          2
Goverment       1
University      1
Private         1
Name: Ownership?, dtype: int64

In [16]:
df


Out[16]:
Location State Population (US Census, 2011) Ownership? Open Data Policy? Link Type
0 Alabama AL 4802740 Government No http://open.alabama.gov/ US State
1 Alaska AK 722718 Government No https://dof.doa.alaska.gov/ US State
2 Albuquerque NM 539000 Government No http://www.cabq.gov/abq-data/ US City
3 Ann Arbor MI 114925 Government No http://www.a2gov.org/data/ US City
4 Arizona AZ 6482505 Government No http://openbooks.az.gov US State
5 Arkansas AR 2937979 Government No http://transparency.arkansas.gov US State
6 Arvada CO 83433 Government No http://arvada.org/opendata/ US City
7 Asheville NC 84458 Government No http://opendatacatalog.ashevillenc.gov/ US City
8 Atlanta GA 419250 Government No http://gis.atlantaga.gov/ US City
9 Atlanta Regional Commission GA 4142300 Government No http://www.atlantaregional.com/info-center/gis... US County
10 Austin TX 820611 Government Yes https://data.austintexas.gov US City
11 Baltimore MD 620210 Government No https://data.baltimorecity.gov US City
12 Belleville IL 3951 Government No https://data.illinois.gov/belleville US City
13 Bonner County ID 40877 Government No ftp://ftp.co.bonner.id.us/GISData/ US County
14 Boston MA 609942 Government No http://www.cityofboston.gov/doit/databoston/ap... US City
15 Boston GIS Data Hub MA 609942 Government No https://data.cityofboston.gov/ US City
16 California CA 37691912 Government No http://data.ca.gov/ US State
17 Cambridge GIS MA 105162 Government No http://cambridgegis.github.io/gisdata.html US City
18 Champaign IL 39795 Government No https://data.illinois.gov/champaign US City
19 Charleston SC 125583 Government No http://gis.charleston-sc.gov/dataportal/ US City
20 Charlottesville VA 43956 Government No http://www.charlottesville.org/Index.aspx?page... US City
21 Chattanooga TN 170136 Government Yes http://data.chattlibrary.org US City
22 Chattanooga TN 170136 Community Yes https://chattanooga.demo.socrata.com/ US City
23 Chicago IL 2707120 Government Yes https://data.cityofchicago.org US City
24 Chicago [Metro] IL 9729825 Government Yes https://www.metrochicagodata.org US Regional
25 Cincinnati OH 296223 Community Yes http://www.opendatacincy.org/ US City
26 Cobb County GA 9815210 Government Yes http://www.cobbcounty.org/index.php?option=com... US County
27 Colorado CO 5116796 Government No https://data.colorado.gov US State
28 Colorado CO 5116796 Government No http://tops.state.co.us US State
29 Colorado CO 5116796 Community No http://data.opencolorado.org US State
... ... ... ... ... ... ... ...
138 Seattle WA 620778 Government No http://data.seattle.gov US City
139 Snohomish County WA 722400 Government No https://data.snostat.org US County
140 Somerville MA 76519 Government No https://data.somervillema.gov US City
141 South Bend IN 101000 Government Yes https://data.southbendin.gov/ US City
142 South Carolina SC 4679230 Government No http://www.cg.sc.gov/fiscaltransparency US State
143 South Carolina [GIS] SC 4679230 Government No http://www.gis.sc.gov/data.html,US State US State
144 South Dakota SD 824082 Government No http://open.sd.gov/ US State
145 Tennessee TN 6403353 Government No http://www.tn.gov/opengov/ US State
146 Texas TX 25674681 Government Yes http://www.texas.gov/en/Connect/Pages/open-dat... US State
147 Texas TX 25674681 Government Yes http://www.texastransparency.org US State
148 Texas [Comptroller Transparency] TX 25674681 Government Yes http://www.texastransparency.org/opendata/inde... Other State Related
149 Tulsa OK 14741 Government Yes https://www.cityoftulsa.org/our-city/open-tuls... US City
150 Utah UT 2817222 Government Yes http://www.utah.gov/transparency US State
151 Utah UT 2817222 Government Yes http://www.utah.gov/data/ US State
152 Vermont VT 626431 Government No http://spotlight.vermont.gov US State
153 Virginia VA 8096604 Government No http://data.openva.com US State
154 Virginia Beach VA 447021 Public No http://data.codeforhamptonroads.org/organizati... US City
155 Virginia Data Point VA 8096604 Government No http://datapoint.apa.virginia.gov/ US State
156 Wake County NC 929780 Government No http://www.wakegov.com/data/Pages/default.aspx US County
157 Washington WA 6830038 Government No http://fiscal.wa.gov US State
158 Washington WA 6830038 Government No https://data.wa.gov US State
159 Washington D.C. DC 797983 Government Yes http://data.octo.dc.gov/ US City
160 Washington D.C. DC 797983 Community Yes http://www.opendatadc.org/ US City
161 Washington D.C.[GIS] DC 797983 Government Yes http://opendata.dc.gov/ US City
162 Weatherford TX 25557 Government No http://tx-weatherford2.civicplus.com/index.asp... US City
163 Wellington FL 57163 Government No https://data.wellingtonfl.gov/ US City
164 West Virginia WV 1855364 Government No http://transparencywv.org US State
165 Williamsburg VA 15167 Government No http://www.williamsburgva.gov/Index.aspx?page=793 US City
166 Wisconsin WI 5711767 Government No http://sunshine.wi.gov US State
167 Wyoming WY 568158 Government No http://www.wyoming.gov/transparency.html US State

168 rows × 7 columns


In [5]:
df10 = df[df["Location"].str.startswith("New York") | df["Location"].str.startswith("Washington D.C.")]

In [6]:
df10


Out[6]:
Location State Population (US Census, 2011) Ownership? Open Data Policy? Link Type
97 New York NY 19465197 Government Yes https://data.ny.gov/ US State
98 New York NY 19465197 Government Yes http://www.openbooknewyork.com US State
99 New York [Department of Health] NY 19465197 Government Yes https://health.data.ny.gov/ Other State Related
100 New York [Health Data] NY 19465197 Government Yes https://health.data.ny.gov Other State Related
101 New York [State Data Center] NY 19465197 Government Yes http://esd.ny.gov/NYSDataCenter.html Other State Related
102 New York [State Senate] NY 19465197 Government Yes http://www.nysenate.gov/opendata/ Other State Related
103 New York City NY 8244910 Government Yes https://www.opendatanyc.com US City
159 Washington D.C. DC 797983 Government Yes http://data.octo.dc.gov/ US City
160 Washington D.C. DC 797983 Community Yes http://www.opendatadc.org/ US City
161 Washington D.C.[GIS] DC 797983 Government Yes http://opendata.dc.gov/ US City

In [7]:
from urllib.parse import urlparse

In [8]:
df10["Domain"] = df10["Link"].apply(lambda u: urlparse(u).netloc)


/Users/enchantner/.virtualenvs/da/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.

In [9]:
df10["Domain"].value_counts()


Out[9]:
health.data.ny.gov         2
esd.ny.gov                 1
data.ny.gov                1
www.opendatadc.org         1
data.octo.dc.gov           1
opendata.dc.gov            1
www.opendatanyc.com        1
www.openbooknewyork.com    1
www.nysenate.gov           1
Name: Domain, dtype: int64

In [10]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 7 columns):
Location                        168 non-null object
State                           168 non-null object
Population (US Census, 2011)    168 non-null object
Ownership?                      167 non-null object
Open Data Policy?               168 non-null object
Link                            168 non-null object
Type                            168 non-null object
dtypes: object(7)
memory usage: 9.3+ KB

In [11]:
df["Population (US Census, 2011)"].astype(float)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-7a9c045b5085> in <module>()
----> 1 df["Population (US Census, 2011)"].astype(float)

~/.virtualenvs/da/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
     89                 else:
     90                     kwargs[new_arg_name] = new_arg_value
---> 91             return func(*args, **kwargs)
     92         return wrapper
     93     return _deprecate_kwarg

~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
   3408         # else, only a single dtype is given
   3409         new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 3410                                      **kwargs)
   3411         return self._constructor(new_data).__finalize__(self)
   3412 

~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/internals.py in astype(self, dtype, **kwargs)
   3222 
   3223     def astype(self, dtype, **kwargs):
-> 3224         return self.apply('astype', dtype=dtype, **kwargs)
   3225 
   3226     def convert(self, **kwargs):

~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
   3089 
   3090             kwargs['mgr'] = self
-> 3091             applied = getattr(b, f)(**kwargs)
   3092             result_blocks = _extend_blocks(applied, result_blocks)
   3093 

~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/internals.py in astype(self, dtype, copy, errors, values, **kwargs)
    469     def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
    470         return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 471                             **kwargs)
    472 
    473     def _astype(self, dtype, copy=False, errors='raise', values=None,

~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/internals.py in _astype(self, dtype, copy, errors, values, klass, mgr, raise_on_error, **kwargs)
    519 
    520                 # _astype_nansafe works fine with 1-d only
--> 521                 values = astype_nansafe(values.ravel(), dtype, copy=True)
    522                 values = values.reshape(self.shape)
    523 

~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy)
    634 
    635     if copy:
--> 636         return arr.astype(dtype)
    637     return arr.view(dtype)
    638 

ValueError: could not convert string to float: '233,294'

In [14]:
# df["Location"].apply(lambda s: s.lower().count("a"))
df["Population"] = df["Population (US Census, 2011)"].str.replace(",", "").astype(np.int32)

In [15]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 8 columns):
Location                        168 non-null object
State                           168 non-null object
Population (US Census, 2011)    168 non-null object
Ownership?                      167 non-null object
Open Data Policy?               168 non-null object
Link                            168 non-null object
Type                            168 non-null object
Population                      168 non-null int32
dtypes: int32(1), object(7)
memory usage: 9.9+ KB

In [16]:
df2 = df.groupby("Location").mean()

In [17]:
df2


Out[17]:
Population
Location
Alabama 4802740
Alaska 722718
Albuquerque 539000
Ann Arbor 114925
Arizona 6482505
Arkansas 2937979
Arvada 83433
Asheville 84458
Atlanta 419250
Atlanta Regional Commission 4142300
Austin 820611
Baltimore 620210
Belleville 3951
Bonner County 40877
Boston 609942
Boston GIS Data Hub 609942
California 37691912
Cambridge GIS 105162
Champaign 39795
Charleston 125583
Charlottesville 43956
Chattanooga 170136
Chicago 2707120
Chicago [Metro] 9729825
Cincinnati 296223
Cobb County 9815210
Colorado 5116796
Connecticut 3580709
Cook County 5217080
De Leon 2233
... ...
San Francisco 797983
San Mateo County Open Checkbook 727209
Santa Cruz 60342
Scottsdale 221020
Seattle 620778
Snohomish County 722400
Somerville 76519
South Bend 101000
South Carolina 4679230
South Carolina [GIS] 4679230
South Dakota 824082
Tennessee 6403353
Texas 25674681
Texas [Comptroller Transparency] 25674681
Tulsa 14741
Utah 2817222
Vermont 626431
Virginia 8096604
Virginia Beach 447021
Virginia Data Point 8096604
Wake County 929780
Washington 6830038
Washington D.C. 797983
Washington D.C.[GIS] 797983
Weatherford 25557
Wellington 57163
West Virginia 1855364
Williamsburg 15167
Wisconsin 5711767
Wyoming 568158

140 rows × 1 columns


In [20]:
df2.plot(kind="bar", figsize=(20, 10))


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x107d57c18>

In [37]:
df2 = df.groupby("Location").mean()
df2.plot(kind="barh", figsize=(10, 20))


Out[37]:
<matplotlib.axes._subplots.AxesSubplot at 0x114b9b4e0>

In [27]:
df3 = df2.sample(10)

In [28]:
df3


Out[28]:
Population
Location
Indiana 6516922
Massachusetts 6587536
West Virginia 1855364
Idaho 1584985
Kansas City 457551
New York [State Senate] 19465197
Richmond 210309
Texas 25674681
New Jersey [GIS Clearinghouse] 8791909
Sacramento 472178

In [29]:
p = plt.pie(df3["Population"], labels=df3.index)



In [ ]: