In [19]:
%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [1]:
import numpy as np
In [3]:
z = np.zeros((8,8), dtype=int)
In [4]:
np.tile(np.array([[0, 1], [1, 0]]), (4, 4))
Out[4]:
array([[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0]])
In [6]:
np.__version__
Out[6]:
'1.13.1'
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [13]:
import numpy as np
In [1]:
from urllib.parse import urlparse
In [4]:
def read_csv():
with open("USlocalopendataportals.csv", "r") as inp:
next(inp) # skipping header
reader = (l.strip().split(",") for l in inp) # reading csv (csv module would help, too)
for line in (
"\t".join(f[:5] + f[6:-1] + [urlparse(f[5]).netloc])
for f in reader
):
yield line
In [5]:
for line in read_csv():
print(line)
Alabama AL 4802740 Government No open.alabama.gov
Alaska AK 722718 Government No dof.doa.alaska.gov
Albuquerque NM 539000 Government No www.cabq.gov
Ann Arbor MI 114925 Government No www.a2gov.org
Arizona AZ 6482505 Government No openbooks.az.gov
Arkansas AR 2937979 Government No transparency.arkansas.gov
Arvada CO 83433 Government No arvada.org
Asheville NC 84458 Government No opendatacatalog.ashevillenc.gov
Atlanta GA 419250 Government No gis.atlantaga.gov
Atlanta Regional Commission GA 4142300 Government No www.atlantaregional.com
Austin TX 820611 Government Yes data.austintexas.gov
Baltimore MD 620210 Government No data.baltimorecity.gov
Belleville IL 3951 Government No data.illinois.gov
Bonner County ID 40877 Government No ftp.co.bonner.id.us
Boston MA 609942 Government No www.cityofboston.gov
Boston GIS Data Hub MA 609942 Government No data.cityofboston.gov
California CA 37691912 Government No data.ca.gov
Cambridge GIS MA 105162 Government No cambridgegis.github.io
Champaign IL 39795 Government No data.illinois.gov
Charleston SC 125583 Government No gis.charleston-sc.gov
Charlottesville VA 43956 Government No www.charlottesville.org
Chattanooga TN 170136 Government Yes data.chattlibrary.org
Chattanooga TN 170136 Community Yes chattanooga.demo.socrata.com
Chicago IL 2707120 Government Yes data.cityofchicago.org
Chicago [Metro] IL 9729825 Government Yes www.metrochicagodata.org
Cincinnati OH 296223 Community Yes www.opendatacincy.org
Cobb County GA 9815210 Government Yes www.cobbcounty.org
Colorado CO 5116796 Government No data.colorado.gov
Colorado CO 5116796 Government No tops.state.co.us
Colorado CO 5116796 Community No data.opencolorado.org
Connecticut CT 3580709 Government No transparency.ct.gov
Connecticut CT 3580709 Government No www.osc.ct.gov
Cook County IL 5217080 Government Yes cookcounty.socrata.com
De Leon TX 2233 Government No deleon.socrata.com
Delaware DE 907135 Government No www.delaware.gov
Delaware DE 907135 Government No transparency.delaware.gov
Delaware DE 907135 Government Yes dataexchange.gis.delaware.gov
Denver CO 600024 Government No data.denvergov.org
Denver Regional Council CO 3157520 Regional No data.opencolorado.org
Florida FL 19057542 Government No www.floridahasarighttoknow.com
Florida FL 19057542 Government No www.myfloridacfo.com
Gainesville FL 125326 Government No data.cityofgainesville.org
Georgia GA 9815210 Government No www.open.georgia.gov
Gilpin County CO 5467 Regional No data.opencolorado.org
Glynn County GA 79626 Government No glynncounty.org
Hartford CT 124893 Government Yes data.hartford.gov
Hawaii HI 1374810 Government Yes hawaii.gov
Hawaii HI 1374810 Government Yes data.hawaii.gov
Hawaii HI 1374810 Government Yes planning.hawaii.gov
Hawaii GIS HI 1374810 Government Yes gis.hicentral.com
Honolulu HI 944287 Government Yes data.honolulu.gov
Houston TX 2089090 Government Yes data.ohouston.org
Idaho ID 1584985 Government No transparent.idaho.gov
Idaho ID 1584985 Government No www.insideidaho.org
Illinois IL 12869257 Government Yes data.illinois.gov
Illinois IL 12869257 Government Yes accountability.illinois.gov
Illinois South Suburban Mayors and Managers IL 12869257 Government Yes data.illinois.gov
Indiana IN 6516922 Government No inmap.indiana.edu
Indiana IN 6516922 Government No www.in.gov
Iowa IA 3062309 Government No data.iowa.gov
Kansas KS 2871238 Government No kanview.ks.gov
Kansas City MO 457551 Government No data.kcmo.org
Kentucky [Open Door] KY 4369356 Government No opendoor.ky.gov
King County WA 1969722 Government No data.kingcounty.gov
King County Election Data WA 1969722 Government No electionsdata.kingcounty.gov
Lexington KY 457551 Government Yes data.lexingtonky.gov
Los Angeles [Controller] CA 3819702 Government Yes controllerdata.lacity.org
Louisiana LA 4574836 Government No wwwprd.doa.louisiana.gov
Louisville KY 592529 Government Yes portal.louisvilleky.gov
Lynchburg VA 77113 Government No mapviewer.lynchburgva.gov
Madison WI 236901 Government Yes data.cityofmadison.com
Maine ME 462257 Government No www.maine.gov
Maine ME 462257 Government No opencheckbook.maine.gov
Marietta GA 56602 No www.arcgis.com
Maryland MD 3792647 Government Yes spending.dbm.maryland.gov
Maryland MD 3792647 Government Yes data.maryland.gov
Massachusetts MA 6587536 Government No www.mass.gov
Massachusetts MA 6587536 Government No www.mass.gov
Michigan MI 9876187 Government No www.michigan.gov
Minnesota MN 5344861 Government No mn.gov
Minnesota MN 5344861 Government No www.mmb.state.mn.us
Mississippi MS 2978512 Government No www.transparency.mississippi.gov
Missouri MS 6010688 Government No data.mo.gov
Missouri [Accountability Portal] MS 6010688 Government No mapyourtaxes.mo.gov
Montana MT 998199 Government No transparency.mt.gov
Montgomery County MD 455761 Government Yes data.montgomerycountymd.gov
Nebraska NB 1842641 Government No www.nebraska.gov
Nebraska NE 1842641 Government No nebraskaspending.gov
Nevada NV 2723322 Government No open.nv.gov
New Hampshire NH 1318194 Community Yes nhopengovt.org
New Hampshire NH 1318194 Government Yes www.nh.gov
New Jersey NJ 8821155 Community No data.codefornewark.org
New Jersey [GIS Clearinghouse] NJ 8791909 Government No njgin.state.nj.us
New Jersey [Newspaper Transparency Project] NJ 8791909 Private No datauniverse.com
New Jersey [Transparency Portal] NJ 8821155 Government No yourmoney.nj.gov
New Mexico NM 2082224 Government No www.sunshineportalnm.com
New Orleans LA 321409 Government No data.nola.gov
New York NY 19465197 Government Yes data.ny.gov
New York NY 19465197 Government Yes www.openbooknewyork.com
New York [Department of Health] NY 19465197 Government Yes health.data.ny.gov
New York [Health Data] NY 19465197 Government Yes health.data.ny.gov
New York [State Data Center] NY 19465197 Government Yes esd.ny.gov
New York [State Senate] NY 19465197 Government Yes www.nysenate.gov
New York City NY 8244910 Government Yes www.opendatanyc.com
Newark NJ 277540 Goverment No newarknj.patch.com
Norfolk VA 245782 Public No data.codeforhamptonroads.org
North Carolina NC 9656401 Government No www.ncopenbook.gov
North Carolina [OpenBook] NC 9656401 Government No www.ncopenbook.gov
North Dakota ND 683932 Government No data.share.nd.gov
North Dakota GIS Hub ND 683932 Government No www.nd.gov
Oakland CA 389397 Government Yes data.oaklandnet.com
Oakland CA 389397 Community Yes data.openoakland.org
Ohio OH 11544951 Government No www.sos.state.oh.us
Ohio OH 11544951 Government No transparency.ohio.gov
Oklahoma OK 3791508 Government No www.ok.gov
Oklahoma OK 3791508 Government No data.ok.gov
Oregon OR 3871859 Government No data.oregon.gov
Oregon OR 3871859 Government No www.oregon.gov
Palo Alto CA 64408 Government No data.cityofpaloalto.org
Pennsylvania PA 12742886 Government No www.pennwatch.pa.gov
Pennsylvania [Geodata] PA 12702379 University No www.pasda.psu.edu
Philadelphia PA 1514456 Community Yes www.opendataphilly.org
Portland OR 583778 Government Yes civicapps.org
Providence RI 583778 Government Yes data.providenceri.gov
Raleigh NC 395091 Government Yes data.raleighnc.gov
Redmond WA 26646 Government No data.redmond.gov
Reno NV "233 294" Government http://opendatareno.org/
Rhode Island RI 1051302 Government No www.ri.gov
Rhode Island RI 1051302 Government No www.transparency.ri.gov
Richmond VA 210309 Government No ftp.ci.richmond.va.us
Rockford IL 152222 Government No data.illinois.gov
Sacramento CA 472178 Government Yes portal.cityofsacramento.org
Salt Lake City UT 186443 Government No data.slcgov.com
San Diego CA 1307402 Community No catalog.opensandiego.org
San Francisco CA 797983 Government Yes data.sfgov.org
San Mateo County Open Checkbook WA 727209 Government Yes data.smcgov.org
Santa Cruz CA 60342 Government No data.cityofsantacruz.com
Scottsdale AZ 221020 Government No data.scottsdaleaz.gov
Seattle WA 620778 Government No data.seattle.gov
Snohomish County WA 722400 Government No data.snostat.org
Somerville MA 76519 Government No data.somervillema.gov
South Bend IN 101000 Government Yes data.southbendin.gov
South Carolina SC 4679230 Government No www.cg.sc.gov
South Carolina [GIS] SC 4679230 Government No US State"
South Dakota SD 824082 Government No open.sd.gov
Tennessee TN 6403353 Government No www.tn.gov
Texas TX 25674681 Government Yes www.texas.gov
Texas TX 25674681 Government Yes www.texastransparency.org
Texas [Comptroller Transparency] TX 25674681 Government Yes www.texastransparency.org
Tulsa OK 14741 Government Yes www.cityoftulsa.org
Utah UT 2817222 Government Yes www.utah.gov
Utah UT 2817222 Government Yes www.utah.gov
Vermont VT 626431 Government No spotlight.vermont.gov
Virginia VA 8096604 Government No data.openva.com
Virginia Beach VA 447021 Public No data.codeforhamptonroads.org
Virginia Data Point VA 8096604 Government No datapoint.apa.virginia.gov
Wake County NC 929780 Government No www.wakegov.com
Washington WA 6830038 Government No fiscal.wa.gov
Washington WA 6830038 Government No data.wa.gov
Washington D.C. DC 797983 Government Yes data.octo.dc.gov
Washington D.C. DC 797983 Community Yes www.opendatadc.org
Washington D.C.[GIS] DC 797983 Government Yes opendata.dc.gov
Weatherford TX 25557 Government No tx-weatherford2.civicplus.com
Wellington FL 57163 Government No data.wellingtonfl.gov
West Virginia WV 1855364 Government No transparencywv.org
Williamsburg VA 15167 Government No www.williamsburgva.gov
Wisconsin WI 5711767 Government No sunshine.wi.gov
Wyoming WY 568158 Government No www.wyoming.gov
In [7]:
import numpy as np
In [10]:
n = np.random.random((5, 2)).dot(np.random.random((3, 2)).T)
In [14]:
np.corrcoef(n[:,0], n[:,1])
Out[14]:
array([[ 1. , 0.85030387],
[ 0.85030387, 1. ]])
In [7]:
a = np.random.random((10, 10))
a[:,0] = 0
a = a.reshape((20, 5))
print(a[1:-1, 1:-1].shape)
tile_size = ((a.shape[0] - 2) // 2, (a.shape[1] - 2) // 3)
print(tile_size)
a[1:-1, 1:-1] = np.tile(np.arange(1, 7).reshape(2,3), tile_size)
a
(18, 3)
(9, 1)
Out[7]:
array([[ 0. , 0.46625328, 0.37736699, 0.08833636, 0.85770443],
[ 0.69775697, 1. , 2. , 3. , 0.63264207],
[ 0. , 4. , 5. , 6. , 0.39879536],
[ 0.67104688, 1. , 2. , 3. , 0.25933516],
[ 0. , 4. , 5. , 6. , 0.7128125 ],
[ 0.86760974, 1. , 2. , 3. , 0.16979598],
[ 0. , 4. , 5. , 6. , 0.98746773],
[ 0.20166951, 1. , 2. , 3. , 0.04663976],
[ 0. , 4. , 5. , 6. , 0.13801307],
[ 0.97293067, 1. , 2. , 3. , 0.94733115],
[ 0. , 4. , 5. , 6. , 0.14276034],
[ 0.42257557, 1. , 2. , 3. , 0.58226644],
[ 0. , 4. , 5. , 6. , 0.5968977 ],
[ 0.48194228, 1. , 2. , 3. , 0.55751248],
[ 0. , 4. , 5. , 6. , 0.20394115],
[ 0.65702803, 1. , 2. , 3. , 0.32740624],
[ 0. , 4. , 5. , 6. , 0.63645551],
[ 0.40484853, 1. , 2. , 3. , 0.63060941],
[ 0. , 4. , 5. , 6. , 0.78848634],
[ 0.08515988, 0.48388195, 0.34931021, 0.4914376 , 0.38716937]])
In [2]:
import pandas as pd
In [16]:
import matplotlib.pyplot as plt
In [3]:
df = pd.read_csv("/Users/enchantner/Dev/NPL/USDataLocalSites.csv")
In [4]:
df["Ownership?"].value_counts()
Out[4]:
Government 151
Community 9
Regional 2
Public 2
Goverment 1
University 1
Private 1
Name: Ownership?, dtype: int64
In [16]:
df
Out[16]:
Location
State
Population (US Census, 2011)
Ownership?
Open Data Policy?
Link
Type
0
Alabama
AL
4802740
Government
No
http://open.alabama.gov/
US State
1
Alaska
AK
722718
Government
No
https://dof.doa.alaska.gov/
US State
2
Albuquerque
NM
539000
Government
No
http://www.cabq.gov/abq-data/
US City
3
Ann Arbor
MI
114925
Government
No
http://www.a2gov.org/data/
US City
4
Arizona
AZ
6482505
Government
No
http://openbooks.az.gov
US State
5
Arkansas
AR
2937979
Government
No
http://transparency.arkansas.gov
US State
6
Arvada
CO
83433
Government
No
http://arvada.org/opendata/
US City
7
Asheville
NC
84458
Government
No
http://opendatacatalog.ashevillenc.gov/
US City
8
Atlanta
GA
419250
Government
No
http://gis.atlantaga.gov/
US City
9
Atlanta Regional Commission
GA
4142300
Government
No
http://www.atlantaregional.com/info-center/gis...
US County
10
Austin
TX
820611
Government
Yes
https://data.austintexas.gov
US City
11
Baltimore
MD
620210
Government
No
https://data.baltimorecity.gov
US City
12
Belleville
IL
3951
Government
No
https://data.illinois.gov/belleville
US City
13
Bonner County
ID
40877
Government
No
ftp://ftp.co.bonner.id.us/GISData/
US County
14
Boston
MA
609942
Government
No
http://www.cityofboston.gov/doit/databoston/ap...
US City
15
Boston GIS Data Hub
MA
609942
Government
No
https://data.cityofboston.gov/
US City
16
California
CA
37691912
Government
No
http://data.ca.gov/
US State
17
Cambridge GIS
MA
105162
Government
No
http://cambridgegis.github.io/gisdata.html
US City
18
Champaign
IL
39795
Government
No
https://data.illinois.gov/champaign
US City
19
Charleston
SC
125583
Government
No
http://gis.charleston-sc.gov/dataportal/
US City
20
Charlottesville
VA
43956
Government
No
http://www.charlottesville.org/Index.aspx?page...
US City
21
Chattanooga
TN
170136
Government
Yes
http://data.chattlibrary.org
US City
22
Chattanooga
TN
170136
Community
Yes
https://chattanooga.demo.socrata.com/
US City
23
Chicago
IL
2707120
Government
Yes
https://data.cityofchicago.org
US City
24
Chicago [Metro]
IL
9729825
Government
Yes
https://www.metrochicagodata.org
US Regional
25
Cincinnati
OH
296223
Community
Yes
http://www.opendatacincy.org/
US City
26
Cobb County
GA
9815210
Government
Yes
http://www.cobbcounty.org/index.php?option=com...
US County
27
Colorado
CO
5116796
Government
No
https://data.colorado.gov
US State
28
Colorado
CO
5116796
Government
No
http://tops.state.co.us
US State
29
Colorado
CO
5116796
Community
No
http://data.opencolorado.org
US State
...
...
...
...
...
...
...
...
138
Seattle
WA
620778
Government
No
http://data.seattle.gov
US City
139
Snohomish County
WA
722400
Government
No
https://data.snostat.org
US County
140
Somerville
MA
76519
Government
No
https://data.somervillema.gov
US City
141
South Bend
IN
101000
Government
Yes
https://data.southbendin.gov/
US City
142
South Carolina
SC
4679230
Government
No
http://www.cg.sc.gov/fiscaltransparency
US State
143
South Carolina [GIS]
SC
4679230
Government
No
http://www.gis.sc.gov/data.html,US State
US State
144
South Dakota
SD
824082
Government
No
http://open.sd.gov/
US State
145
Tennessee
TN
6403353
Government
No
http://www.tn.gov/opengov/
US State
146
Texas
TX
25674681
Government
Yes
http://www.texas.gov/en/Connect/Pages/open-dat...
US State
147
Texas
TX
25674681
Government
Yes
http://www.texastransparency.org
US State
148
Texas [Comptroller Transparency]
TX
25674681
Government
Yes
http://www.texastransparency.org/opendata/inde...
Other State Related
149
Tulsa
OK
14741
Government
Yes
https://www.cityoftulsa.org/our-city/open-tuls...
US City
150
Utah
UT
2817222
Government
Yes
http://www.utah.gov/transparency
US State
151
Utah
UT
2817222
Government
Yes
http://www.utah.gov/data/
US State
152
Vermont
VT
626431
Government
No
http://spotlight.vermont.gov
US State
153
Virginia
VA
8096604
Government
No
http://data.openva.com
US State
154
Virginia Beach
VA
447021
Public
No
http://data.codeforhamptonroads.org/organizati...
US City
155
Virginia Data Point
VA
8096604
Government
No
http://datapoint.apa.virginia.gov/
US State
156
Wake County
NC
929780
Government
No
http://www.wakegov.com/data/Pages/default.aspx
US County
157
Washington
WA
6830038
Government
No
http://fiscal.wa.gov
US State
158
Washington
WA
6830038
Government
No
https://data.wa.gov
US State
159
Washington D.C.
DC
797983
Government
Yes
http://data.octo.dc.gov/
US City
160
Washington D.C.
DC
797983
Community
Yes
http://www.opendatadc.org/
US City
161
Washington D.C.[GIS]
DC
797983
Government
Yes
http://opendata.dc.gov/
US City
162
Weatherford
TX
25557
Government
No
http://tx-weatherford2.civicplus.com/index.asp...
US City
163
Wellington
FL
57163
Government
No
https://data.wellingtonfl.gov/
US City
164
West Virginia
WV
1855364
Government
No
http://transparencywv.org
US State
165
Williamsburg
VA
15167
Government
No
http://www.williamsburgva.gov/Index.aspx?page=793
US City
166
Wisconsin
WI
5711767
Government
No
http://sunshine.wi.gov
US State
167
Wyoming
WY
568158
Government
No
http://www.wyoming.gov/transparency.html
US State
168 rows × 7 columns
In [5]:
df10 = df[df["Location"].str.startswith("New York") | df["Location"].str.startswith("Washington D.C.")]
In [6]:
df10
Out[6]:
Location
State
Population (US Census, 2011)
Ownership?
Open Data Policy?
Link
Type
97
New York
NY
19465197
Government
Yes
https://data.ny.gov/
US State
98
New York
NY
19465197
Government
Yes
http://www.openbooknewyork.com
US State
99
New York [Department of Health]
NY
19465197
Government
Yes
https://health.data.ny.gov/
Other State Related
100
New York [Health Data]
NY
19465197
Government
Yes
https://health.data.ny.gov
Other State Related
101
New York [State Data Center]
NY
19465197
Government
Yes
http://esd.ny.gov/NYSDataCenter.html
Other State Related
102
New York [State Senate]
NY
19465197
Government
Yes
http://www.nysenate.gov/opendata/
Other State Related
103
New York City
NY
8244910
Government
Yes
https://www.opendatanyc.com
US City
159
Washington D.C.
DC
797983
Government
Yes
http://data.octo.dc.gov/
US City
160
Washington D.C.
DC
797983
Community
Yes
http://www.opendatadc.org/
US City
161
Washington D.C.[GIS]
DC
797983
Government
Yes
http://opendata.dc.gov/
US City
In [7]:
from urllib.parse import urlparse
In [8]:
df10["Domain"] = df10["Link"].apply(lambda u: urlparse(u).netloc)
/Users/enchantner/.virtualenvs/da/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
"""Entry point for launching an IPython kernel.
In [9]:
df10["Domain"].value_counts()
Out[9]:
health.data.ny.gov 2
esd.ny.gov 1
data.ny.gov 1
www.opendatadc.org 1
data.octo.dc.gov 1
opendata.dc.gov 1
www.opendatanyc.com 1
www.openbooknewyork.com 1
www.nysenate.gov 1
Name: Domain, dtype: int64
In [10]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 7 columns):
Location 168 non-null object
State 168 non-null object
Population (US Census, 2011) 168 non-null object
Ownership? 167 non-null object
Open Data Policy? 168 non-null object
Link 168 non-null object
Type 168 non-null object
dtypes: object(7)
memory usage: 9.3+ KB
In [11]:
df["Population (US Census, 2011)"].astype(float)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-11-7a9c045b5085> in <module>()
----> 1 df["Population (US Census, 2011)"].astype(float)
~/.virtualenvs/da/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
89 else:
90 kwargs[new_arg_name] = new_arg_value
---> 91 return func(*args, **kwargs)
92 return wrapper
93 return _deprecate_kwarg
~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
3408 # else, only a single dtype is given
3409 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 3410 **kwargs)
3411 return self._constructor(new_data).__finalize__(self)
3412
~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/internals.py in astype(self, dtype, **kwargs)
3222
3223 def astype(self, dtype, **kwargs):
-> 3224 return self.apply('astype', dtype=dtype, **kwargs)
3225
3226 def convert(self, **kwargs):
~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
3089
3090 kwargs['mgr'] = self
-> 3091 applied = getattr(b, f)(**kwargs)
3092 result_blocks = _extend_blocks(applied, result_blocks)
3093
~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/internals.py in astype(self, dtype, copy, errors, values, **kwargs)
469 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
470 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 471 **kwargs)
472
473 def _astype(self, dtype, copy=False, errors='raise', values=None,
~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/internals.py in _astype(self, dtype, copy, errors, values, klass, mgr, raise_on_error, **kwargs)
519
520 # _astype_nansafe works fine with 1-d only
--> 521 values = astype_nansafe(values.ravel(), dtype, copy=True)
522 values = values.reshape(self.shape)
523
~/.virtualenvs/da/lib/python3.6/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy)
634
635 if copy:
--> 636 return arr.astype(dtype)
637 return arr.view(dtype)
638
ValueError: could not convert string to float: '233,294'
In [14]:
# df["Location"].apply(lambda s: s.lower().count("a"))
df["Population"] = df["Population (US Census, 2011)"].str.replace(",", "").astype(np.int32)
In [15]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 8 columns):
Location 168 non-null object
State 168 non-null object
Population (US Census, 2011) 168 non-null object
Ownership? 167 non-null object
Open Data Policy? 168 non-null object
Link 168 non-null object
Type 168 non-null object
Population 168 non-null int32
dtypes: int32(1), object(7)
memory usage: 9.9+ KB
In [16]:
df2 = df.groupby("Location").mean()
In [17]:
df2
Out[17]:
Population
Location
Alabama
4802740
Alaska
722718
Albuquerque
539000
Ann Arbor
114925
Arizona
6482505
Arkansas
2937979
Arvada
83433
Asheville
84458
Atlanta
419250
Atlanta Regional Commission
4142300
Austin
820611
Baltimore
620210
Belleville
3951
Bonner County
40877
Boston
609942
Boston GIS Data Hub
609942
California
37691912
Cambridge GIS
105162
Champaign
39795
Charleston
125583
Charlottesville
43956
Chattanooga
170136
Chicago
2707120
Chicago [Metro]
9729825
Cincinnati
296223
Cobb County
9815210
Colorado
5116796
Connecticut
3580709
Cook County
5217080
De Leon
2233
...
...
San Francisco
797983
San Mateo County Open Checkbook
727209
Santa Cruz
60342
Scottsdale
221020
Seattle
620778
Snohomish County
722400
Somerville
76519
South Bend
101000
South Carolina
4679230
South Carolina [GIS]
4679230
South Dakota
824082
Tennessee
6403353
Texas
25674681
Texas [Comptroller Transparency]
25674681
Tulsa
14741
Utah
2817222
Vermont
626431
Virginia
8096604
Virginia Beach
447021
Virginia Data Point
8096604
Wake County
929780
Washington
6830038
Washington D.C.
797983
Washington D.C.[GIS]
797983
Weatherford
25557
Wellington
57163
West Virginia
1855364
Williamsburg
15167
Wisconsin
5711767
Wyoming
568158
140 rows × 1 columns
In [20]:
df2.plot(kind="bar", figsize=(20, 10))
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x107d57c18>
In [37]:
df2 = df.groupby("Location").mean()
df2.plot(kind="barh", figsize=(10, 20))
Out[37]:
<matplotlib.axes._subplots.AxesSubplot at 0x114b9b4e0>
In [27]:
df3 = df2.sample(10)
In [28]:
df3
Out[28]:
Population
Location
Indiana
6516922
Massachusetts
6587536
West Virginia
1855364
Idaho
1584985
Kansas City
457551
New York [State Senate]
19465197
Richmond
210309
Texas
25674681
New Jersey [GIS Clearinghouse]
8791909
Sacramento
472178
In [29]:
p = plt.pie(df3["Population"], labels=df3.index)
In [ ]:
Content source: enchantner/python-zero
Similar notebooks: