In [1]:
import geopandas
from geopandas import read_file

In [2]:
import mapclassify
# Display the version of mapclassify used to produce the outputs in this notebook.
mapclassify.__version__


Out[2]:
'2.2.0'

In [3]:
import libpysal
# Display the version of libpysal used to produce the outputs in this notebook.
libpysal.__version__


Out[3]:
'4.2.0'

In [5]:
# List the example datasets known to libpysal; the output table has one row
# per dataset with its name, description and an "Installed" flag.
libpysal.examples.available()


                      Name                                        Description  Installed
0                    10740    Albuquerque, New Mexico, Census 2000 Tract Data       True
1                   AirBnB  Airbnb rentals, socioeconomics, and crime in C...      False
2                  Atlanta       Atlanta, GA region homicide counts and rates      False
3                Baltimore          Baltimore house sales prices and hedonics      False
4                Bostonhsg               Boston housing and neighborhood data      False
5              Buenosaires      Electoral Data for 1999 Argentinean Elections      False
6              Charleston1   2000 Census Tract Data for Charleston, SC MSA...      False
7              Charleston2   1998 and 2001 Zip Code Business Patterns (Cen...      False
8           Chicago Health                   Chicago Health + Socio-Economics      False
9              Chile Labor                 Labor Markets in Chile (1982-2002)      False
10         Chile Migration            Internal Migration in Chile (1977-2002)      False
11              Cincinnati         2008 Cincinnati Crime + Socio-Demographics      False
12               Cleveland       2015 sales prices of homes in Cleveland, OH.      False
13                Columbus                        Columbus neighborhood crime      False
14                  Denver   Demographics and housing in Denver neighborho...      False
15               Elections               2012 and 2016 Presidential Elections      False
16                 Grid100                      Grid with simulated variables      False
17               Groceries                          2015 Chicago supermarkets      False
18                  Guerry          Moral statistics of France (Guerry, 1833)      False
19       Health Indicators                Chicago Health Indicators (2005-11)      False
20                 Health+                    2000 Health, Income + Diversity      False
21                Hickory1   2000 Census Tract Data for Hickory, NC MSA an...      False
22                Hickory2   1998 and 2001 Zip Code Business Patterns (Cen...      False
23              Home Sales              2014-15 Home Sales in King County, WA      False
24                 Houston       Houston, TX region homicide counts and rates      False
25                Juvenile             Cardiff juvenile delinquent residences      False
26                Lansing1   2000 Census Tract Data for Lansing, MI MSA an...      False
27                Lansing2   1998 and 2001 Zip Code Business Patterns (Cen...      False
28                 Laozone   Ozone measures at monitoring stations in Los ...      False
29                LasRosas   Corn yield, fertilizer and field data for pre...      False
30                    Line                                     Line Shapefile       True
31           Liquor Stores                         2015 Chicago Liquor Stores      False
32                 Malaria   Malaria incidence and population (1973, 95, 9...      False
33              Milwaukee1       2000 Census Tract Data for Milwaukee, WI MSA      False
34              Milwaukee2   1998 and 2001 Zip Code Business Patterns (Cen...      False
35                   NCOVR                      US county homicides 1960-1990      False
36                    NDVI        Normalized Difference Vegetation Index grid      False
37                     NYC   Demographic and housing data for New York Cit...      False
38            NYC Earnings              Block-level Earnings in NYC (2002-14)      False
39           NYC Education                               NYC Education (2000)      False
40       NYC Neighborhoods       Demographics for New York City neighborhoods      False
41  NYC Socio-Demographics                 NYC Education + Socio-Demographics      False
42              Natregimes                    NCOVR with regimes (book/PySAL)      False
43                   Nepal   Health, poverty and education indicators for ...      False
44                Ohiolung            Ohio lung cancer data, 1968, 1978, 1988      False
45                Orlando1   2000 Census Tract Data for Orlando, FL MSA an...      False
46                Orlando2   1998 and 2001 Zip Code Business Patterns (Cen...      False
47                  Oz9799                        Monthly ozone data, 1997-99      False
48             Phoenix ACS   Phoenix American Community Survey Data (2010,...      False
49              Pittsburgh                      Pittsburgh homicide locations      False
50                   Point                                    Point Shapefile       True
51                  Police           Police expenditures Mississippi counties      False
52                 Polygon                                  Polygon Shapefile       True
53           Polygon_Holes                 Example to test treatment of holes       True
54       Rio Grande do Sul  Cities of the Brazilian State of Rio Grande do...      False
55                    SIDS            North Carolina county SIDS death counts      False
56                   SIDS2   North Carolina county SIDS death counts and r...      False
57             Sacramento1          2000 Census Tract Data for Sacramento MSA      False
58             Sacramento2   1998 and 2001 Zip Code Business Patterns (Cen...      False
59           SanFran Crime   July-Dec 2012 crime incidents in San Francisc...      False
60               Savannah1   2000 Census Tract Data for Savannah, GA MSA a...      False
61               Savannah2   1998 and 2001 Zip Code Business Patterns (Cen...      False
62                 Scotlip               Male lip cancer in Scotland, 1975-80      False
63                Seattle1   2000 Census Tract Data for Seattle, WA MSA an...      False
64                Seattle2   1998 and 2001 Zip Code Business Patterns (Cen...      False
65                   South             US Southern county homicides 1960-1990      False
66                 StLouis   St Louis region county homicide counts and rates      False
67                  Tampa1   2000 Census Tract Data for Tampa, FL MSA and ...      False
68                  arcgis                               arcgis testing files       True
69                  baltim     Baltimore house sales prices and hedonics 1978       True
70                  berlin  Prenzlauer Berg neighborhood AirBnB data from ...       True
71                    book       Synthetic data to illustrate spatial weights       True
72                 burkitt  Burkitt's lymphoma in the Western Nile distric...       True
73                  calemp         Employment density for California counties       True
74                 chicago                              Chicago neighborhoods       True
75              clearwater                               mgwr testing dataset      False
76                columbus              Columbus neighborhood crime data 1980       True
77                 desmith    Small dataset to illustrate Moran's I statistic       True
78                geodanet        Datasets from geodanet for network analysis       True
79                 georgia  Various socio-economic variables for counties ...       True
80                juvenile    Residences of juvenile offenders in Cardiff, UK       True
81                  mexico  Decennial per capita incomes of Mexican states...       True
82                networks                  Datasets used for network testing       True
83                newHaven                            Network testing dataset      False
84               nyc_bikes                           New York City Bike Trips      False
85                   sids2  North Carolina county SIDS death counts and rates       True
86               snow_maps  Public water pumps and Cholera deaths in Londo...       True
87                     stl  Homicides and selected socio-economic characte...       True
88          street_net_pts                              Street network points       True
89                     taz           Traffic Analysis Zones in So. California      False
90                   tokyo                               Tokyo Mortality data       True
91               us_income  Per-capita income for the lower 48 US states 1...       True
92                virginia                        Virginia counties shapefile       True
93                    wmat          Datasets used for spatial weights testing       True

In [6]:
# Load the 'South' example dataset (this run downloaded it, see the message below).
# The return value is bound to the throwaway name `_` because it is not used.
_ = libpysal.examples.load_example('South')
# Look up the path of the shapefile belonging to that example.
pth = libpysal.examples.get_path('south.shp')


Downloading South to /home/jovyan/.local/pysal_data/South

In [7]:
# Read the shapefile at `pth` with geopandas.read_file; `df` is the frame plotted below.
df = read_file(pth)

New default legend formatting


In [8]:
%matplotlib inline
ax = df.plot(column='HR60', scheme='QUANTILES', k=4, \
             cmap='BuPu', legend=True,
             legend_kwds={'loc': 'center left', 'bbox_to_anchor':(1,0.5)})



In [9]:
# Collect the label strings of the legend attached to the previous plot.
labels = [
    entry.get_text()
    for entry in ax.get_legend().get_texts()
]
labels


Out[9]:
['[ 0.00,  3.21]', '( 3.21,  6.25]', '( 6.25,  9.96]', '( 9.96, 92.94]']

In [10]:
# Build the same four-class quantile classification directly with mapclassify
# so its class intervals can be compared with the map legend.
q4 = mapclassify.Quantiles(df['HR60'], k=4)
q4


Out[10]:
Quantiles             

   Interval      Count
----------------------
[ 0.00,  3.21] |   353
( 3.21,  6.25] |   353
( 6.25,  9.96] |   353
( 9.96, 92.94] |   353

In [11]:
# Check that the legend labels drawn on the map equal the class labels
# mapclassify produces for the same classification.
labels == q4.get_legend_classes()


Out[11]:
True

Note that in this case, the first interval is closed on the minimum value in the dataset. The other intervals have an open lower bound. This is now displayed in the legend.

Overriding numerical format


In [12]:
# Quantile map with no `fmt` entry in legend_kwds, so the legend uses the
# default number format; a baseline for the `fmt` overrides that follow.
ax = df.plot(
    column='HR60',
    scheme='QUANTILES',
    k=4,
    cmap='BuPu',
    legend=True,
    legend_kwds={
        'loc': 'center left',
        'bbox_to_anchor': (1, 0.5),
    },
)



In [13]:
# Same quantile map, with the legend numbers formatted to four decimal places.
ax = df.plot(
    column='HR60',
    scheme='QUANTILES',
    k=4,
    cmap='BuPu',
    legend=True,
    legend_kwds={
        'loc': 'center left',
        'bbox_to_anchor': (1, 0.5),
        'fmt': "{:.4f}",
    },
)



In [14]:
# Same quantile map, with the legend numbers formatted without decimals.
ax = df.plot(
    column='HR60',
    scheme='QUANTILES',
    k=4,
    cmap='BuPu',
    legend=True,
    legend_kwds={
        'loc': 'center left',
        'bbox_to_anchor': (1, 0.5),
        'fmt': "{:.0f}",
    },
)


The new legend_kwds argument fmt takes a format string (for example "{:.4f}") that sets the numerical formatting of the legend labels.

When first class lower bound < y.min()


In [15]:
# Box-plot classification of HR60, with the legend numbers formatted without decimals.
ax = df.plot(
    column='HR60',
    scheme='BoxPlot',
    cmap='BuPu',
    legend=True,
    legend_kwds={
        'loc': 'center left',
        'bbox_to_anchor': (1, 0.5),
        'fmt': "{:.0f}",
    },
)



In [16]:
# Build the box-plot classification directly with mapclassify to inspect
# its class intervals and counts.
bp = mapclassify.BoxPlot(df['HR60'])
bp


Out[16]:
BoxPlot               

   Interval      Count
----------------------
( -inf, -6.90] |     0
(-6.90,  3.21] |   353
( 3.21,  6.25] |   353
( 6.25,  9.96] |   353
( 9.96, 20.07] |   311
(20.07, 92.94] |    42

In [17]:
# Class labels of the box-plot classification, formatted without decimals.
bp.get_legend_classes(fmt="{:.0f}")


Out[17]:
['(-inf,   -7]',
 '(  -7,    3]',
 '(   3,    6]',
 '(   6,   10]',
 '(  10,   20]',
 '(  20,   93]']

With some classifiers, the lower (upper) bound of the first (last) interval is not equal to the minimum (maximum) of the attribute values; in the BoxPlot example above, the first class starts at -inf and contains no observations. Users should be aware of this, as it is useful for detecting extreme values and highly skewed distributions.

Categorical Data


In [18]:
# Categorical map coloured by STATE_NAME, with the legend placed at the right edge.
ax = df.plot(
    column='STATE_NAME',
    categorical=True,
    legend=True,
    legend_kwds={
        'loc': 'center left',
        'bbox_to_anchor': (1, 0.5),
        'fmt': "{:.0f}",  # fmt is ignored for categorical data
    },
)



In [ ]: