Computational Narratives

Computers are good at consuming, producing and processing data

Humans are good at consuming, producing and processing stories

For data to be useful to humans, we need tools for telling stories that involve code and data



In [1]:

    
# Load the `load_style` IPython extension, then invoke `%load_style` on images.css.
# NOTE(review): presumably `%load_style` is provided by that extension and injects
# the stylesheet into the notebook page — the extension is not shown here; confirm.
%load_ext load_style
%load_style images.css

Illustration

Data and code



In [2]:

    
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns



In [3]:

    
# Load the police-residency table. The file uses '**' as its missing-value
# marker; declaring it as NA at parse time replaces the former frame-wide
# `replace('**', np.nan)`, which compared a string against numeric columns and
# raised pandas' "elementwise comparison failed" FutureWarning.
fraction_columns = ['all', 'white', 'non-white', 'black', 'hispanic']
df1 = pd.read_csv('data/police_locals.csv', na_values=['**'])

# Explicit cast kept as a guard so every residency column is float
# (NaN where the file had '**'), exactly as the per-column casts did before.
df1[fraction_columns] = df1[fraction_columns].astype('float')









    



/Users/bgranger/anaconda/envs/python34/lib/python3.5/site-packages/pandas/core/common.py:449: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  mask = arr == x



In [4]:

    
# Display the cleaned frame; pandas elides the middle rows in the rendered table.
df1









    Out[4]:






  
    
      
      city
      police_force_size
      all
      white
      non-white
      black
      hispanic
    
  
  
    
      0
      New York
      32300
      0.617957
      0.446387
      0.764419
      0.770891
      0.762861
    
    
      1
      Chicago
      12120
      0.875000
      0.871963
      0.877400
      0.897406
      0.839827
    
    
      2
      Los Angeles
      10100
      0.228218
      0.152778
      0.263848
      0.387387
      0.217680
    
    
      3
      Washington
      9340
      0.115632
      0.056774
      0.157365
      0.170189
      0.089888
    
    
      4
      Houston
      7700
      0.292208
      0.173735
      0.399258
      0.366379
      0.457143
    
    
      5
      Philadelphia
      6045
      0.835401
      0.776899
      0.899480
      0.924658
      0.817391
    
    
      6
      Phoenix
      4475
      0.311732
      0.270802
      0.427350
      0.521739
      0.427711
    
    
      7
      San Diego
      4460
      0.362108
      0.372984
      0.348485
      0.538462
      0.297794
    
    
      8
      Dallas
      3605
      0.191401
      0.171504
      0.213450
      0.214634
      0.256881
    
    
      9
      Detroit
      3265
      0.370597
      0.081967
      0.542787
      0.568000
      0.333333
    
    
      10
      San Francisco
      3020
      0.316225
      0.259494
      0.378472
      0.186047
      0.253333
    
    
      11
      San Antonio
      2955
      0.624365
      0.443878
      0.713924
      0.574468
      0.739130
    
    
      12
      Atlanta
      2950
      0.137288
      0.186275
      0.111399
      0.101983
      NaN
    
    
      13
      Las Vegas
      2830
      0.374558
      0.400000
      0.307692
      0.387755
      0.267857
    
    
      14
      Baltimore
      2800
      0.257143
      0.132812
      0.361842
      0.391459
      NaN
    
    
      15
      Boston
      2560
      0.476562
      0.441558
      0.582677
      0.686567
      0.750000
    
    
      16
      Jacksonville, Fla.
      2335
      0.809422
      0.713781
      0.956522
      1.000000
      0.888889
    
    
      17
      El Paso, Texas
      2260
      0.851770
      0.826446
      0.861027
      NaN
      0.861027
    
    
      18
      Columbus, Ohio
      2245
      0.405345
      0.379781
      0.518072
      0.571429
      NaN
    
    
      19
      Cleveland
      2045
      0.557457
      0.498127
      0.669014
      0.595960
      0.941176
    
    
      20
      Tucson, Ariz.
      2020
      0.398515
      0.416667
      0.375000
      NaN
      0.333333
    
    
      21
      Newark, N.J.
      2005
      0.279302
      0.207965
      0.371429
      0.519481
      0.260417
    
    
      22
      Austin, Texas
      1985
      0.294710
      0.194690
      0.426901
      0.250000
      0.453846
    
    
      23
      Memphis, Tenn.
      1970
      0.464467
      0.339130
      0.640244
      0.668874
      NaN
    
    
      24
      Milwaukee
      1960
      0.721939
      0.692884
      0.784000
      0.931034
      0.733333
    
    
      25
      San Jose, Calif.
      1875
      0.466667
      0.472340
      0.457143
      NaN
      0.406977
    
    
      26
      Miami
      1860
      0.072581
      0.030612
      0.087591
      0.000000
      0.116751
    
    
      27
      Denver
      1820
      0.282967
      0.149321
      0.489510
      0.580645
      0.391753
    
    
      28
      Sacramento, Calif.
      1820
      0.079670
      0.063380
      0.137500
      0.320000
      0.000000
    
    
      29
      Charlotte, N.C.
      1780
      0.362360
      0.294545
      0.592593
      0.833333
      0.321429
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      46
      Albuquerque, N.M.
      1340
      0.615672
      0.629630
      0.601504
      NaN
      0.566372
    
    
      47
      Jersey City, N.J.
      1170
      0.252137
      0.206452
      0.341772
      0.303030
      0.325581
    
    
      48
      Raleigh, N.C.
      1150
      0.269565
      0.206349
      0.560976
      NaN
      NaN
    
    
      49
      Rochester, N.Y.
      1150
      0.100000
      0.040936
      0.271186
      0.195122
      NaN
    
    
      50
      Cincinnati
      1145
      0.227074
      0.147727
      0.490566
      0.648649
      NaN
    
    
      51
      Long Beach, Calif.
      1115
      0.291480
      0.277228
      0.303279
      NaN
      0.312500
    
    
      52
      Birmingham, Ala.
      1110
      0.225225
      0.086022
      0.325581
      0.328125
      NaN
    
    
      53
      Wichita, Kan.
      1075
      0.600000
      0.511765
      0.933333
      NaN
      0.896552
    
    
      54
      Virginia Beach, Va.
      1070
      0.789720
      0.756250
      0.888889
      0.727273
      1.000000
    
    
      55
      Fresno, Calif.
      1040
      0.514423
      0.509615
      0.519231
      0.681818
      0.460317
    
    
      56
      Buffalo, N.Y.
      1010
      0.336634
      0.292398
      0.580645
      NaN
      0.523810
    
    
      57
      Minneapolis
      1000
      0.100000
      0.052632
      0.379310
      NaN
      NaN
    
    
      58
      Portland, Ore.
      1000
      0.210000
      0.186441
      0.391304
      NaN
      NaN
    
    
      59
      Reno, Nev.
      1000
      0.340000
      0.323864
      0.458333
      NaN
      NaN
    
    
      60
      Richmond, Va.
      1000
      0.110000
      0.101695
      0.121951
      0.208333
      NaN
    
    
      61
      Baton Rouge, La.
      980
      0.214286
      0.144068
      0.320513
      0.342466
      NaN
    
    
      62
      Jackson, Miss.
      960
      0.390625
      0.082192
      0.579832
      0.579832
      NaN
    
    
      63
      Riverside, Calif.
      955
      0.219895
      0.350000
      0.076923
      0.000000
      0.142857
    
    
      64
      Fort Lauderdale, Fla.
      950
      0.168421
      0.220183
      0.098765
      0.102564
      0.114286
    
    
      65
      St. Louis
      950
      0.589474
      0.538462
      0.671233
      0.682540
      NaN
    
    
      66
      Brownsville, Texas
      925
      0.513514
      0.500000
      0.514124
      NaN
      0.520231
    
    
      67
      Albany, N.Y.
      890
      0.185393
      0.160256
      0.363636
      NaN
      NaN
    
    
      68
      Colorado Springs, Colo.
      860
      0.604651
      0.553030
      0.775000
      NaN
      0.913043
    
    
      69
      Savannah, Ga.
      860
      0.215116
      0.076923
      0.299065
      0.170732
      0.750000
    
    
      70
      Winston-Salem, N.C.
      860
      0.575581
      0.424779
      0.864407
      0.869565
      NaN
    
    
      71
      Toledo, Ohio
      805
      0.565217
      0.530769
      0.709677
      0.750000
      NaN
    
    
      72
      Madison, Wis.
      790
      0.278481
      0.246479
      0.562500
      NaN
      NaN
    
    
      73
      Corpus Christi, Texas
      770
      0.857143
      0.893333
      0.822785
      NaN
      0.847222
    
    
      74
      San Bernardino, Calif.
      755
      0.271523
      0.263158
      0.280000
      NaN
      0.274510
    
    
      75
      Top 75 Cities
      190990
      0.446437
      0.375774
      0.526938
      0.540470
      0.528717
    
  

76 rows × 7 columns



In [5]:

    
# Row count. Note the last row ("Top 75 Cities") is an aggregate, not a city,
# so this is one more than the number of individual cities.
len(df1)









    Out[5]:





76



In [6]:

    
# Summary statistics of the per-city residency fractions for white and non-white officers.
# NOTE(review): df1 still contains the aggregate "Top 75 Cities" row (index 75), so it is
# counted here alongside the individual cities — confirm whether it should be excluded.
df1[['white','non-white']].describe()



In [7]:

    
# Reshape from wide to long: one row per (city, ethnicity) pair, with the
# residency fraction in `percent`, which is the layout seaborn expects for
# grouped plots.
df2 = pd.melt(
    df1,
    id_vars=['city'],                    # kept as the identifier column
    value_vars=['white', 'non-white'],   # columns unpivoted into rows
    var_name='ethnicity',                # holds the former column name
    value_name='percent',                # holds the former cell value
)



In [8]:

    
# Preview the first rows of the long-format frame (city, ethnicity, percent).
df2.head()









    Out[8]:






  
    
      
      city
      ethnicity
      percent
    
  
  
    
      0
      New York
      white
      0.446387
    
    
      1
      Chicago
      white
      0.871963
    
    
      2
      Los Angeles
      white
      0.152778
    
    
      3
      Washington
      white
      0.056774
    
    
      4
      Houston
      white
      0.173735



In [9]:

    
# Distribution across cities of the fraction of officers who live in the city
# they serve, split by officer ethnicity. The `percent` column holds fractions
# in [0, 1], so the axis is labelled explicitly rather than left as the raw
# column name, and the figure gets a title so it can stand alone.
ax = sns.boxplot(x='ethnicity', y='percent', data=df2)
ax.set(
    title='Officers living in the city they serve, by ethnicity',
    xlabel='Officer ethnicity',
    ylabel='Fraction of officers living in the city',
);

Narrative



In [10]:

    
# Render a local PNG inline at a display width of 600.
# NOTE(review): judging by the filename this is the chart from the 538 article — confirm.
from IPython.display import Image
Image('images/538_local_police.png', width=600)









    Out[10]:

This data comes from an article published on 538 on August 20, 2014, entitled “Most Police Don’t Live In The Cities They Serve.” From the article:

In Ferguson, Missouri, where protests continue following the shooting of a teenager by a police officer this month, more than two-thirds of the civilian population is black. Only 11 percent of the police force is. The racial disparity is troubling enough on its own, but it’s also suggestive of another type of misrepresentation. Given Ferguson’s racial gap, it’s likely that many of its police officers live outside city limits.

The above dataset, published by 538 in this GitHub repository, contains the fraction of police officers who live within the city they serve, broken down by the officers' race, for 75 cities across the U.S. Now we can develop a story around that data:

- On average, only about a third ($\approx 35\%$) of white police officers live in the cities they serve.
- On average, non-white police officers ($\approx 49\%$) are more likely to live in the cities they serve.
- There is a significant spread ($\sigma\approx 25\%$) across individual cities.
- For issues of racial justice, it could be useful to have more police officers live in the cities they serve.

Without the story, the data doesn't mean much to us.

Dashboards

Sometimes these computational narratives need to be used in contexts where the consumers no longer want to look at the code. Our collaborators at IBM have created a Jupyter incubation project (https://github.com/jupyter-incubator/dashboards) that turns any notebook into a live dashboard. You can play with the demo here (https://github.com/jupyter-incubator/showcase).

	white	non-white
count	76.000000	76.000000
mean	0.348386	0.486143
std	0.240522	0.252310
min	0.026667	0.076923
25%	0.151914	0.302225
50%	0.293472	0.473922
75%	0.502404	0.669569
max	0.962963	0.956522

	city	police_force_size	all	white	non-white	black	hispanic
0	New York	32300	0.617957	0.446387	0.764419	0.770891	0.762861
1	Chicago	12120	0.875000	0.871963	0.877400	0.897406	0.839827
2	Los Angeles	10100	0.228218	0.152778	0.263848	0.387387	0.217680
3	Washington	9340	0.115632	0.056774	0.157365	0.170189	0.089888
4	Houston	7700	0.292208	0.173735	0.399258	0.366379	0.457143
5	Philadelphia	6045	0.835401	0.776899	0.899480	0.924658	0.817391
6	Phoenix	4475	0.311732	0.270802	0.427350	0.521739	0.427711
7	San Diego	4460	0.362108	0.372984	0.348485	0.538462	0.297794
8	Dallas	3605	0.191401	0.171504	0.213450	0.214634	0.256881
9	Detroit	3265	0.370597	0.081967	0.542787	0.568000	0.333333
10	San Francisco	3020	0.316225	0.259494	0.378472	0.186047	0.253333
11	San Antonio	2955	0.624365	0.443878	0.713924	0.574468	0.739130
12	Atlanta	2950	0.137288	0.186275	0.111399	0.101983	NaN
13	Las Vegas	2830	0.374558	0.400000	0.307692	0.387755	0.267857
14	Baltimore	2800	0.257143	0.132812	0.361842	0.391459	NaN
15	Boston	2560	0.476562	0.441558	0.582677	0.686567	0.750000
16	Jacksonville, Fla.	2335	0.809422	0.713781	0.956522	1.000000	0.888889
17	El Paso, Texas	2260	0.851770	0.826446	0.861027	NaN	0.861027
18	Columbus, Ohio	2245	0.405345	0.379781	0.518072	0.571429	NaN
19	Cleveland	2045	0.557457	0.498127	0.669014	0.595960	0.941176
20	Tucson, Ariz.	2020	0.398515	0.416667	0.375000	NaN	0.333333
21	Newark, N.J.	2005	0.279302	0.207965	0.371429	0.519481	0.260417
22	Austin, Texas	1985	0.294710	0.194690	0.426901	0.250000	0.453846
23	Memphis, Tenn.	1970	0.464467	0.339130	0.640244	0.668874	NaN
24	Milwaukee	1960	0.721939	0.692884	0.784000	0.931034	0.733333
25	San Jose, Calif.	1875	0.466667	0.472340	0.457143	NaN	0.406977
26	Miami	1860	0.072581	0.030612	0.087591	0.000000	0.116751
27	Denver	1820	0.282967	0.149321	0.489510	0.580645	0.391753
28	Sacramento, Calif.	1820	0.079670	0.063380	0.137500	0.320000	0.000000
29	Charlotte, N.C.	1780	0.362360	0.294545	0.592593	0.833333	0.321429
...	...	...	...	...	...	...	...
46	Albuquerque, N.M.	1340	0.615672	0.629630	0.601504	NaN	0.566372
47	Jersey City, N.J.	1170	0.252137	0.206452	0.341772	0.303030	0.325581
48	Raleigh, N.C.	1150	0.269565	0.206349	0.560976	NaN	NaN
49	Rochester, N.Y.	1150	0.100000	0.040936	0.271186	0.195122	NaN
50	Cincinnati	1145	0.227074	0.147727	0.490566	0.648649	NaN
51	Long Beach, Calif.	1115	0.291480	0.277228	0.303279	NaN	0.312500
52	Birmingham, Ala.	1110	0.225225	0.086022	0.325581	0.328125	NaN
53	Wichita, Kan.	1075	0.600000	0.511765	0.933333	NaN	0.896552
54	Virginia Beach, Va.	1070	0.789720	0.756250	0.888889	0.727273	1.000000
55	Fresno, Calif.	1040	0.514423	0.509615	0.519231	0.681818	0.460317
56	Buffalo, N.Y.	1010	0.336634	0.292398	0.580645	NaN	0.523810
57	Minneapolis	1000	0.100000	0.052632	0.379310	NaN	NaN
58	Portland, Ore.	1000	0.210000	0.186441	0.391304	NaN	NaN
59	Reno, Nev.	1000	0.340000	0.323864	0.458333	NaN	NaN
60	Richmond, Va.	1000	0.110000	0.101695	0.121951	0.208333	NaN
61	Baton Rouge, La.	980	0.214286	0.144068	0.320513	0.342466	NaN
62	Jackson, Miss.	960	0.390625	0.082192	0.579832	0.579832	NaN
63	Riverside, Calif.	955	0.219895	0.350000	0.076923	0.000000	0.142857
64	Fort Lauderdale, Fla.	950	0.168421	0.220183	0.098765	0.102564	0.114286
65	St. Louis	950	0.589474	0.538462	0.671233	0.682540	NaN
66	Brownsville, Texas	925	0.513514	0.500000	0.514124	NaN	0.520231
67	Albany, N.Y.	890	0.185393	0.160256	0.363636	NaN	NaN
68	Colorado Springs, Colo.	860	0.604651	0.553030	0.775000	NaN	0.913043
69	Savannah, Ga.	860	0.215116	0.076923	0.299065	0.170732	0.750000
70	Winston-Salem, N.C.	860	0.575581	0.424779	0.864407	0.869565	NaN
71	Toledo, Ohio	805	0.565217	0.530769	0.709677	0.750000	NaN
72	Madison, Wis.	790	0.278481	0.246479	0.562500	NaN	NaN
73	Corpus Christi, Texas	770	0.857143	0.893333	0.822785	NaN	0.847222
74	San Bernardino, Calif.	755	0.271523	0.263158	0.280000	NaN	0.274510
75	Top 75 Cities	190990	0.446437	0.375774	0.526938	0.540470	0.528717