In [2]:
# Plot the demographics of DC.
# Figures scraped from https://matthewbgilmore.wordpress.com/district-of-columbia-population-history/
# The scraped text is one flat run of whitespace-separated fields: a year
# followed by three population counts that use ',' as a thousands separator.
pop = [
    field.replace(',', '')
    for field in '1880 118,006 59,596 22 1890 154,695 75,572 125 1900 191,532 86,702 484 1910 236,128 94,446 495 1920 326,860 109,996 745 1930 353,981 132,068 820 1940 474,326 187,266 1,499 1950 517,865 280,803 3,510 1960 345,263 411,737 6,956 1970 209,272 537,712 9,526 1980 171,796 448,229 7,667 1990 179,667 399,604 12,792 2000 176,101 343,312 52,646 2010 231,471 305,125 65,127'.split()
]
# Regroup the flat field list into one 4-item row per census year.
chunks = [pop[start:start + 4] for start in range(0, len(pop), 4)]

In [3]:
# Inspect the parsed rows: each entry is a list of four strings, the year
# followed by the three population counts with separators already removed.
chunks


Out[3]:
[['1880', '118006', '59596', '22'],
 ['1890', '154695', '75572', '125'],
 ['1900', '191532', '86702', '484'],
 ['1910', '236128', '94446', '495'],
 ['1920', '326860', '109996', '745'],
 ['1930', '353981', '132068', '820'],
 ['1940', '474326', '187266', '1499'],
 ['1950', '517865', '280803', '3510'],
 ['1960', '345263', '411737', '6956'],
 ['1970', '209272', '537712', '9526'],
 ['1980', '171796', '448229', '7667'],
 ['1990', '179667', '399604', '12792'],
 ['2000', '176101', '343312', '52646'],
 ['2010', '231471', '305125', '65127']]

In [5]:
from datetime import datetime
import pandas as pd

# Split the parsed rows into one list per field. The year stays a string
# here (it is parsed into a timestamp below); the three counts become ints.
yr = [row[0] for row in chunks]
w = [int(row[1]) for row in chunks]
b = [int(row[2]) for row in chunks]
o = [int(row[3]) for row in chunks]
data = {'white': w, 'black': b, 'other': o, 'year': yr}

df = pd.DataFrame(data)
# Turn the 'YYYY' labels into timestamps so the chart can use a datetime
# axis, then add a plain four-digit string copy as the 'time' column.
df = df.assign(year=pd.to_datetime(df['year'], format='%Y'))
df = df.assign(time=df['year'].dt.strftime('%Y'))
df


Out[5]:
black other white year time
0 59596 22 118006 1880-01-01 1880
1 75572 125 154695 1890-01-01 1890
2 86702 484 191532 1900-01-01 1900
3 94446 495 236128 1910-01-01 1910
4 109996 745 326860 1920-01-01 1920
5 132068 820 353981 1930-01-01 1930
6 187266 1499 474326 1940-01-01 1940
7 280803 3510 517865 1950-01-01 1950
8 411737 6956 345263 1960-01-01 1960
9 537712 9526 209272 1970-01-01 1970
10 448229 7667 171796 1980-01-01 1980
11 399604 12792 179667 1990-01-01 1990
12 343312 52646 176101 2000-01-01 2000
13 305125 65127 231471 2010-01-01 2010

In [7]:
from bokeh.charts import Line, show, output_file, output_notebook
from bokeh.models import PrintfTickFormatter, Range1d
# Line chart of the census frame `df`: one series per population column,
# plotted against the 'year' timestamps. Passing the same column names to
# `color` gives each series its own colour.
line = Line(
    df,
    x='year',
    y=['black', 'white', 'other'],
    color=['black', 'white', 'other'],
    title="Demographic change in DC",
    xlabel='year',
    ylabel='population',
    xscale='datetime',
    yscale='linear',
    legend=True,
    legend_sort_field='color',
    legend_sort_direction='ascending',
    width=600,
    height=400,
)

# Use a printf-style integer format for the population ticks and pin the
# y axis to the range 0 - 600,000.
line.yaxis[0].formatter = PrintfTickFormatter(format="%5.8u")
line.y_range = Range1d(0, 600000)

# Send bokeh output to the notebook, then render the chart.
output_notebook()
show(line)


Loading BokehJS ...

In [12]:
# Data for the bar chart of Greater Washington's Jewish population in 1956,
# keyed by community. The keys are used verbatim as the chart's category
# labels, so they are spelled as they should be displayed ("County", not
# "Country", for the two Maryland counties).
pop = {
    "NE, DC": 8100,
    "NW-West of Rock Creek, DC": 6400,
    "NW-East of Rock Creek, DC": 22200,
    "SE and SW, DC": 3600,
    "VA": 6400,
    "Prince Georges County": 8700,
    "Montgomery County": 25500,
}
type(pop)


Out[12]:
dict

In [15]:
import pandas
# Build a two-column table from the `pop` dict: the dict keys become the
# 'community' column and the values the 'population' column, ordered from
# the largest community to the smallest.
pop1956 = (
    pandas.DataFrame.from_dict(pop, orient='index')
    .reset_index()
    .rename(columns={'index': 'community', 0: 'population'})
    .sort_values('population', ascending=False)
)
pop1956


Out[15]:
community population
6 Montgomery Country 25500
2 NW-East of Rock Creek, DC 22200
4 Prince Georges Country 8700
5 NE, DC 8100
0 VA 6400
1 NW-West of Rock Creek, DC 6400
3 SE and SW, DC 3600

In [6]:



Out[6]:
community population
3 Montgomery Country 25500
4 NW-East of Rock Creek, DC 22200
5 Prince Georges Country 8700
6 NE, DC 8100
0 NW-West of Rock Creek, DC 6400
1 VA 6400
2 SE and SW, DC 3600

In [8]:
from bokeh.charts import Bar, output_file, show
import pandas
from bokeh.charts.attributes import cat


# Bar chart of the 1956 community populations. The chart reads the
# 'community' and 'population' columns, which exist on `pop1956`; the census
# frame `df` has neither, so `pop1956` is the data source here.
# sort=False asks bokeh not to re-sort the categories, so the bars follow
# the frame's existing row order.
p = Bar(
    pop1956,
    title="Distribution of Greater Washington's Jewish Community in 1956",
    ylabel='population',
    values='population',
    label=cat(columns='community', sort=False),
    legend=None,
)
# Direct bokeh output to a standalone HTML file before showing the chart.
output_file("bar.html")

show(p)

In [ ]: