In [2]:
# Demographics of DC by census year.
# Data scraped from https://matthewbgilmore.wordpress.com/district-of-columbia-population-history/
# The text is one flat run of whitespace-separated fields, four per census:
# the year followed by three population counts (commas as thousands separators).
pop = '1880 118,006 59,596 22 1890 154,695 75,572 125 1900 191,532 86,702 484 1910 236,128 94,446 495 1920 326,860 109,996 745 1930 353,981 132,068 820 1940 474,326 187,266 1,499 1950 517,865 280,803 3,510 1960 345,263 411,737 6,956 1970 209,272 537,712 9,526 1980 171,796 448,229 7,667 1990 179,667 399,604 12,792 2000 176,101 343,312 52,646 2010 231,471 305,125 65,127'
# Tokenise on whitespace and strip the thousands separators in one pass.
pop = [field.replace(',', '') for field in pop.split()]
# Regroup the flat token list into one 4-field record per census year.
RECORD_WIDTH = 4
chunks = [pop[start:start + RECORD_WIDTH] for start in range(0, len(pop), RECORD_WIDTH)]
In [3]:
# Inspect the grouped records: a list of 4-item lists of strings, one per census year.
chunks
Out[3]:
In [5]:
from datetime import datetime
import pandas as pd
# Pivot the per-census records in `chunks` into one list per field.
# Each record is [year, white, black, other]; the counts are converted to int,
# the year is kept as a string until it is parsed below.
yr = [record[0] for record in chunks]
w = [int(record[1]) for record in chunks]
b = [int(record[2]) for record in chunks]
o = [int(record[3]) for record in chunks]

# One column per field, keyed by the names the chart cells refer to.
data = {'white': w, 'black': b, 'other': o, 'year': yr}
df = pd.DataFrame(data)

# Parse the four-digit year strings into datetimes so they can sit on a
# datetime axis, and keep a plain 'YYYY' string copy in `time`.
df['year'] = pd.to_datetime(df['year'], format='%Y')
df['time'] = df['year'].dt.strftime('%Y')
df
Out[5]:
In [7]:
from bokeh.charts import Line, show, output_file, output_notebook
from bokeh.models import PrintfTickFormatter, Range1d
# Line chart of the three population series against census year.
# `y` lists several columns that all measure the same quantity (population);
# passing the same column names to `color` gives each series its own colour.
line_options = dict(
    # data mapping
    x='year',
    y=['black', 'white', 'other'],
    color=['black', 'white', 'other'],
    # axes
    xscale='datetime',
    yscale='linear',
    xlabel='year',
    ylabel='population',
    # legend
    legend=True,
    legend_sort_field='color',
    legend_sort_direction='ascending',
    # figure
    title="Demographic change in DC",
    width=600,
    height=400,
)
line = Line(df, **line_options)

# Printf-style tick labels on the y axis
# (presumably to avoid abbreviated/scientific tick text -- confirm in the rendered chart).
line.yaxis[0].formatter = PrintfTickFormatter(format="%5.8u")
# Pin the y axis to a fixed 0..600000 range.
line.y_range = Range1d(0, 600000)

# Render inline in the notebook.
output_notebook()
show(line)

# Alternative kept for reference: the same data as a stacked area chart.
#area2 = Area(df, x='year', y=['white', 'black', 'other'], title="Demographic change in DC", legend="top_left",
#             stack=True, xlabel='year', ylabel='population', width=800, height=600, xscale='datetime', yscale='linear')
In [12]:
# Jewish population of Greater Washington in 1956, keyed by community.
# These keys become the category labels of the bar chart drawn further down,
# so they are spelled as they should appear on the chart
# (Prince George's and Montgomery are counties, not "countries").
pop = {
    "NE, DC": 8100,
    "NW-West of Rock Creek, DC": 6400,
    "NW-East of Rock Creek, DC": 22200,
    "SE and SW, DC": 3600,
    "VA": 6400,
    "Prince George's County": 8700,
    "Montgomery County": 25500,
}
type(pop)
Out[12]:
In [15]:
import pandas
# Turn the community -> population mapping in `pop` into a two-column frame,
# ordered from the largest community to the smallest.
pop1956 = (
    pandas.DataFrame.from_dict(pop, orient='index')
    # The dict keys arrive as the index; move them into an ordinary column.
    .reset_index(drop=False)
    # After reset_index the columns are named 'index' and 0; give them real names.
    .rename(columns={'index': 'community', 0: 'population'})
    .sort_values(by='population', ascending=False)
)
pop1956
Out[15]:
In [6]:
Out[6]:
In [8]:
from bokeh.charts import Bar, output_file, show
import pandas
from bokeh.charts.attributes import cat
# Bar chart of the 1956 community populations.
# Plot from `pop1956`, the frame that holds the 'community' and 'population'
# columns referenced below; `df` is the DC demographics frame and has neither
# column, so passing it only works with stale kernel state.
# sort=False on the label asks the chart not to re-sort the categories, so the
# bars should follow the frame's row order (largest community first).
p = Bar(pop1956, title="Distribution of Greater Washington's Jewish Community in 1956",
        ylabel='population', values='population',
        label=cat(columns='community', sort=False), legend=None)
# Write the chart to a standalone HTML file and open it.
output_file("bar.html")
show(p)
In [ ]: