notebook.community

Edit and run



In [2]:

    
# Graph the demographics of DC.
# Scraped from https://matthewbgilmore.wordpress.com/district-of-columbia-population-history/
# The text is one flat run of 4-token records: a year followed by three population counts.
pop = '1880 118,006 59,596 22 1890 154,695 75,572 125 1900 191,532 86,702 484 1910 236,128 94,446 495 1920 326,860 109,996 745 1930 353,981 132,068 820 1940 474,326 187,266 1,499 1950 517,865 280,803 3,510 1960 345,263 411,737 6,956 1970 209,272 537,712 9,526 1980 171,796 448,229 7,667 1990 179,667 399,604 12,792 2000 176,101 343,312 52,646 2010 231,471 305,125 65,127'
# Tokenise on whitespace and strip the thousands separators from every token.
pop = [token.replace(',', '') for token in pop.split()]
# Regroup the flat token list into consecutive rows of four tokens each.
row_starts = range(0, len(pop), 4)
chunks = [pop[start:start + 4] for start in row_starts]



In [3]:

    
# Display the parsed rows: each is a 4-item list of comma-free strings, starting with the year.
chunks









    Out[3]:





[['1880', '118006', '59596', '22'],
 ['1890', '154695', '75572', '125'],
 ['1900', '191532', '86702', '484'],
 ['1910', '236128', '94446', '495'],
 ['1920', '326860', '109996', '745'],
 ['1930', '353981', '132068', '820'],
 ['1940', '474326', '187266', '1499'],
 ['1950', '517865', '280803', '3510'],
 ['1960', '345263', '411737', '6956'],
 ['1970', '209272', '537712', '9526'],
 ['1980', '171796', '448229', '7667'],
 ['1990', '179667', '399604', '12792'],
 ['2000', '176101', '343312', '52646'],
 ['2010', '231471', '305125', '65127']]



In [5]:

    
from datetime import datetime
import pandas as pd

# Split the per-census rows into four parallel columns.
# Position 0 is the year (kept as text until parsed below); positions 1-3 are counts.
yr = [row[0] for row in chunks]
w = [int(row[1]) for row in chunks]
b = [int(row[2]) for row in chunks]
o = [int(row[3]) for row in chunks]

data = {'white': w, 'black': b, 'other': o, 'year': yr}

# Build the frame, parse 'year' into a real datetime, then derive 'time' as the
# four-digit year string taken from that parsed datetime.
df = (
    pd.DataFrame(data)
    .assign(year=lambda frame: pd.to_datetime(frame['year'], format='%Y'))
    .assign(time=lambda frame: frame['year'].dt.strftime('%Y'))
)
df



In [7]:

    
from bokeh.charts import Line, show, output_file, output_notebook
from bokeh.models import PrintfTickFormatter, Range1d
# Line chart of DC's population over time: one line per population column of `df`,
# plotted against the datetime 'year' column.
line = Line(
    df,
    # data mapping
    x='year',
    y=['black', 'white', 'other'],
    color=['black', 'white', 'other'],
    # dash=['python', 'pypy', 'jython'],  (disabled; none of these names is a column of df)
    # axes
    xscale='datetime',
    yscale='linear',
    xlabel='year',
    ylabel='population',
    # title and legend
    title="Demographic change in DC",
    legend=True,
    legend_sort_field='color',
    legend_sort_direction='ascending',
    # figure size
    width=600,
    height=400,
)

# Format the y tick labels with a printf-style unsigned-integer pattern and
# pin the y axis to the 0..600000 range.
line.yaxis[0].formatter = PrintfTickFormatter(format="%5.8u")
line.y_range = Range1d(0, 600000)

# Send output to the notebook, then display the chart.
output_notebook()
show(line)

# Stacked-area alternative, left disabled:
#area2 = Area(df, x='year', y=['white', 'black', 'other'], title="Demographic change in DC", legend="top_left",
 #            stack=True, xlabel='year', ylabel='population', width=800, height=600, xscale='datetime', yscale='linear')









    





    
        
        Loading BokehJS ...



In [12]:

    
# Jewish population of the Washington, DC area in 1956, keyed by community.
# NOTE(review): "Country" in the last two keys looks like a typo for "County".
# Left untouched because the keys become the 'community' column later in the
# notebook -- confirm before changing.
pop = {
    "NE, DC": 8100,
    "NW-West of Rock Creek, DC": 6400,
    "NW-East of Rock Creek, DC": 22200,
    "SE and SW, DC": 3600,
    "VA": 6400,
    "Prince Georges Country": 8700,
    "Montgomery Country": 25500,
}
type(pop)









    Out[12]:





dict



In [15]:

    
import pandas
# One row per community: the dict keys start out as the index and the counts
# land in a single unnamed column.
counts_by_community = pandas.DataFrame.from_dict(pop, orient='index')
# Move the community names out of the index into an ordinary column, then label both columns.
labelled = counts_by_community.reset_index(drop=False)
labelled.columns = ['community', 'population']
# Largest community first.
pop1956 = labelled.sort_values(by='population', ascending=False)
pop1956









    Out[15]:






  
    
      
      community
      population
    
  
  
    
      6
      Montgomery Country
      25500
    
    
      2
      NW-East of Rock Creek, DC
      22200
    
    
      4
      Prince Georges Country
      8700
    
    
      5
      NE, DC
      8100
    
    
      0
      VA
      6400
    
    
      1
      NW-West of Rock Creek, DC
      6400
    
    
      3
      SE and SW, DC
      3600



In [6]:









    Out[6]:






  
    
      
      community
      population
    
  
  
    
      3
      Montgomery Country
      25500
    
    
      4
      NW-East of Rock Creek, DC
      22200
    
    
      5
      Prince Georges Country
      8700
    
    
      6
      NE, DC
      8100
    
    
      0
      NW-West of Rock Creek, DC
      6400
    
    
      1
      VA
      6400
    
    
      2
      SE and SW, DC
      3600



In [8]:

    
from bokeh.charts import Bar, output_file, show
import pandas
from bokeh.charts.attributes import cat


# Bar chart of the 1956 community populations.
# The chart reads the 'population' column for bar heights and the 'community'
# column for bar labels, so it must be given pop1956. The frame named `df` in
# this notebook is the race-by-year table (white/black/other/year/time) and has
# neither column; passing it only worked when `df` had been rebound by a cell
# that is no longer part of the notebook.
# sort=False keeps the bars in the frame's row order rather than re-sorting the labels.
p = Bar(pop1956, title="Distribution of Greater Washington's Jewish Community in 1956",
        ylabel='population', values='population',
        label=cat(columns='community', sort=False), legend=None)
# Write the chart to a standalone HTML file, then open/display it.
output_file("bar.html")

show(p)



In [ ]:

	black	other	white	year	time
0	59596	22	118006	1880-01-01	1880
1	75572	125	154695	1890-01-01	1890
2	86702	484	191532	1900-01-01	1900
3	94446	495	236128	1910-01-01	1910
4	109996	745	326860	1920-01-01	1920
5	132068	820	353981	1930-01-01	1930
6	187266	1499	474326	1940-01-01	1940
7	280803	3510	517865	1950-01-01	1950
8	411737	6956	345263	1960-01-01	1960
9	537712	9526	209272	1970-01-01	1970
10	448229	7667	171796	1980-01-01	1980
11	399604	12792	179667	1990-01-01	1990
12	343312	52646	176101	2000-01-01	2000
13	305125	65127	231471	2010-01-01	2010

	community	population
6	Montgomery Country	25500
2	NW-East of Rock Creek, DC	22200
4	Prince Georges Country	8700
5	NE, DC	8100
0	VA	6400
1	NW-West of Rock Creek, DC	6400
3	SE and SW, DC	3600