In [1]:
!pip install pandas
Requirement already satisfied (use --upgrade to upgrade): pandas in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages
Requirement already satisfied (use --upgrade to upgrade): pytz>=2011k in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): numpy>=1.7.0 in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): python-dateutil>=2 in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): six>=1.5 in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from python-dateutil>=2->pandas)
In [2]:
!pip install matplotlib
Requirement already satisfied (use --upgrade to upgrade): matplotlib in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages
Requirement already satisfied (use --upgrade to upgrade): numpy>=1.6 in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): python-dateutil in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): cycler in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): pyparsing!=2.0.0,!=2.0.4,>=1.5.6 in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): pytz in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): six>=1.5 in /Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages (from python-dateutil->matplotlib)
In [3]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
/Users/mercybenzaquen/.virtualenvs/Homework8/lib/python3.5/site-packages/matplotlib/__init__.py:1035: UserWarning: Duplicate key in file "/Users/mercybenzaquen/.matplotlib/matplotlibrc", line #2
(fname, cnt))
In [4]:
df = pd.read_csv("congress.csv", error_bad_lines=False)
b'Skipping line 7054: expected 13 fields, saw 14\nSkipping line 7581: expected 13 fields, saw 14\nSkipping line 8088: expected 13 fields, saw 14\nSkipping line 9719: expected 13 fields, saw 15\nSkipping line 10019: expected 13 fields, saw 15\nSkipping line 10235: expected 13 fields, saw 15\nSkipping line 10550: expected 13 fields, saw 15\nSkipping line 10641: expected 13 fields, saw 14\nSkipping line 10764: expected 13 fields, saw 15\nSkipping line 11075: expected 13 fields, saw 15\nSkipping line 11168: expected 13 fields, saw 14\nSkipping line 11290: expected 13 fields, saw 15\nSkipping line 11606: expected 13 fields, saw 15\nSkipping line 11697: expected 13 fields, saw 14\nSkipping line 12141: expected 13 fields, saw 15\nSkipping line 12230: expected 13 fields, saw 14\nSkipping line 12664: expected 13 fields, saw 15\nSkipping line 12738: expected 13 fields, saw 14\n'
In [6]:
df.head()
#bioguide: The alphanumeric ID for legislators in http://bioguide.congress.gov.
Out[6]:
congress
chamber
bioguide
firstname
middlename
lastname
suffix
birthday
state
party
incumbent
termstart
age
0
80
house
M000112
Joseph
Jefferson
Mansfield
NaN
1861-02-09
TX
D
Yes
1947-01-03
85.9
1
80
house
D000448
Robert
Lee
Doughton
NaN
1863-11-07
NC
D
Yes
1947-01-03
83.2
2
80
house
S000001
Adolph
Joachim
Sabath
NaN
1866-04-04
IL
D
Yes
1947-01-03
80.7
3
80
house
E000023
Charles
Aubrey
Eaton
NaN
1868-03-29
NJ
R
Yes
1947-01-03
78.8
4
80
house
L000296
William
NaN
Lewis
NaN
1868-09-22
KY
R
No
1947-01-03
78.3
In [70]:
df['chamber'].value_counts() #sounds like a lot. We might have repetitions.
Out[70]:
house 15065
senate 3552
Name: chamber, dtype: int64
In [71]:
df['bioguide'].describe() #we count the bioguide, which is unique to each legislator.
#There are only 3188 unique values, hence only 3188 senators and representatives in total.
Out[71]:
count 18617
unique 3188
top D000355
freq 30
Name: bioguide, dtype: object
In [79]:
total_democrats = (df['party'] == 'D').value_counts()
total_democrats
Out[79]:
True 10284
False 8333
Name: party, dtype: int64
In [271]:
total_republicans =(df['party'] == 'R').value_counts()
total_republicans
Out[271]:
False 10355
True 8262
Name: party, dtype: int64
In [218]:
df['age'].describe()
Out[218]:
count 18617.000000
mean 53.314841
std 10.679143
min 25.000000
25% 45.400000
50% 53.000000
75% 60.500000
max 98.100000
Name: age, dtype: float64
In [219]:
df.groupby("chamber")['age'].describe()
Out[219]:
chamber
house count 15065.000000
mean 52.366850
std 10.507940
min 25.000000
25% 44.600000
50% 52.000000
75% 59.600000
max 89.700000
senate count 3552.000000
mean 57.335529
std 10.463303
min 28.200000
25% 49.875000
50% 56.800000
75% 64.400000
max 98.100000
Name: age, dtype: float64
In [246]:
df['state'].value_counts()
Out[246]:
CA 1534
NY 1347
TX 985
PA 939
IL 849
OH 809
MI 670
FL 629
NJ 556
MA 478
NC 470
GA 441
VA 428
IN 424
MO 414
WI 386
TN 381
MN 356
LA 343
WA 338
MD 334
AL 334
KY 323
SC 283
IA 282
OK 277
CT 272
MS 251
CO 248
KS 243
AR 227
OR 227
AZ 226
WV 222
NE 184
NM 155
UT 154
ME 146
NH 142
RI 141
ID 138
MT 130
NV 130
HI 123
SD 123
ND 119
WY 108
VT 104
DE 102
AK 92
Name: state, dtype: int64
In [53]:
df.groupby("state")['chamber'].value_counts()
Out[53]:
state chamber
AK senate 58
house 34
AL house 263
senate 71
AR house 158
senate 69
AZ house 158
senate 68
CA house 1458
senate 76
CO house 179
senate 69
CT house 201
senate 71
DE senate 71
house 31
FL house 558
senate 71
GA house 370
senate 71
HI house 65
senate 58
IA house 214
senate 68
ID senate 70
house 68
IL house 778
senate 71
IN house 356
senate 68
...
OK house 207
senate 70
OR house 155
senate 72
PA house 870
senate 69
RI senate 72
house 69
SC house 209
senate 74
SD senate 71
house 52
TN house 310
senate 71
TX house 910
senate 75
UT house 85
senate 69
VA house 357
senate 71
VT senate 69
house 35
WA house 267
senate 71
WI house 317
senate 69
WV house 148
senate 74
WY senate 73
house 35
Name: chamber, dtype: int64
In [54]:
df['termstart'].describe() #here we would look at unique.
Out[54]:
count 18617
unique 34
top 1961-01-03
freq 559
Name: termstart, dtype: object
In [55]:
df.sort_values(by='age').tail(1) #A senator!
Out[55]:
congress
chamber
bioguide
firstname
middlename
lastname
suffix
birthday
state
party
incumbent
termstart
age
complete_name
15237
107
senate
T000254
J.
Strom
Thurmond
NaN
1902-12-05
SC
R
Yes
2001-01-03
98.1
J. Strom Thurmond
In [56]:
representative = df[df['chamber'] == 'house']
representative.sort_values(by='age').tail(1)
Out[56]:
congress
chamber
bioguide
firstname
middlename
lastname
suffix
birthday
state
party
incumbent
termstart
age
complete_name
18073
113
house
H000067
Ralph
M.
Hall
NaN
1923-05-03
TX
R
Yes
2013-01-03
89.7
Ralph M. Hall
In [57]:
representative.sort_values(by='age').head(2)
Out[57]:
congress
chamber
bioguide
firstname
middlename
lastname
suffix
birthday
state
party
incumbent
termstart
age
complete_name
5422
89
house
J000151
Jed
Joseph
Johnson
Jr.
1939-12-27
OK
D
No
1965-01-04
25.0
Jed Joseph Johnson
452
80
house
B000401
Lloyd
Millard
Bentsen
Jr.
1921-02-11
TX
D
No
1947-01-03
25.9
Lloyd Millard Bentsen
In [58]:
senator = df[df['chamber'] == 'senate']
senator.sort_values(by='age')
Out[58]:
congress
chamber
bioguide
firstname
middlename
lastname
suffix
birthday
state
party
incumbent
termstart
age
complete_name
554
80
senate
L000428
Russell
Billiu
Long
NaN
1918-11-03
LA
D
Yes
1947-01-03
28.2
Russell Billiu Long
4427
87
senate
K000105
Edward
M.
Kennedy
NaN
1932-02-22
MA
D
No
1961-01-03
28.9
Edward M. Kennedy
7721
93
senate
B000444
Joseph
R.
Biden
Jr.
1942-11-20
DE
D
No
1973-01-03
30.1
Joseph R. Biden
1111
81
senate
L000428
Russell
Billiu
Long
NaN
1918-11-03
LA
D
Yes
1949-01-03
30.2
Russell Billiu Long
4979
88
senate
K000105
Edward
M.
Kennedy
NaN
1932-02-22
MA
D
Yes
1963-01-09
30.9
Edward M. Kennedy
9910
97
senate
N000102
Don
NaN
Nickles
NaN
1948-12-06
OK
R
No
1981-01-05
32.1
NaN
4978
88
senate
H000237
Fred
Roy
Harris
NaN
1930-11-13
OK
D
No
1963-01-09
32.2
Fred Roy Harris
8271
94
senate
B000444
Joseph
R.
Biden
Jr.
1942-11-20
DE
D
Yes
1975-01-14
32.2
Joseph R. Biden
1666
82
senate
L000428
Russell
Billiu
Long
NaN
1918-11-03
LA
D
Yes
1951-01-03
32.2
Russell Billiu Long
3314
85
senate
C000388
Frank
Forrester
Church
NaN
1924-07-25
ID
D
No
1957-01-03
32.4
Frank Forrester Church
7171
92
senate
N000171
Samuel
Augustus
Nunn
NaN
1938-09-08
GA
D
No
1971-01-21
32.4
Samuel Augustus Nunn
5527
89
senate
K000105
Edward
M.
Kennedy
NaN
1932-02-22
MA
D
Yes
1965-01-04
32.9
Edward M. Kennedy
4426
87
senate
M001100
Maurice
J.
Murphy
Jr.
1927-10-03
NH
R
No
1961-01-03
33.3
Maurice J. Murphy
9909
97
senate
Q000007
James
Danforth
Quayle
NaN
1947-02-04
IN
R
Yes
1981-01-05
33.9
James Danforth Quayle
5526
89
senate
H000237
Fred
Roy
Harris
NaN
1930-11-13
OK
D
Yes
1965-01-04
34.1
Fred Roy Harris
10452
98
senate
N000102
Don
NaN
Nickles
NaN
1948-12-06
OK
R
Yes
1983-01-03
34.1
NaN
8824
95
senate
B000444
Joseph
R.
Biden
Jr.
1942-11-20
DE
D
Yes
1977-01-04
34.1
Joseph R. Biden
2223
83
senate
L000428
Russell
Billiu
Long
NaN
1918-11-03
LA
D
Yes
1953-01-03
34.2
Russell Billiu Long
1665
82
senate
P000460
Charles
Edward
Potter
NaN
1916-10-30
MI
R
Yes
1951-01-03
34.2
Charles Edward Potter
7720
93
senate
N000171
Samuel
Augustus
Nunn
NaN
1938-09-08
GA
D
Yes
1973-01-03
34.3
Samuel Augustus Nunn
6623
91
senate
T000410
John
Varick
Tunney
NaN
1934-06-26
CA
D
Yes
1969-01-03
34.5
John Varick Tunney
3868
86
senate
C000388
Frank
Forrester
Church
NaN
1924-07-25
ID
D
Yes
1959-01-07
34.5
Frank Forrester Church
8270
94
senate
L000174
Patrick
J.
Leahy
NaN
1940-03-31
VT
D
No
1975-01-14
34.8
Patrick J. Leahy
6070
90
senate
K000105
Edward
M.
Kennedy
NaN
1932-02-22
MA
D
Yes
1967-01-10
34.9
Edward M. Kennedy
4977
88
senate
B000254
Birch
Evans
Bayh
NaN
1928-01-22
IN
D
No
1963-01-09
35.0
Birch Evans Bayh
4976
88
senate
M000851
Walter
Frederick
Mondale
NaN
1928-01-05
MN
I
No
1963-01-09
35.0
Walter Frederick Mondale
8823
95
senate
B000243
Max
S.
Baucus
NaN
1941-12-11
MT
D
Yes
1977-01-04
35.1
Max S. Baucus
4425
87
senate
T000322
John
Goodwin
Tower
NaN
1925-09-29
TX
R
No
1961-01-03
35.3
John Goodwin Tower
9367
96
senate
B001225
William
Warren
Bradley
NaN
1943-07-28
NJ
D
No
1979-01-15
35.5
William Warren Bradley
2222
83
senate
K000107
John
Fitzgerald
Kennedy
NaN
1917-05-29
MA
D
Yes
1953-01-03
35.6
John Fitzgerald Kennedy
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1562
82
senate
G000418
Theodore
Francis
Green
NaN
1867-10-02
RI
D
Yes
1951-01-03
83.3
Theodore Francis Green
4318
87
senate
H000385
Carl
Trumbull
Hayden
NaN
1877-10-02
AZ
D
Yes
1961-01-03
83.3
Carl Trumbull Hayden
10890
99
senate
S000852
John
Cornelius
Stennis
NaN
1901-08-03
MS
D
Yes
1985-01-03
83.4
John Cornelius Stennis
11433
100
senate
T000254
J.
Strom
Thurmond
NaN
1902-12-05
SC
R
Yes
1987-01-06
84.1
J. Strom Thurmond
17417
111
senate
I000025
Daniel
K.
Inouye
NaN
1924-09-07
HI
D
Yes
2009-01-06
84.3
Daniel K. Inouye
17418
111
senate
A000069
Daniel
Kahikina
Akaka
NaN
1924-09-11
HI
D
Yes
2009-01-06
84.3
Daniel Kahikina Akaka
17416
111
senate
L000123
Frank
R.
Lautenberg
NaN
1924-01-23
NJ
D
Yes
2009-01-06
85.0
Frank R. Lautenberg
15780
108
senate
B001210
Robert
C.
Byrd
NaN
1917-11-20
WV
D
Yes
2003-01-07
85.1
Robert C. Byrd
2110
83
senate
G000418
Theodore
Francis
Green
NaN
1867-10-02
RI
D
Yes
1953-01-03
85.3
Theodore Francis Green
4872
88
senate
H000385
Carl
Trumbull
Hayden
NaN
1877-10-02
AZ
D
Yes
1963-01-09
85.3
Carl Trumbull Hayden
11432
100
senate
S000852
John
Cornelius
Stennis
NaN
1901-08-03
MS
D
Yes
1987-01-06
85.4
John Cornelius Stennis
11976
101
senate
T000254
J.
Strom
Thurmond
NaN
1902-12-05
SC
R
Yes
1989-01-03
86.1
J. Strom Thurmond
17972
112
senate
A000069
Daniel
Kahikina
Akaka
NaN
1924-09-11
HI
D
Yes
2011-01-05
86.3
Daniel Kahikina Akaka
17971
112
senate
L000123
Frank
R.
Lautenberg
NaN
1924-01-23
NJ
D
Yes
2011-01-05
87.0
Frank R. Lautenberg
16320
109
senate
B001210
Robert
C.
Byrd
NaN
1917-11-20
WV
D
Yes
2005-01-04
87.1
Robert C. Byrd
5423
89
senate
H000385
Carl
Trumbull
Hayden
NaN
1877-10-02
AZ
D
Yes
1965-01-04
87.3
Carl Trumbull Hayden
2665
84
senate
G000418
Theodore
Francis
Green
NaN
1867-10-02
RI
D
Yes
1955-01-05
87.3
Theodore Francis Green
12518
102
senate
T000254
J.
Strom
Thurmond
NaN
1902-12-05
SC
R
Yes
1991-01-03
88.1
J. Strom Thurmond
16869
110
senate
B001210
Robert
C.
Byrd
NaN
1917-11-20
WV
D
Yes
2007-01-04
89.1
Robert C. Byrd
5967
90
senate
H000385
Carl
Trumbull
Hayden
NaN
1877-10-02
AZ
D
Yes
1967-01-10
89.3
Carl Trumbull Hayden
3213
85
senate
G000418
Theodore
Francis
Green
NaN
1867-10-02
RI
D
Yes
1957-01-03
89.3
Theodore Francis Green
13063
103
senate
T000254
J.
Strom
Thurmond
NaN
1902-12-05
SC
R
Yes
1993-01-05
90.1
J. Strom Thurmond
18072
112
senate
I000025
Daniel
K.
Inouye
NaN
1920-09-06
HI
D
Yes
2011-01-05
90.3
Daniel K. Inouye
3763
86
senate
G000418
Theodore
Francis
Green
NaN
1867-10-02
RI
D
Yes
1959-01-07
91.3
Theodore Francis Green
13609
104
senate
T000254
J.
Strom
Thurmond
NaN
1902-12-05
SC
R
Yes
1995-01-04
92.1
J. Strom Thurmond
18616
113
senate
L000123
Frank
R.
Lautenberg
NaN
1920-01-22
NJ
D
Yes
2013-01-03
93.0
Frank R. Lautenberg
14156
105
senate
T000254
J.
Strom
Thurmond
NaN
1902-12-05
SC
R
Yes
1997-01-07
94.1
J. Strom Thurmond
17517
111
senate
B001210
Robert
C.
Byrd
NaN
1913-11-19
WV
D
Yes
2009-01-06
95.1
Robert C. Byrd
14693
106
senate
T000254
J.
Strom
Thurmond
NaN
1902-12-05
SC
R
Yes
1999-01-06
96.1
J. Strom Thurmond
15237
107
senate
T000254
J.
Strom
Thurmond
NaN
1902-12-05
SC
R
Yes
2001-01-03
98.1
J. Strom Thurmond
3552 rows × 14 columns
In [59]:
senator.sort_values(by='age').head(2)
Out[59]:
congress
chamber
bioguide
firstname
middlename
lastname
suffix
birthday
state
party
incumbent
termstart
age
complete_name
554
80
senate
L000428
Russell
Billiu
Long
NaN
1918-11-03
LA
D
Yes
1947-01-03
28.2
Russell Billiu Long
4427
87
senate
K000105
Edward
M.
Kennedy
NaN
1932-02-22
MA
D
No
1961-01-03
28.9
Edward M. Kennedy
In [60]:
# Store a new column
df['complete_name'] = df['firstname']+ " "+ df['middlename'] + " "+df['lastname']
df.head()
Out[60]:
congress
chamber
bioguide
firstname
middlename
lastname
suffix
birthday
state
party
incumbent
termstart
age
complete_name
0
80
house
M000112
Joseph
Jefferson
Mansfield
NaN
1861-02-09
TX
D
Yes
1947-01-03
85.9
Joseph Jefferson Mansfield
1
80
house
D000448
Robert
Lee
Doughton
NaN
1863-11-07
NC
D
Yes
1947-01-03
83.2
Robert Lee Doughton
2
80
house
S000001
Adolph
Joachim
Sabath
NaN
1866-04-04
IL
D
Yes
1947-01-03
80.7
Adolph Joachim Sabath
3
80
house
E000023
Charles
Aubrey
Eaton
NaN
1868-03-29
NJ
R
Yes
1947-01-03
78.8
Charles Aubrey Eaton
4
80
house
L000296
William
NaN
Lewis
NaN
1868-09-22
KY
R
No
1947-01-03
78.3
NaN
In [61]:
period_count = df.groupby('complete_name')['termstart'].value_counts().sort_values(ascending=False)
pd.DataFrame(period_count)
#With the help of Stephan we figured out that term-start is every 2 years
#(so this is not giving us info about how many terms has each legislator served)
Out[61]:
termstart
complete_name
termstart
Barry Morris Goldwater
1981-01-05
2
Daniel Kahikina Akaka
1989-01-03
2
Max S. Baucus
1977-01-04
2
Carl Thomas Curtis
1953-01-03
2
Samuel D. Brownback
1995-01-04
2
James Thomas Broyhill
1985-01-03
2
Donald M. Payne
2011-01-05
2
J. Strom Thurmond
1963-01-09
2
Eugene A. Chappie
1985-01-03
2
1983-01-03
2
James M. Inhofe
1993-01-05
2
Barry Morris Goldwater
1977-01-04
2
Richard Milhous Nixon
1949-01-03
2
Barry Morris Goldwater
1975-01-14
2
1973-01-03
2
Wayne Lyman Morse
1955-01-05
2
Roger F. Wicker
2007-01-04
2
Kirsten E. Gillibrand
2009-01-06
2
Barry Morris Goldwater
1971-01-21
2
1969-01-03
2
Harry Flood Byrd
1965-01-04
2
Edward J. Markey
2013-01-03
2
Barry Morris Goldwater
1979-01-15
2
Richard C. Shelby
1993-01-05
2
Edwin Washington Edwards
1971-01-21
2
Mark Steven Kirk
2009-01-06
2
Roman Lee Hruska
1953-01-03
2
William V. Roth
1969-01-03
2
John Varick Tunney
1969-01-03
2
Robert Theodore Stafford
1971-01-21
2
...
...
...
Milton Horace West
1947-01-03
1
Milton Robert Carr
1977-01-04
1
Milton Willits Glenn
1961-01-03
1
Milton Robert Carr
1979-01-15
1
Milton Willits Glenn
1959-01-07
1
1957-01-03
1
Milton Ruben Young
1979-01-15
1
1977-01-04
1
1975-01-14
1
1973-01-03
1
1971-01-21
1
1969-01-03
1
1967-01-10
1
1965-01-04
1
1963-01-09
1
1961-01-03
1
1959-01-07
1
1957-01-03
1
1955-01-05
1
1953-01-03
1
1951-01-03
1
1949-01-03
1
1947-01-03
1
Milton Robert Carr
1993-01-05
1
1991-01-03
1
1989-01-03
1
1987-01-06
1
1985-01-03
1
1983-01-03
1
Abe McGregor Goff
1947-01-03
1
15045 rows × 1 columns
In [62]:
terms_served_by_senators= senator.groupby('complete_name')['bioguide'].value_counts()
years= terms_served_by_senators * 2
total_years_served = years.sort_values(ascending=False)
pd.DataFrame(total_years_served)
Out[62]:
bioguide
complete_name
bioguide
Robert C. Byrd
B001210
52
J. Strom Thurmond
T000254
52
Edward M. Kennedy
K000105
50
Daniel K. Inouye
I000025
50
John Cornelius Stennis
S000852
42
Ted F. Stevens
S000888
42
Ernest F. Hollings
H000725
40
Russell Billiu Long
L000428
40
Patrick J. Leahy
L000174
40
Max S. Baucus
B000243
38
Orrin G. Hatch
H000338
38
Joseph R. Biden
B000444
38
Richard G. Lugar
L000504
36
Claiborne de Borda Pell
P000193
36
Pete V. Domenici
D000407
36
Charles E. Grassley
G000386
34
Quentin Northrup Burdick
B001077
34
Milton Ruben Young
Y000047
34
Warren Grant Magnuson
M000053
34
John W. Warner
W000154
32
John Little McClellan
M000332
32
William V. Roth
R000460
32
John Jackson Sparkman
S000701
32
James Oliver Eastland
E000018
32
Henry Martin Jackson
J000013
32
Frank R. Lautenberg
L000123
30
John D. Rockefeller
R000361
30
John Forbes Kerry
K000148
30
Richard C. Shelby
S000320
30
Paul S. Sarbanes
S000064
30
...
...
...
Jocelyn Birch Burdick
B001076
2
John E. Walsh
W000818
2
Edward J. Markey
M000133
2
Edward Hall Moore
M000895
2
Robert Charles Krueger
K000333
2
Wallace Humphrey White
W000396
2
David Henry Gambrell
G000034
2
David Kemp Karnes
K000011
2
Edward E. Kaufman
K000373
2
Edward David Crippa
C000906
2
Donald Stuart Russell
R000525
2
John Holmes Overton
O000146
2
Edward Vivian Robertson
R000320
2
Clayton Douglass Buck
B001013
2
James Howard Edmondson
E000055
2
Carte P. Goodwin
G000561
2
Vera Cahalan Bushfield
B001169
2
Joseph Hurst Ball
B000099
2
Joseph H. Bottum
B000656
2
Eva Kelly Bowring
B000709
2
Charles Ezra Daniel
D000031
2
Ernest S. Brown
B000913
2
James Thomas Broyhill
B000966
2
Charles Wayland Brooks
B000874
2
Edwin Washington Edwards
E000067
2
John Joseph Hickey
H000561
2
Christopher S. Murphy
M001169
2
Wilbert Lee O'Daniel
O000034
2
Clarence Norman Brunsdale
B000982
2
John Dempsey Hoblitzell
H000665
2
448 rows × 1 columns
In [63]:
terms_served_by_representative= representative.groupby("complete_name")['bioguide'].value_counts()
years= terms_served_by_representative * 2
total_years_served = years.sort_values(ascending=False)
pd.DataFrame(total_years_served)
Out[63]:
bioguide
complete_name
bioguide
John D. Dingell
D000355
60
Jamie Lloyd Whitten
W000428
48
Sidney Richard Yates
Y000013
48
Charles Edward Bennett
B000371
44
C. W. Bill Young
Y000031
44
Charles B. Rangel
R000053
44
David Ross Obey
O000007
42
Don E. Young
Y000033
42
Charles Melvin Price
P000522
42
Jack Bascom Brooks
B000880
42
William Huston Natcher
N000009
42
Henry A. Waxman
W000215
40
Fortney H. Stark
S000810
40
Peter Wallace Rodino
R000374
40
Edward J. Markey
M000133
40
John P. Murtha
M001120
38
Henry Barbosa González
G000272
38
Dante Bruno Fascell
F000041
38
Robert Henry Michel
M000692
38
Nick J. Rahall
R000011
38
William S. Broomfield
B000890
36
Carl Dewey Perkins
P000230
36
Edward Patrick Boland
B000600
36
Thomas E. Petri
P000265
36
James L. Oberstar
O000006
36
George E. Brown
B000918
36
Joseph Michael McDade
M000399
36
Norman D. Dicks
D000327
36
Philip M. Crane
C000873
36
James Claude Wright
W000763
36
...
...
...
John Richard Schmidhauser
S000131
2
John Richard Walsh
W000100
2
John Robert Foley
F000237
2
John Robert Hansen
H000173
2
Keith J. Rothfus
R000598
2
Kathleen C. Hochul
H001062
2
Kathleen A. Dahlkemper
D000608
2
D. Bailey Merrill
M000656
2
Katherine M. Clark
C001101
2
K. William Stinson
S000928
2
Joseph Walker Barr
B000170
2
Joseph Scofield Ammerman
A000177
2
Joseph Peyton Wyatt
W000777
2
Joseph P. Kennedy
K000379
2
Joseph Oliva Huot
H001001
2
Joseph Jefferson Mansfield
M000112
2
Joseph James Maraziti
M000121
2
Joseph Francis Smith
S000579
2
Joseph Edward Hendricks
H000492
2
Joseph Anthony LeFante
L000561
2
Daniel T. Kildee
K000380
2
John Williams Gwynne
G000543
2
John William Flannagan
F000191
2
John W. Cox
C000836
2
Darwin Gale Schisler
S000128
2
John Travers Wood
W000700
2
David A. Levy
L000267
2
David Alan Curson
C001089
2
John Schiller Wold
W000671
2
Abe McGregor Goff
G000253
2
2171 rows × 1 columns
In [66]:
df['firstname'].value_counts()
#this might be counting the same person many times but still we can get an idea of what names are more popular
Out[66]:
John 1448
William 935
James 855
Robert 753
Thomas 512
Charles 488
George 355
Richard 333
Joseph 314
Frank 308
Edward 266
David 251
Michael 239
Paul 219
Henry 192
Daniel 171
Donald 152
Harold 146
Peter 137
Albert 132
Walter 124
Howard 123
Carl 118
Mark 94
Jim 92
Ralph 91
Tom 90
Samuel 85
J. 85
Jack 81
...
Angelo 1
D. 1
Harmar 1
Muriel 1
Sandy 1
Beto 1
Jocelyn 1
Orland 1
Heidi 1
Irwin 1
Abe 1
Betsy 1
Larkin 1
Clayton 1
Chip 1
Darwin 1
Lera 1
Thurman 1
Markwayne 1
Carter 1
Ward 1
Jed 1
Andrea 1
Nan 1
Billie 1
Garland 1
Cliffard 1
Harve 1
Filemon 1
Rolland 1
Name: firstname, dtype: int64
In [115]:
plt.style.use("ggplot")
df['age'].hist(bins=15, xlabelsize=12, ylabelsize=12, color=['y'])
Out[115]:
<matplotlib.axes._subplots.AxesSubplot at 0x134b5d048>
In [104]:
df.head(20).sort_values(by='age',ascending=True).plot(kind='barh', x=['complete_name'], y='age', color="y")
Out[104]:
<matplotlib.axes._subplots.AxesSubplot at 0x10fcb2b70>
In [136]:
df.plot.scatter(x='congress', y='age');
In [151]:
df.plot.hexbin(x='age', y='congress', gridsize=25, legend=True)
Out[151]:
<matplotlib.axes._subplots.AxesSubplot at 0x1389b4fd0>
In [ ]:
Content source: mercybenzaquen/foundations-homework
Similar notebooks: