In [1]:
import pandas as pd
/Users/mercybenzaquen/.virtualenvs/Homework7/lib/python3.5/site-packages/matplotlib/__init__.py:1035: UserWarning: Duplicate key in file "/Users/mercybenzaquen/.matplotlib/matplotlibrc", line #2
(fname, cnt))
In [2]:
!pip install pandas
Requirement already satisfied (use --upgrade to upgrade): pandas in /Users/mercybenzaquen/.virtualenvs/Homework7/lib/python3.5/site-packages
Requirement already satisfied (use --upgrade to upgrade): numpy>=1.7.0 in /Users/mercybenzaquen/.virtualenvs/Homework7/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): pytz>=2011k in /Users/mercybenzaquen/.virtualenvs/Homework7/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): python-dateutil>=2 in /Users/mercybenzaquen/.virtualenvs/Homework7/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): six>=1.5 in /Users/mercybenzaquen/.virtualenvs/Homework7/lib/python3.5/site-packages (from python-dateutil>=2->pandas)
In [3]:
import pandas as pd
In [8]:
# Some rows in congress.csv carry more fields than the 13 the parser expects,
# which makes read_csv fail with "Error tokenizing data". Skip those rows, but
# keep the per-row warning so the dropped lines stay visible in the output.
try:
    # pandas >= 1.3: `on_bad_lines="warn"` is the replacement for
    # `error_bad_lines=False` (deprecated in 1.3, removed in 2.0).
    df = pd.read_csv("congress.csv", on_bad_lines="warn")
except TypeError:
    # Older pandas does not accept `on_bad_lines`; use the legacy flag there.
    df = pd.read_csv("congress.csv", error_bad_lines=False)
b'Skipping line 7054: expected 13 fields, saw 14\nSkipping line 7581: expected 13 fields, saw 14\nSkipping line 8088: expected 13 fields, saw 14\nSkipping line 9719: expected 13 fields, saw 15\nSkipping line 10019: expected 13 fields, saw 15\nSkipping line 10235: expected 13 fields, saw 15\nSkipping line 10550: expected 13 fields, saw 15\nSkipping line 10641: expected 13 fields, saw 14\nSkipping line 10764: expected 13 fields, saw 15\nSkipping line 11075: expected 13 fields, saw 15\nSkipping line 11168: expected 13 fields, saw 14\nSkipping line 11290: expected 13 fields, saw 15\nSkipping line 11606: expected 13 fields, saw 15\nSkipping line 11697: expected 13 fields, saw 14\nSkipping line 12141: expected 13 fields, saw 15\nSkipping line 12230: expected 13 fields, saw 14\nSkipping line 12664: expected 13 fields, saw 15\nSkipping line 12738: expected 13 fields, saw 14\n'
In [10]:
# Preview the first five rows to check that the columns were parsed as expected.
df.head(n=5)
Out[10]:
congress
chamber
bioguide
firstname
middlename
lastname
suffix
birthday
state
party
incumbent
termstart
age
0
80
house
M000112
Joseph
Jefferson
Mansfield
NaN
1861-02-09
TX
D
Yes
1947-01-03
85.9
1
80
house
D000448
Robert
Lee
Doughton
NaN
1863-11-07
NC
D
Yes
1947-01-03
83.2
2
80
house
S000001
Adolph
Joachim
Sabath
NaN
1866-04-04
IL
D
Yes
1947-01-03
80.7
3
80
house
E000023
Charles
Aubrey
Eaton
NaN
1868-03-29
NJ
R
Yes
1947-01-03
78.8
4
80
house
L000296
William
NaN
Lewis
NaN
1868-09-22
KY
R
No
1947-01-03
78.3
In [ ]:
Content source: mercybenzaquen/foundations-homework
Similar notebooks: