Vote view congressional data

Vote view nominate scores updates Weekly Update of "Common Space" DW-NOMINATE Scores


In [1]:
import pandas as pd
import numpy as np
from urllib2 import Request, urlopen

In [2]:
data_url = "http://voteview.uga.edu/ftp/junkord/HANDSL01114A20_STAND_ALONE_23.DAT"

In [3]:
import csv
import requests

r = requests.get('http://voteview.uga.edu/ftp/junkord/HANDSL01114A20_STAND_ALONE_23.DAT') 
data = [row for row in r.iter_lines()]

In [4]:
columns = [ "congress_number", "icpsr" , "state_code" , "congressional_district_number" , "state_name" , "party_code" , "name" , "1st_dimension_coordinate" , "2nd_dimension_coordinate" ,"log_likelihood" ,"number_votes" , "number_of_classification_errors", "geometric_mean_probability"]

In [5]:
def data_clean(data):
    new_data = []
    for line in data:
        line = line.split(" ")
        line = filter(None, line)
        if len((line[3]))>=2:
            line[2][:-1]
            line.insert( 3, line[2][2:])
            line[2] = line[2][:-1]
        if line[4] =="NEW" or line[4]=="WEST" or line[4] =="RHODE" or line[4]=="SOUTH" or line[4]=="NORTH":
            line[4] = line[4] + ' '+line[5]
            line.pop(5)
        if len(line[7])<3:
            line[6]= line[6] +' '+line[7]
            line.pop(7)
        new_data.append(line)
    return new_data

In [6]:
clean_data_list = data_clean(data)

In [7]:
nominate_scores = pd.DataFrame(clean_data_list)

In [8]:
nominate_scores = nominate_scores.ix[:,:12]

In [9]:
nominate_scores.columns = columns
nominates_scores = nominate_scores.ix[:, nominate_scores.columns != 'name'].convert_objects(convert_numeric=True)


/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app

In [10]:
nominate_scores.name
nominate_scores = pd.concat([nominates_scores,nominate_scores.name], axis=1)
nominate_scores.head()


Out[10]:
congress_number icpsr state_code congressional_district_number state_name party_code 1st_dimension_coordinate 2nd_dimension_coordinate log_likelihood number_votes number_of_classification_errors geometric_mean_probability name
0 1 9062 19 8 CONNECT 5000 0.541 0.444 -25.62408 80.0 13.0 0.726 STURGES
1 1 9706 19 8 CONNECT 5000 0.745 0.176 -16.39512 86.0 5.0 0.826 WADSWORTH
2 1 8457 19 8 CONNECT 5000 0.594 0.296 -40.40777 107.0 18.0 0.685 SHERMAN
3 1 4766 19 8 CONNECT 5000 0.639 0.282 -28.80767 84.0 12.0 0.710 HUNTINGTON
4 1 9489 19 8 CONNECT 5000 0.694 0.228 -30.44611 106.0 11.0 0.750 TRUMBULL

In [11]:
nominate_scores[40200:].head()


Out[11]:
congress_number icpsr state_code congressional_district_number state_name party_code 1st_dimension_coordinate 2nd_dimension_coordinate log_likelihood number_votes number_of_classification_errors geometric_mean_probability name
40200 102 15637 46 5 MISSISS 100 NaN -0.047 0.92900 -319.36086 771.0 139.000 TAYLOR
40201 102 12009 34 1 MISSOUR 100 -0.495 -0.869 -183.81580 700.00000 74.0 0.769 CLAY
40202 102 29128 34 2 MISSOUR 100 -0.289 -0.105 -149.26880 779.00000 56.0 0.826 HORN
40203 102 14421 34 3 MISSOUR 100 -0.417 0.250 -99.13136 723.00000 38.0 0.872 GEPHARDT
40204 102 14451 34 4 MISSOUR 100 -0.172 0.720 -225.71602 756.00000 120.0 0.742 SKELTON

In [19]:
states = nominate_scores.state_name.unique()
print states, len(states)


['CONNECT' 'DELAWAR' 'GEORGIA' 'MARYLAN' 'MASSACH' 'NEW HAM' 'NEW JER'
 'NEW YOR' 'NORTH C' 'PENNSYL' 'SOUTH C' 'VIRGINI' 'RHODE I' 'KENTUCK'
 'VERMONT' 'TENNESS' 'USA' 'OHIO' 'LOUISIA' 'INDIANA' 'ILLINOI' 'MISSISS'
 'ALABAMA' 'MAINE' 'MISSOUR' 'ARKANSA' 'MICHIGA' 'FLORIDA' 'IOWA' 'TEXAS'
 'WISCONS' 'CALIFOR' 'MINNESO' 'OREGON' 'KANSAS' 'NEVADA' 'WEST VI'
 'NEBRASK' 'COLORAD' 'IDAHO' 'MONTANA' 'NORTH D' 'SOUTH D' 'WASHING'
 'WYOMING' 'UTAH' 'OKLAHOM' 'ARIZONA' 'NEW MEX' 'ALASKA' 'HAWAII'] 51

In [15]:
nominate_scores.party_code.unique()


Out[15]:
array([5000, 4000,    1,  200, 1346, 8888, 6000, 7777, 8000, 7000,   22,
        555, 1275,   26,   44,   29,  100,  328,  112,  329,  603,  403,
        310, 1111,  300, 4444,   46, 3333,  108,  206,   37,  203,  331,
       1116,  208,  326,  117,  114,  355,  356,  340, 1060,  354,  213,
        380,  370,  347,  537,  522,  402])