Vote view congressional data

Vote view nominate scores updates Weekly Update of "Common Space" DW-NOMINATE Scores



In [1]:

    
import pandas as pd
import numpy as np
from urllib2 import Request, urlopen



In [2]:

    
data_url = "http://voteview.uga.edu/ftp/junkord/HANDSL01114A20_STAND_ALONE_23.DAT"



In [3]:

    
import csv
import requests

r = requests.get('http://voteview.uga.edu/ftp/junkord/HANDSL01114A20_STAND_ALONE_23.DAT') 
data = [row for row in r.iter_lines()]



In [4]:

    
columns = [ "congress_number", "icpsr" , "state_code" , "congressional_district_number" , "state_name" , "party_code" , "name" , "1st_dimension_coordinate" , "2nd_dimension_coordinate" ,"log_likelihood" ,"number_votes" , "number_of_classification_errors", "geometric_mean_probability"]



In [5]:

    
def data_clean(data):
    new_data = []
    for line in data:
        line = line.split(" ")
        line = filter(None, line)
        if len((line[3]))>=2:
            line[2][:-1]
            line.insert( 3, line[2][2:])
            line[2] = line[2][:-1]
        if line[4] =="NEW" or line[4]=="WEST" or line[4] =="RHODE" or line[4]=="SOUTH" or line[4]=="NORTH":
            line[4] = line[4] + ' '+line[5]
            line.pop(5)
        if len(line[7])<3:
            line[6]= line[6] +' '+line[7]
            line.pop(7)
        new_data.append(line)
    return new_data



In [6]:

    
clean_data_list = data_clean(data)



In [7]:

    
nominate_scores = pd.DataFrame(clean_data_list)



In [8]:

    
nominate_scores = nominate_scores.ix[:,:12]



In [9]:

    
nominate_scores.columns = columns
nominates_scores = nominate_scores.ix[:, nominate_scores.columns != 'name'].convert_objects(convert_numeric=True)









    



/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app



In [10]:

    
nominate_scores.name
nominate_scores = pd.concat([nominates_scores,nominate_scores.name], axis=1)
nominate_scores.head()









    Out[10]:






  
    
      
      congress_number
      icpsr
      state_code
      congressional_district_number
      state_name
      party_code
      1st_dimension_coordinate
      2nd_dimension_coordinate
      log_likelihood
      number_votes
      number_of_classification_errors
      geometric_mean_probability
      name
    
  
  
    
      0
      1
      9062
      19
      8
      CONNECT
      5000
      0.541
      0.444
      -25.62408
      80.0
      13.0
      0.726
      STURGES
    
    
      1
      1
      9706
      19
      8
      CONNECT
      5000
      0.745
      0.176
      -16.39512
      86.0
      5.0
      0.826
      WADSWORTH
    
    
      2
      1
      8457
      19
      8
      CONNECT
      5000
      0.594
      0.296
      -40.40777
      107.0
      18.0
      0.685
      SHERMAN
    
    
      3
      1
      4766
      19
      8
      CONNECT
      5000
      0.639
      0.282
      -28.80767
      84.0
      12.0
      0.710
      HUNTINGTON
    
    
      4
      1
      9489
      19
      8
      CONNECT
      5000
      0.694
      0.228
      -30.44611
      106.0
      11.0
      0.750
      TRUMBULL



In [11]:

    
nominate_scores[40200:].head()









    Out[11]:






  
    
      
      congress_number
      icpsr
      state_code
      congressional_district_number
      state_name
      party_code
      1st_dimension_coordinate
      2nd_dimension_coordinate
      log_likelihood
      number_votes
      number_of_classification_errors
      geometric_mean_probability
      name
    
  
  
    
      40200
      102
      15637
      46
      5
      MISSISS
      100
      NaN
      -0.047
      0.92900
      -319.36086
      771.0
      139.000
      TAYLOR
    
    
      40201
      102
      12009
      34
      1
      MISSOUR
      100
      -0.495
      -0.869
      -183.81580
      700.00000
      74.0
      0.769
      CLAY
    
    
      40202
      102
      29128
      34
      2
      MISSOUR
      100
      -0.289
      -0.105
      -149.26880
      779.00000
      56.0
      0.826
      HORN
    
    
      40203
      102
      14421
      34
      3
      MISSOUR
      100
      -0.417
      0.250
      -99.13136
      723.00000
      38.0
      0.872
      GEPHARDT
    
    
      40204
      102
      14451
      34
      4
      MISSOUR
      100
      -0.172
      0.720
      -225.71602
      756.00000
      120.0
      0.742
      SKELTON



In [19]:

    
states = nominate_scores.state_name.unique()
print states, len(states)









    



['CONNECT' 'DELAWAR' 'GEORGIA' 'MARYLAN' 'MASSACH' 'NEW HAM' 'NEW JER'
 'NEW YOR' 'NORTH C' 'PENNSYL' 'SOUTH C' 'VIRGINI' 'RHODE I' 'KENTUCK'
 'VERMONT' 'TENNESS' 'USA' 'OHIO' 'LOUISIA' 'INDIANA' 'ILLINOI' 'MISSISS'
 'ALABAMA' 'MAINE' 'MISSOUR' 'ARKANSA' 'MICHIGA' 'FLORIDA' 'IOWA' 'TEXAS'
 'WISCONS' 'CALIFOR' 'MINNESO' 'OREGON' 'KANSAS' 'NEVADA' 'WEST VI'
 'NEBRASK' 'COLORAD' 'IDAHO' 'MONTANA' 'NORTH D' 'SOUTH D' 'WASHING'
 'WYOMING' 'UTAH' 'OKLAHOM' 'ARIZONA' 'NEW MEX' 'ALASKA' 'HAWAII'] 51



In [15]:

    
nominate_scores.party_code.unique()









    Out[15]:





array([5000, 4000,    1,  200, 1346, 8888, 6000, 7777, 8000, 7000,   22,
        555, 1275,   26,   44,   29,  100,  328,  112,  329,  603,  403,
        310, 1111,  300, 4444,   46, 3333,  108,  206,   37,  203,  331,
       1116,  208,  326,  117,  114,  355,  356,  340, 1060,  354,  213,
        380,  370,  347,  537,  522,  402])

	congress_number	icpsr	state_code	congressional_district_number	state_name	party_code	1st_dimension_coordinate	2nd_dimension_coordinate	log_likelihood	number_votes	number_of_classification_errors	geometric_mean_probability	name
0	1	9062	19	8	CONNECT	5000	0.541	0.444	-25.62408	80.0	13.0	0.726	STURGES
1	1	9706	19	8	CONNECT	5000	0.745	0.176	-16.39512	86.0	5.0	0.826	WADSWORTH
2	1	8457	19	8	CONNECT	5000	0.594	0.296	-40.40777	107.0	18.0	0.685	SHERMAN
3	1	4766	19	8	CONNECT	5000	0.639	0.282	-28.80767	84.0	12.0	0.710	HUNTINGTON
4	1	9489	19	8	CONNECT	5000	0.694	0.228	-30.44611	106.0	11.0	0.750	TRUMBULL

	congress_number	icpsr	state_code	congressional_district_number	state_name	party_code	1st_dimension_coordinate	2nd_dimension_coordinate	log_likelihood	number_votes	number_of_classification_errors	geometric_mean_probability	name
40200	102	15637	46	5	MISSISS	100	NaN	-0.047	0.92900	-319.36086	771.0	139.000	TAYLOR
40201	102	12009	34	1	MISSOUR	100	-0.495	-0.869	-183.81580	700.00000	74.0	0.769	CLAY
40202	102	29128	34	2	MISSOUR	100	-0.289	-0.105	-149.26880	779.00000	56.0	0.826	HORN
40203	102	14421	34	3	MISSOUR	100	-0.417	0.250	-99.13136	723.00000	38.0	0.872	GEPHARDT
40204	102	14451	34	4	MISSOUR	100	-0.172	0.720	-225.71602	756.00000	120.0	0.742	SKELTON