In [1]:
import pandas as pd
import numpy as np
from urllib2 import Request, urlopen
In [2]:
data_url = "http://voteview.uga.edu/ftp/junkord/HANDSL01114A20_STAND_ALONE_23.DAT"
In [3]:
import csv
import requests
r = requests.get(data_url)
data = [row for row in r.iter_lines()]
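Note: this notebook runs under Python 2, where iter_lines() yields plain str rows. Under Python 3 the rows come back as bytes and would need decoding before the string operations below; a minimal sketch (latin-1 is an assumed encoding, not confirmed against the file):

data = [row.decode('latin-1') for row in r.iter_lines()]  # Python 3 only; latin-1 is an assumption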
In [4]:
columns = [ "congress_number", "icpsr" , "state_code" , "congressional_district_number" , "state_name" , "party_code" , "name" , "1st_dimension_coordinate" , "2nd_dimension_coordinate" ,"log_likelihood" ,"number_votes" , "number_of_classification_errors", "geometric_mean_probability"]
In [5]:
def data_clean(data):
    """Split each raw, whitespace-delimited record into its component fields."""
    new_data = []
    for line in data:
        line = line.split(" ")
        line = filter(None, line)  # drop empty strings left by runs of spaces (Python 2: returns a list)
        if len(line[3]) >= 2:
            # The state code and district number appear to have run together;
            # split the fused field back into two.
            line.insert(3, line[2][2:])
            line[2] = line[2][:-1]
        if line[4] in ("NEW", "WEST", "RHODE", "SOUTH", "NORTH"):
            # Two-word state names were split on the space; rejoin them.
            line[4] = line[4] + ' ' + line[5]
            line.pop(5)
        if len(line[7]) < 3:
            # A short trailing token belongs to the legislator's name; rejoin it.
            line[6] = line[6] + ' ' + line[7]
            line.pop(7)
        new_data.append(line)
    return new_data
In [6]:
clean_data_list = data_clean(data)
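A quick sanity check (a sketch added here, not part of the original run): any row that came out of the cleaner with fewer fields than there are column names would break the labelling below.

short_rows = [row for row in clean_data_list if len(row) < len(columns)]
print len(short_rows), 'rows have fewer than', len(columns), 'fields'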
In [7]:
nominate_scores = pd.DataFrame(clean_data_list)
In [8]:
nominate_scores = nominate_scores.ix[:,:12]
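On pandas releases where .ix has been removed, an equivalent positional slice would be the following sketch (it assumes the default integer column labels, under which .ix[:, :12] is an inclusive label slice, i.e. the first 13 columns):

nominate_scores = nominate_scores.iloc[:, :13]  # newer-pandas equivalent of .ix[:, :12]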
In [9]:
nominate_scores.columns = columns
# Convert every column except the legislator name to numeric dtypes.
numeric_scores = nominate_scores.ix[:, nominate_scores.columns != 'name'].convert_objects(convert_numeric=True)
In [10]:
# Re-attach the name column to the converted numeric columns.
nominate_scores = pd.concat([numeric_scores, nominate_scores['name']], axis=1)
nominate_scores.head()
Out[10]:
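convert_objects was removed in later pandas versions; on a current install the same conversion can be sketched with pd.to_numeric (an alternative to the call above, not what this notebook ran):

numeric_cols = [c for c in nominate_scores.columns if c != 'name']
nominate_scores[numeric_cols] = nominate_scores[numeric_cols].apply(pd.to_numeric, errors='coerce')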
In [11]:
nominate_scores[40200:].head()
Out[11]:
In [19]:
states = nominate_scores.state_name.unique()
print states, len(states)
In [15]:
nominate_scores.party_code.unique()
Out[15]:
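These are ICPSR party codes. The two that dominate recent congresses can be mapped to readable labels as in this sketch (it covers only the codes I can vouch for, 100 = Democrat and 200 = Republican; the many historical party codes would be left as NaN):

party_labels = {100: 'Democrat', 200: 'Republican'}  # other ICPSR codes exist for historical parties
nominate_scores['party_name'] = nominate_scores.party_code.map(party_labels)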