In [7]:
import sys
sys.path.insert(1,'/Library/Python/2.7/site-packages')
import argparse
import httplib2
import json
import oauth2client,oauth2client.file,oauth2client.tools
import gspread
import pandas as pd
In [8]:
CLIENT_SECRET_FILE = '/Users/fatchat/cloud-credentials/gapi.json'
APPLICATION_NAME = 'API Project'
CREDENTIAL_PATH='/Users/fatchat/cloud-credentials/gapi-cred.json'
In [9]:
def get_credentials(scopes,force=False):
store = oauth2client.file.Storage(CREDENTIAL_PATH)
credentials = store.get()
if credentials is None or credentials.access_token_expired or force:
flow = oauth2client.client.flow_from_clientsecrets(CLIENT_SECRET_FILE, scopes)
flow.user_agent = APPLICATION_NAME
flags=argparse.ArgumentParser(parents=[oauth2client.tools.argparser]).parse_args(['--noauth_local_webserver'])
credentials = oauth2client.tools.run_flow(flow, store, flags)
return credentials
In [11]:
creds=get_credentials(u'https://spreadsheets.google.com/feeds')
In [28]:
gc=gspread.authorize(creds)
ssheet=gc.open_by_key('1XV4mZi3gYDgwx5PrLwqqHTUlHkwkV-6uy_yeJh3X46o')
wsheet=ssheet.worksheet('US mass shootings')
In [29]:
wsheet.row_count,wsheet.col_count
Out[29]:
In [67]:
state_abbreviations="""
State Abbreviation Postal Code
Alabama Ala. AL
Alaska Alaska AK
American Samoa AS
Arizona Ariz. AZ
Arkansas Ark. AR
California Calif. CA
Colorado Colo. CO
Connecticut Conn. CT
Delaware Del. DE
Dist. of Columbia D.C. DC
Florida Fla. FL
Georgia Ga. GA
Guam Guam GU
Hawaii Hawaii HI
Idaho Idaho ID
Illinois Ill. IL
Indiana Ind. IN
Iowa Iowa IA
Kansas Kans. KS
Kentucky Ky. KY
Louisiana La. LA
Maine Maine ME
Maryland Md. MD
Marshall Islands MH
Massachusetts Mass. MA
Michigan Mich. MI
Micronesia FM
Minnesota Minn. MN
Mississippi Miss. MS
Missouri Mo. MO
Montana Mont. MT
Nebraska Nebr. NE
Nevada Nev. NV
New Hampshire N.H. NH
New Jersey N.J. NJ
New Mexico N.M. NM
New York N.Y. NY
North Carolina N.C. NC
North Dakota N.D. ND
Northern Marianas MP
Ohio Ohio OH
Oklahoma Okla. OK
Oregon Ore. OR
Palau PW
Pennsylvania Pa. PA
Puerto Rico P.R. PR
Rhode Island R.I. RI
South Carolina S.C. SC
South Dakota S.D. SD
Tennessee Tenn. TN
Texas Tex. TX
Utah Utah UT
Vermont Vt. VT
Virginia Va. VAM
Virgin Islands V.I. VI
Washington Wash. WA
West Virginia W.Va. WV
Wisconsin Wis. WI
Wyoming Wyo. WY
"""
state_lookup_map={}
for s in state_abbreviations.split('\n'):
if s.strip()=='':
continue
long_name,short_name,state_code=map(lambda x: x.strip().upper(),s.strip().split('\t'))
state_lookup_map[long_name]=state_code
state_lookup_map[short_name]=state_code
state_lookup_map[state_code]=state_code
In [31]:
raw_df=pd.DataFrame([wsheet.row_values(row_num) for row_num in range(1,wsheet.row_count+1)])
df=raw_df
df.columns=df.ix[0]
df=df.ix[1:]
df=df.drop_duplicates()[:-1]
In [70]:
def extract_state(geo):
if type(geo)==str and geo.find(',') > -1:
city,state=geo.split(',')
return state_lookup_map[state.strip().upper()]
return geo
In [72]:
df['state']=df.Location.apply(extract_state)
In [75]:
df['Type of weapons']
Out[75]:
In [ ]: