In [4]:
import pickle
mydata = [1,2,3,4,5,6,7,8,9,10]
pickle.dump(mydata, open('mydata.p','wb'))
In [6]:
more_data = [10,9,8,7,6,5,4,3,2,1]
pickle.dump([mydata,more_data], open('so_much_data.p','wb'))
In [7]:
mydata = pickle.load(open("mydata.p",'rb'))
print(mydata)
In [12]:
[mydata, more_data] = pickle.load(open('so_much_data.p','rb'))
print(mydata)
print(more_data)
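The open() calls in the cells above never explicitly close their file handles. A with block (a small addition here, not one of the original cells) does that automatically and is otherwise equivalent:
In [ ]:
# Same round trip as above, but the context manager closes each file for us
with open('so_much_data.p', 'wb') as f:
    pickle.dump([mydata, more_data], f)
with open('so_much_data.p', 'rb') as f:
    mydata, more_data = pickle.load(f)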
In [119]:
my_list = [1,3,2,4,7,'Sandwich']
print(len(my_list))
print(my_list[0:2])
print(my_list[-1])
print(my_list[0:4:2])
In [14]:
import numpy as np
my_array = np.random.poisson(lam=3,size=10)
print(my_array)
print(my_array.shape)
In [16]:
import pandas as pd
my_series = pd.Series(my_list)
my_series.shape
Out[16]:
In [17]:
my_series = pd.Series(my_array,
                      index = [1,2,3,'cat','dog','10','n',8,7,6])
print(my_series)
In [18]:
print(my_series.mean())
my_series = pd.Series(['hello world','hello planet'])
print(my_series.str.replace('hello','goodbye'))
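Other vectorized string methods work the same way; for instance, .str.contains flags which elements match a pattern (a small added illustration, not one of the original cells):
In [ ]:
# Boolean Series marking which strings contain 'planet'
print(my_series.str.contains('planet'))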
In [20]:
new_list = list(my_array)
print(new_list)
In [24]:
my_2d_list = [[1,4],[2,1],[8,10],[4,7],[9,2],[4,5]]
my_3var_list = [(1,4,7),(2,1,0),(8,10,2),(4,7,4),(9,2,7),(4,5,3)]
In [25]:
# Append each element of my_list as a third value in the matching row
for i,new_var in enumerate(my_list):
    my_2d_list[i].append(new_var)
print(my_2d_list)
In [90]:
my_dict = {
    'var1': [1,2,8,4,9,4],
    'var2': [4,1,10,7,2,5]
}
my_dict['var3']=my_list
print(my_dict['var3'])
In [120]:
my_matrix = np.array(my_2d_list)
my_other_matrix = np.array(my_3var_list)
print(my_matrix)
print(my_matrix[0,0:2])
In [37]:
big_matrix = np.concatenate([my_matrix, my_other_matrix],axis=0)
print(big_matrix)
In [41]:
big_matrix = np.concatenate([my_matrix, my_other_matrix],axis=1)
print(big_matrix)
In [43]:
print(my_matrix.T + my_other_matrix.T*5)
In [126]:
my_rand_matrix = np.random.randn(5,3)
print(my_rand_matrix)
In [127]:
my_rand_matrix[:,0]=my_rand_matrix[:,0]*.5+5
my_rand_matrix[:,1]=my_rand_matrix[:,1]*.5-5
my_rand_matrix[:,2]=my_rand_matrix[:,2]*10+50
print(my_rand_matrix.T)
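As a quick added sanity check (not one of the original cells), the column means and standard deviations of the rescaled matrix should land roughly near (5, -5, 50) and (0.5, 0.5, 10), up to sampling noise from only five draws:
In [ ]:
# Column-wise sample statistics of the rescaled matrix
print(my_rand_matrix.mean(axis=0))
print(my_rand_matrix.std(axis=0))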
In [128]:
BIG_array = np.zeros((100,100))
rows = (1,6,29,40,43,50)
columns = (3,6,90,58,34,88)
BIG_array[(rows,columns)]=[4,6,14,1,3,22]
In [129]:
import scipy as sp
from scipy import sparse
BIG_array = sparse.csc_matrix(BIG_array)
print(BIG_array)
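For reference (an addition, not one of the original cells), a sparse matrix reports how many entries it actually stores via .nnz and can be converted back to a dense array with .toarray():
In [ ]:
# Stored (nonzero) entries, and a dense copy for comparison
print(BIG_array.nnz)
dense_again = BIG_array.toarray()
print(dense_again.shape)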
In [130]:
df = pd.DataFrame(my_dict)
df
Out[130]:
In [131]:
df = pd.DataFrame(my_2d_list,
                  columns = ['var1','var2','var3'])
df
Out[131]:
In [132]:
df = pd.DataFrame(my_rand_matrix,
                  columns = ['dist_1','dist_2','dist_3'],
                  index = ['obs1','obs2','obs3','obs4','fred'])
df
Out[132]:
In [133]:
df.describe()
Out[133]:
In [134]:
import matplotlib.pyplot as plt
%matplotlib inline
df.plot.density()
Out[134]:
In [135]:
df.dist_1.plot.hist(bins=3)
Out[135]:
In [95]:
baad_covars = pd.read_csv('BAAD_1_Lethality_Data.tab',sep='\t')
In [96]:
baad_covars.head(3)
Out[96]:
In [76]:
print(baad_covars.shape)
baad_covars.columns
Out[76]:
In [77]:
baad_covars.rename(columns = {'cowmastercountry':'country',
                              'masterccode':'ccode',
                              'mastertccode3606':'group_code',
                              'fatalities19982005':'fatalities'},
                   inplace = True)
baad_covars.replace({'country':{'United States of America':'US'}},
                    inplace = True)
print('Dimensions: ',baad_covars.shape)
baad_covars.head()
Out[77]:
In [39]:
#Set the index
baad_covars.set_index(['group_code'],inplace = True)
baad_covars.head()
Out[39]:
In [78]:
baad_covars.to_csv('updated_baad.csv')
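One way to read the file back in, assuming the cells ran in the order shown so that group_code was saved as the index (this check cell and the name baad_check are additions):
In [ ]:
# Re-load the saved file, restoring group_code as the index
baad_check = pd.read_csv('updated_baad.csv', index_col='group_code')
baad_check.head()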
In [40]:
baad_covars.loc[:, ['fatalities']].head()
Out[40]:
In [41]:
baad_covars.loc[(baad_covars.fatalities>1) | (baad_covars.degree>=1),
                ['group','country']].head()
Out[41]:
In [102]:
baad_covars.loc[(baad_covars.ContainRelig==1) &
                (baad_covars.terrStrong==1), ['group']]
Out[102]:
In [103]:
baad_covars.OrgAge.plot.hist(bins=10)
Out[103]:
In [83]:
state_level = baad_covars.loc[:, ['country','OrgAge',
                                  'ordsize','degree',
                                  'fatalities']
                             ].groupby(['country']).sum()
state_level.head()
Out[83]:
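If more than one summary per country is wanted, a groupby also accepts a list of aggregation functions; this sketch (an addition, with the hypothetical name state_level_multi) computes sums and means together:
In [ ]:
# Multiple aggregations per country
state_level_multi = baad_covars.loc[:, ['country','OrgAge',
                                        'ordsize','degree',
                                        'fatalities']
                                   ].groupby(['country']).agg(['sum','mean'])
state_level_multi.head()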
In [62]:
baad_covars['big'] = 0
baad_covars.loc[(baad_covars.fatalities>1) |
                (baad_covars.degree>=1),
                'big'] = 1
baad_covars.big.head()
Out[62]:
In [54]:
print(type(np.nan))
baad_covars.loc[(baad_covars.fatalities>1) | (baad_covars.degree>=1),
                ['terrStrong']] = None
baad_covars.loc[(baad_covars.fatalities>1) | (baad_covars.degree>=1),
                ['terrStrong']].head()
Out[54]:
In [56]:
baad_covars.loc[baad_covars.terrStrong.isnull(),'terrStrong'].head()
Out[56]:
In [58]:
baad_covars['terrStrong'] = baad_covars.terrStrong.fillna(-77)
baad_covars.terrStrong.head()
Out[58]:
In [60]:
baad_covars_dropped = baad_covars.dropna(axis='index',
                                         subset=['terrStrong'],
                                         inplace=False)
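Because terrStrong was already filled with -77 two cells above, the drop removes nothing here; the added check below makes that explicit by comparing shapes:
In [ ]:
# Shapes before and after dropna; identical because no NaN values remain in terrStrong
print(baad_covars.shape, baad_covars_dropped.shape)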
In [63]:
baad_covars.reset_index(inplace=True,
                        drop = False)
baad_covars.head()
Out[63]:
In [64]:
baad_covars.set_index(['group','country'],inplace = True)
baad_covars.head()
Out[64]:
In [65]:
indonesia_grps = baad_covars.xs('Indonesia',level = 'country',drop_level=False)
indonesia_grps = indonesia_grps.loc[indonesia_grps.fatalities>=1,
                                    ['degree','ContainRelig',
                                     'ContainEthno','terrStrong',
                                     'ordsize','OrgAge']]
indonesia_grps.head()
Out[65]:
In [137]:
little_df = pd.DataFrame([1,2,3,4,5],columns = ['A'])
little_df['B']=[0,1,0,1,1]
copied_df = little_df
print('before:')
print(copied_df)
little_df.loc[little_df.A == 3,'B'] = 'Sandwich'
print('after:')
print(copied_df)
In [139]:
import copy
little_df = pd.DataFrame([1,2,3,4,5],columns = ['A'])
little_df['B']=[0,1,0,1,1]
copied_df = little_df.copy()
print('before:')
print(copied_df)
little_df.loc[little_df.A == 3,'B'] = 'Sandwich'
print('after:')
print(copied_df)
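The cell above imports the copy module but uses the DataFrame's own .copy(); copy.deepcopy gives an equally independent copy, as this small added check suggests:
In [ ]:
# copy.deepcopy also produces a separate object with the same contents
deep_df = copy.deepcopy(little_df)
print(deep_df is little_df)
print(deep_df.equals(little_df))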
In [140]:
C = pd.DataFrame(['apple','orange','grape','pear','banana'],
                 columns = ['C'],
                 index = [2,4,3,0,1])
little_df['C'] = C
little_df
Out[140]:
In [141]:
C = pd.DataFrame(['apple','orange','grape','apple'],
                 columns = ['C'],
                 index = [2,4,3,'a'])
C['cuts']=['slices','wedges','whole','spirals']
print('C:')
print(C)
print('Inner: Intersection')
print(little_df.merge(right=C,
                      how='inner',
                      on=None,
                      left_index = True,
                      right_index = True))
print('Outer: Keep all rows')
print(little_df.merge(right=C,
                      how='outer',
                      on=None,
                      left_index = True,
                      right_index = True))
print('Left: Keep little_df')
print(little_df.merge(right=C,
                      how='left',
                      on=None,
                      left_index = True,
                      right_index = True))
print('Right: Keep C')
print(little_df.merge(right=C,
                      how='right',
                      on=None,
                      left_index = True,
                      right_index = True))
print('Outer, merging on column instead of index')
print(little_df.merge(right=C,
                      how='outer',
                      on='C',
                      left_index = False,
                      right_index = False))
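merge also accepts an indicator argument that records where each row came from; a short added sketch:
In [ ]:
# indicator=True adds a _merge column showing each row's origin
print(little_df.merge(right=C,
                      how='outer',
                      left_index = True,
                      right_index = True,
                      indicator = True))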
In [142]:
add_df = pd.DataFrame({'A':[6],'B':[7],'C':'peach'},index= ['p'])
little_df = pd.concat([little_df,add_df])
little_df
Out[142]:
In [107]:
asthma_data = pd.read_csv('asthma-emergency-department-visit-rates-by-zip-code.csv')
asthma_data.head(2)
Out[107]:
In [108]:
asthma_data[['zip','coordinates']] = asthma_data.loc[:,'ZIP code'].str.split(
    pat='\n', expand=True)
asthma_data.drop('ZIP code', axis=1,inplace=True)
asthma_data.head(2)
Out[108]:
In [109]:
asthma_grouped = asthma_data.groupby(by=['Year','zip']).sum()
asthma_grouped.head(4)
Out[109]:
In [110]:
asthma_grouped.drop('County Fips code',axis=1,inplace=True)
temp_grp = asthma_data.groupby(by=['Year','zip']).first()
asthma_grouped[['fips',
                'county',
                'coordinates']] = temp_grp.loc[:,['County Fips code',
                                                  'County',
                                                  'coordinates']].copy()
asthma_grouped.loc[:,'Number of Visits'] = \
    asthma_grouped.loc[:,'Number of Visits']/2
asthma_grouped.head(2)
Out[110]:
In [111]:
asthma_unstacked = asthma_data.pivot_table(index = ['Year',
                                                    'zip',
                                                    'County',
                                                    'coordinates',
                                                    'County Fips code'],
                                           columns = 'Age Group',
                                           values = 'Number of Visits')
asthma_unstacked.reset_index(drop=False,inplace=True)
asthma_unstacked.head(2)
Out[111]:
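The reverse reshape is a melt back to long form; this sketch (an addition, with the hypothetical name asthma_long) uses the column names as they stand before the rename in the next cell:
In [ ]:
# Melt the wide table back into one row per (year, zip, age group)
asthma_long = asthma_unstacked.melt(id_vars = ['Year','zip','County',
                                               'coordinates','County Fips code'],
                                    var_name = 'Age Group',
                                    value_name = 'Number of Visits')
asthma_long.head(2)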
In [113]:
asthma_unstacked.rename(columns={'zip':'Zip',
                                 'coordinates':'Coordinates',
                                 'County Fips code':'Fips',
                                 'Adults (18+)':'Adults',
                                 'All Ages':'Incidents',
                                 'Children (0-17)':'Children'},
                        inplace=True)
asthma_2015 = asthma_unstacked.loc[asthma_unstacked.Year==2015,:]
asthma_2015.head(2)
Out[113]:
In [114]:
asthma_2015.to_csv('asthma_2015.csv')
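Passing index=False writes a CSV without the row index; the file name below is a hypothetical alternative so the file written above is left untouched:
In [ ]:
# Optional: write a copy without the row index
asthma_2015.to_csv('asthma_2015_noindex.csv', index=False)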
In [ ]: