Install some packages so they are done by the time we need them. Type:
pip install pandas
When that's done, type:
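pip install matplotlib
pip install pickle
pip install statsmodels
Note: pickle is part of Python's standard library, so if pip cannot find a package by that name you can skip that command. If you installed the Continuum Python distribution, you may already have some of these packages installed.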
conda install works similarly to pip, and also works on many R packages when used with Jupyter notebooks.
Type ls to see what folders are in the current folder.
In [20]:
# Strings: bind a text literal to a name, then echo it.
my_string = "Hello World"
print(my_string)
In [25]:
# Numbers: adding an int to a float produces a float.
my_int, my_float = 2, 2.2
new_float = my_float + my_int
print(new_float)
# Last expression of the cell: the notebook displays the resulting type.
type(new_float)
Out[25]:
In [ ]:
# Lists: an ordered, mutable sequence holding the integers 0 through 4.
my_list = list(range(5))
In [12]:
# Indexing is zero-based, so index 1 selects the second element of the list.
print(my_list[1])
In [13]:
# Lists are mutable: overwrite the element at index 2 in place.
# Elements do not have to share a type, so a string can replace a number.
my_list[2] = "hello"
print(my_list)
In [14]:
# Dictionaries: look values up by key instead of by position.
my_dictionary = {
    'apple': 4,
    'pear': 'yum',
}
print(my_dictionary['apple'])
In [17]:
# A dictionary value can itself be a container: store the list under a new
# key, then read it back through that key.
my_dictionary.update(numbers=my_list)
print(my_dictionary['numbers'])
In [18]:
# Sets keep only unique members, so the repeated 'thing1' and 4 collapse
# to a single entry each.
my_set = set(['thing1', 'thing2', 'cat in hat', 'thing1', 4, 4])
print(my_set)
In [19]:
# Tuples: an ordered sequence like a list, but immutable once created.
my_tuple = 1, 3, 2
print(my_tuple)
In [23]:
# Step 1 - declare the inputs.
my_data, my_other_data = 'hello ', 'world'
# Step 2 - combine them (for strings, joining them end to end).
manipulated_data = f'{my_data}{my_other_data}'
# Step 3 - show the result.
print(manipulated_data)
In [26]:
# Same declare / manipulate / output pattern as before, now with numbers.
# Declare data
my_data = 1
my_other_data = 5
# Manipulate it: compute from the declared variables rather than repeating
# the literals, so changing the inputs above actually changes the result.
manipulated_data = my_data / my_other_data
# Output it:
print(manipulated_data)
In [2]:
# Comparisons evaluate to booleans: `==` tests equality, `=` assigns.
my_variable = 5
print(my_variable)
print(5 == my_variable)
In [5]:
# Ordering comparison: is the value strictly greater than 6?
print(6 < my_variable)
# Membership test: is the value one of the listed candidates?
print(my_variable in (1, 4, 7))
In [6]:
# Booleans take part in arithmetic as integers (True counts as 1),
# so this expression evaluates to 2.
True + True
Out[6]:
In [7]:
# if / elif / else: only the first branch whose condition holds is run.
# The indented lines under each condition form that branch's body.
my_bool = 'ice cream'
if my_bool == 'ice cream':
    print('yay')
elif my_bool == 'cake':
    print('woo!')
else:
    # Reached only when none of the conditions above matched.
    print('Woe and great tragedy!')
In [9]:
# Truthiness demo: swap in one of the commented alternatives below and
# re-run the cell to see which branch each value takes.
check = True
# check = False
# check = None
# check = 'monkey'
# check = 0
# check = 10
print('Check is:', check)
if check == 'monkey':
    print('banana')
elif check:
    # Any "truthy" value lands here, not just the literal True.
    print('yes')
else:
    print('no')

# `in` / `not in` test membership in a container.
if 1 not in [1, 2, 3]:
    print('not not in')
if 1 in [1, 2, 3]:
    print('in')
In [23]:
# while-loop: repeat the body for as long as the condition stays true.
n = 0
while n < 5:
    print(n)
    # The increment must be inside the loop body, otherwise n never
    # changes and the loop never terminates.
    n += 1
In [22]:
# Three common ways to drive a for-loop.
print('use a range:')
for i in range(3):  # yields 0, 1, 2
    print(i)

print('use a range slice:')
for i in range(3, 6):  # yields 3, 4, 5 (the end value is excluded)
    print(i)

print('iterate through a list:')
for i in my_list:  # visits each element of the list in order
    print(i)
In [10]:
# Combine a loop with conditionals: sort the members of a mixed list
# into separate lists according to their type.
my_list = [0, 1, 'cat', None, 'frog', 3]
animals = []
nums = []
for i in my_list:
    # isinstance is the idiomatic type check. Note that it also accepts
    # subclasses, so a bool would be counted as an int here.
    if isinstance(i, str):
        animals.append(i)
    elif isinstance(i, int):
        nums.append(i)
    # Anything else (such as None) is deliberately skipped.
print(animals)
print(nums)
In [2]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# from ggplot import *
import pickle
import statsmodels.api as sm
In [3]:
# Load the tab-separated data file into a DataFrame.
baad_covars = pd.read_csv(
    'BAAD_1_Lethality_Data.tab',
    sep='\t',
)
In [85]:
# Preview the first five rows to check that the load worked.
baad_covars.head(n=5)
Out[85]:
In [8]:
# Map the original column names to shorter ones; the rename happens in place.
column_renames = {
    'cowmastercountry': 'country',
    'masterccode': 'ccode',
    'mastertccode3606': 'group_code',
    'fatalities19982005': 'fatalities',
}
baad_covars.rename(columns=column_renames, inplace=True)
# Shorten one value within the 'country' column, also in place.
baad_covars.replace(
    to_replace={'country': {'United States of America': 'US'}},
    inplace=True,
)
print('Dimensions: ', baad_covars.shape)
baad_covars.head()
Out[8]:
In [9]:
#Set the index
baad_covars.set_index(['group_code'],inplace = True)
baad_covars.head()
Out[9]:
In [10]:
# Select a single column (returned as a Series) and preview it.
baad_covars['fatalities'].head()
Out[10]:
In [15]:
# Selecting with a list of names keeps the result a one-column DataFrame.
org_age = baad_covars[['OrgAge']]
# Density plot of the OrgAge column, followed by its mean.
org_age.plot.density()
print(org_age.mean())
# Histogram of the fatalities column using 20 bins.
baad_covars[['fatalities']].plot.hist(bins=20)
Out[15]:
In [89]:
# Boolean row selection: keep rows where fatalities > 1 OR degree >= 1,
# and show only the 'group' and 'country' columns for them.
selected_rows = (baad_covars['fatalities'] > 1) | (baad_covars['degree'] >= 1)
baad_covars.loc[selected_rows, ['group', 'country']].head()
Out[89]:
In [90]:
# Build the row filter once: fatalities > 1 OR degree >= 1.
rows_to_blank = (baad_covars['fatalities'] > 1) | (baad_covars['degree'] >= 1)
# .loc on the left-hand side assigns into the matching cells.
baad_covars.loc[rows_to_blank, ['terrStrong']] = None
# Display the same cells to confirm the assignment.
baad_covars.loc[rows_to_blank, ['terrStrong']].head()
Out[90]:
In [91]:
# Show the terrStrong entries that are currently missing.
baad_covars.loc[baad_covars['terrStrong'].isna(), 'terrStrong'].head()
Out[91]:
In [92]:
# Replace missing terrStrong entries with the placeholder value -77.
baad_covars['terrStrong'] = baad_covars['terrStrong'].fillna(value=-77)
baad_covars['terrStrong'].head()
Out[92]:
In [93]:
# Create an indicator column: start every row at 0 ...
baad_covars['big'] = 0
# ... then set it to 1 where fatalities > 1 OR degree >= 1.
is_big = (baad_covars['fatalities'] > 1) | (baad_covars['degree'] >= 1)
baad_covars.loc[is_big, 'big'] = 1
baad_covars['big'].head()
Out[93]:
In [95]:
# Move the current index back into an ordinary column (in place).
baad_covars.reset_index(drop=False, inplace=True)
baad_covars.head()
Out[95]:
In [96]:
# Index the rows by two columns at once, which gives a two-level index.
baad_covars.set_index(keys=['group', 'country'], inplace=True)
baad_covars.head()
Out[96]:
In [103]:
# Cross-section on the 'country' index level; drop_level=False keeps that
# level in the result's index.
indonesia_grps = baad_covars.xs(key='Indonesia', level='country', drop_level=False)
# Keep rows with at least one fatality, restricted to these columns.
kept_columns = [
    'degree',
    'ContainRelig',
    'ContainEthno',
    'terrStrong',
    'ordsize',
    'OrgAge',
]
indonesia_grps = indonesia_grps.loc[indonesia_grps['fatalities'] >= 1, kept_columns]
indonesia_grps.head()
Out[103]:
In [67]:
# Plain assignment does NOT copy a DataFrame: both names refer to the same
# object, so a change made through one name is visible through the other.
little_df = pd.DataFrame({'A': [1, 2, 3, 4, 5]})
little_df['B'] = [0, 1, 0, 1, 1]
copied_df = little_df  # a second name for the same frame, not a copy
print('before:')
print(copied_df)
# Modify through the original name ...
little_df.loc[little_df['A'] == 3, 'B'] = 7
print('after')
# ... and the change shows up under the other name as well.
copied_df
Out[67]:
In [68]:
import copy
# Same experiment, but with a real copy: .copy() produces an independent
# frame, so later edits to the original leave the copy untouched.
little_df = pd.DataFrame({'A': [1, 2, 3, 4, 5]})
little_df['B'] = [0, 1, 0, 1, 1]
copied_df = little_df.copy()
print('before:')
print(copied_df)
# Modify the original only.
little_df.loc[little_df['A'] == 3, 'B'] = 7
print('after')
# The copy still holds the values from before the edit.
copied_df
Out[68]:
In [107]:
# Save the subset two ways: as a CSV file and as a pickle file.
indonesia_grps.to_csv('indonesia.csv')

# Open the pickle file in a `with` block so the handle is closed (and the
# data flushed to disk) as soon as the dump finishes.
with open('indonesia.p', 'wb') as pickle_file:
    pickle.dump(indonesia_grps, pickle_file)

# Read the object back in. NOTE: unpickling can execute arbitrary code, so
# only load pickle files that come from a source you trust.
with open('indonesia.p', 'rb') as pickle_file:
    indonesia_grps = pickle.load(pickle_file)
In [ ]:
In [ ]:
In [73]:
# A one-column frame whose index is deliberately out of order.
C = pd.DataFrame(
    {'C': ['apple', 'orange', 'grape', 'pear', 'banana']},
    index=[2, 4, 3, 0, 1],
)
# Assign it to little_df as column 'C', then display the result to see
# which value ended up in which row.
little_df['C'] = C
little_df
Out[73]:
In [85]:
# Right-hand frame for the merge demos. Its index includes the label 'a',
# which is not among the integer labels used when little_df was created.
C = pd.DataFrame(['apple', 'orange', 'grape', 'apple'],
                 columns=['C'],
                 index=[2, 4, 3, 'a'])
C['cuts'] = ['slices', 'wedges', 'whole', 'spirals']
print('C:')
print(C)

# Index-on-index merges: the same call four times, varying only `how`,
# which decides whose row labels are kept in the result.
index_merges = [
    ('Inner: Intersection', 'inner'),
    ('Outer: Keep all rows', 'outer'),
    ('Left: Keep little_df', 'left'),
    ('Right: Keep C', 'right'),
]
for label, how in index_merges:
    print(label)
    print(little_df.merge(right=C,
                          how=how,
                          left_index=True,
                          right_index=True))

# Merge on the shared column 'C' instead of the index. `on` must be given
# by itself: combining it with left_index/right_index raises MergeError.
print('Outer, merging on column instead of index')
print(little_df.merge(right=C,
                      how='outer',
                      on='C'))
In [94]:
# Build a one-row frame labelled 'p' ...
new_row = {'A': [6], 'B': [7], 'C': 'peach'}
add_df = pd.DataFrame(new_row, index=['p'])
# ... and stack it underneath little_df. concat returns a new frame, so
# the result is bound back to the name.
little_df = pd.concat([little_df, add_df])
little_df
Out[94]: