You are currently looking at version 1.0 of this notebook. To download notebooks and data files, or to get help with Jupyter notebooks on the Coursera platform, visit the Jupyter Notebook FAQ course resource.
In [1]:
import pandas as pd
# IPython help syntax: a trailing ? shows the documentation for pd.Series.
pd.Series?
In [2]:
# A list of strings becomes a Series with a default 0..n-1 integer index.
animals = ['Tiger', 'Bear', 'Moose']
pd.Series(data=animals)
Out[2]:
In [3]:
# A list of whole numbers becomes a Series with an integer dtype.
numbers = [1, 2, 3]
pd.Series(data=numbers)
Out[3]:
In [36]:
# A list of strings that also contains a missing entry (None).
animals = ['Tiger', 'Bear', None]
# This object is a Series, not a DataFrame, so it gets a Series name.
animal_series = pd.Series(animals)
# Assigning to a label that is not in the index does not add a column:
# it appends one more element, labelled 'number_column', to the Series.
# .loc makes that label-based enlargement explicit.
animal_series.loc['number_column'] = -99999
animal_series
Out[36]:
In [5]:
# None in an otherwise numeric list is stored as NaN, which makes the
# resulting Series floating point.
numbers = [1, 2, None]
pd.Series(data=numbers)
Out[5]:
In [6]:
import numpy as np
# NaN is not the same thing as None: this comparison evaluates to False.
np.nan == None
Out[6]:
In [7]:
# NaN never compares equal to anything, not even itself, so this is False.
np.nan == np.nan
Out[7]:
In [8]:
# Because equality cannot detect NaN, test for it with np.isnan instead.
np.isnan(np.nan)
Out[8]:
In [9]:
# Building a Series from a dict: the keys become the index labels and the
# values become the data.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(data=sports)
s
Out[9]:
In [10]:
# The index object holds the labels of s (here, the sport names used as dict keys).
s.index
Out[10]:
In [11]:
# Data and index labels can be supplied separately; they are paired by position.
animal_names = ['Tiger', 'Bear', 'Moose']
home_countries = ['India', 'America', 'Canada']
s = pd.Series(data=animal_names, index=home_countries)
s
Out[11]:
In [12]:
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
# When an explicit index is given alongside a dict, only the listed labels are
# kept: 'Hockey' has no matching key so its value is NaN, while 'Archery' and
# 'Taekwondo' are left out of the result.
wanted_sports = ['Golf', 'Sumo', 'Hockey']
s = pd.Series(data=sports, index=wanted_sports)
s
Out[12]:
In [13]:
# Rebuild the full sports Series (all four entries) for the lookup examples.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(data=sports)
s
Out[13]:
In [14]:
# .iloc selects by integer position: this is the fourth element of s.
s.iloc[3]
Out[14]:
In [15]:
# .loc selects by index label.
s.loc['Golf']
Out[15]:
In [16]:
# Plain [] with an integer on a string-labelled index falls back to a
# positional lookup, like s.iloc[3].
# NOTE: newer pandas releases deprecate this fallback; prefer s.iloc[3].
s[3]
Out[16]:
In [17]:
# Plain [] with a label behaves like s.loc['Golf'].
s['Golf']
Out[17]:
In [38]:
# With integer keys the index labels are themselves integers, so a bare
# integer in [] is ambiguous between "label" and "position".
sports = {
    99: 'Bhutan',
    100: 'Scotland',
    101: 'Japan',
    102: 'South Korea',
}
s = pd.Series(data=sports)
In [39]:
# s[0] would be treated as a lookup of the label 0, which is not in this
# integer index (99..102), so it would raise a KeyError rather than return
# the first element. Use .iloc explicitly to select by position.
s.iloc[0]
Out[39]:
In [40]:
# A small numeric Series used by the summation examples.
s = pd.Series(data=[100.0, 120.0, 101.0, 3.0])
s
Out[40]:
In [41]:
# Sum the Series the slow way: visit each element in a Python loop and
# accumulate, then print the final total once.
total = 0
for item in s:
    total = total + item
print(total)
In [23]:
import numpy as np
# Same total as the loop, computed in one vectorised NumPy call.
total = np.sum(s)
print(total)
In [43]:
# Create a Series of 10,000 random integers drawn from 0 (inclusive) to 1000
# (exclusive). No seed is set, so the values differ from run to run.
s = pd.Series(np.random.randint(0,1000,10000))
s.head()
Out[43]:
In [44]:
# Confirm how many elements s holds.
len(s)
Out[44]:
In [45]:
%%timeit -n 100
summary = 0
for item in s:
summary+=item
In [46]:
%%timeit -n 100
# Time the vectorised equivalent of the loop: a single NumPy reduction over s.
summary = np.sum(s)
In [47]:
s+=2  # adds two to every element of s in a single vectorised (broadcast) operation
s.head()
Out[47]:
In [48]:
# Add two to every element the slow way, one label at a time.
# Series.iteritems() and Series.set_value() are no longer available in current
# pandas; Series.items() and the .loc setter are the supported replacements
# and behave the same here.
for label, value in s.items():
    s.loc[label] = value + 2
s.head()
Out[48]:
In [49]:
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.iteritems():
s.loc[label]= value+2
In [50]:
%%timeit -n 10
# Time the vectorised version: build a Series of the same size and add two to
# every element in one operation.
s = pd.Series(np.random.randint(0,1000,10000))
s+=2
In [57]:
# Start from three integers with the default integer index ...
s = pd.Series(data=[1, 2, 3])
# ... then assign to a label that does not exist yet. .loc appends a new
# element, so both the index and the values end up with mixed types.
s.loc['Animal'] = 'Bears'
s
Out[57]:
In [58]:
# Countries associated with each sport, labelled by sport name.
original_sports = pd.Series({
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
})
# Index labels do not have to be unique: every entry here shares the label
# 'Cricket'.
cricket_loving_countries = pd.Series(
    ['Australia', 'Barbados', 'Pakistan', 'England'],
    index=['Cricket', 'Cricket', 'Cricket', 'Cricket'],
)
# Series.append was removed in pandas 2.0. pd.concat is the supported way to
# stack Series end to end; it returns a new Series and leaves both inputs
# unchanged.
all_countries = pd.concat([original_sports, cricket_loving_countries])
In [59]:
# Combining the two Series produced a new object, so original_sports still
# holds only its own four entries.
original_sports
Out[59]:
In [60]:
# Likewise unchanged: four countries that all share the label 'Cricket'.
cricket_loving_countries
Out[60]:
In [61]:
# The combined Series: the original sports followed by the cricket entries.
all_countries
Out[61]:
In [62]:
# 'Cricket' is not a unique label, so .loc returns a Series containing every
# matching entry instead of a single value.
all_countries.loc['Cricket']
Out[62]:
In [4]:
import pandas as pd
# Each purchase is one record; the Series labels become the DataFrame columns.
purchase_fields = ['Name', 'Item Purchased', 'Cost']
purchase_1 = pd.Series(['Chris', 'Dog Food', 22.50], index=purchase_fields)
purchase_2 = pd.Series(['Kevyn', 'Kitty Litter', 2.50], index=purchase_fields)
purchase_3 = pd.Series(['Vinod', 'Bird Seed', 5.00], index=purchase_fields)

# Row labels need not be unique: the first two purchases share 'Store 1'.
store_labels = ['Store 1', 'Store 1', 'Store 2']
df = pd.DataFrame(data=[purchase_1, purchase_2, purchase_3], index=store_labels)
df.head()
Out[4]:
In [65]:
# 'Store 2' labels exactly one row, so .loc returns that row as a Series.
df.loc['Store 2']
Out[65]:
In [66]:
# Check the type of the object returned for a single-row selection.
type(df.loc['Store 2'])
Out[66]:
In [67]:
# 'Store 1' labels two rows, so this selection comes back as a DataFrame.
df.loc['Store 1']
Out[67]:
In [68]:
# .loc accepts a row label and a column label together: the 'Cost' values of
# the 'Store 1' rows.
df.loc['Store 1', 'Cost']
Out[68]:
In [69]:
# Transpose: rows become columns and columns become rows.
df.T
Out[69]:
In [70]:
# After transposing, 'Cost' is a row label, so .loc can select it.
df.T.loc['Cost']
Out[70]:
In [71]:
# Plain [] on a DataFrame selects a column by label.
df['Cost']
Out[71]:
In [72]:
# Chained indexing: select the 'Store 1' rows first, then take 'Cost' from that
# intermediate result. This is fine for reading, but assigning through a chain
# may act on a temporary copy; prefer df.loc['Store 1', 'Cost'].
df.loc['Store 1']['Cost']
Out[72]:
In [73]:
# ':' selects every row; the list selects the 'Name' and 'Cost' columns.
df.loc[:,['Name', 'Cost']]
Out[73]:
In [82]:
# Passing a list of column labels to [] selects several columns at once.
df[['Name', 'Cost']]
Out[82]:
In [74]:
# A single column label returns that column as a Series.
df['Name']
Out[74]:
In [75]:
# drop() returns a new DataFrame without the 'Store 1' rows. The result is
# only displayed, not assigned, so df itself is not modified.
df.drop('Store 1')
Out[75]:
In [76]:
# df still contains the 'Store 1' rows.
df
Out[76]:
In [77]:
# Work on a copy so df keeps every row, then remove the 'Store 1' rows from
# the copy.
copy_df = df.copy().drop('Store 1')
copy_df
Out[77]:
In [78]:
# IPython help syntax: show the documentation for the drop method.
copy_df.drop?
In [79]:
# del removes the column from copy_df directly; unlike drop(), it does not
# return a new DataFrame.
del copy_df['Name']
copy_df
Out[79]:
In [83]:
# Assigning a scalar to a new column label adds that column, with the value
# repeated for every row.
df['Location'] = None
df
Out[83]:
In [3]:
# Take the 'Cost' column out of df as a Series.
costs = df['Cost']
costs
Out[3]:
In [4]:
# In-place add on the Series that was taken from df.
costs+=2
costs
Out[4]:
In [5]:
# Check whether the in-place change to costs is visible in df.
# NOTE(review): this depends on pandas' view/copy semantics. When the Series
# shares data with the frame, df changes too; with copy-on-write enabled, df is
# left unchanged. Confirm against the installed pandas version.
df
Out[5]:
In [2]:
# Shell escape: print the raw contents of olympics.csv to inspect its layout.
!cat olympics.csv
In [5]:
# Load the file with default options: the first line is used as the header and
# rows get a default integer index.
df = pd.read_csv('olympics.csv')
df.head()
Out[5]:
In [7]:
# Re-read the file: skiprows=1 skips the first line so the next line supplies
# the column names, and index_col=0 uses the first column as the row index.
df = pd.read_csv('olympics.csv', index_col = 0, skiprows=1)
df.head()
Out[7]:
In [9]:
# Inspect the column labels as read from the file.
df.columns
Out[9]:
In [9]:
# Replace the raw column headers with readable names in a single rename,
# instead of calling rename(inplace=True) once per column inside a loop.
MEDAL_PREFIXES = {'01': 'Gold', '02': 'Silver', '03': 'Bronze'}


def _readable_column_name(col):
    """Return the cleaned-up label for one raw column header.

    Headers starting with '01', '02' or '03' become 'Gold', 'Silver' or
    'Bronze' followed by whatever comes after the first four characters
    (for example a '.1' suffix). Headers starting with '№' have that
    character replaced by '#'. Any other header is returned unchanged.
    """
    prefix = col[:2]
    if prefix in MEDAL_PREFIXES:
        return MEDAL_PREFIXES[prefix] + col[4:]
    if col[:1] == '№':
        return '#' + col[1:]
    return col


df = df.rename(columns=_readable_column_name)
df.head()
Out[9]:
In [11]:
# Element-wise comparison: a Boolean Series that is True where Gold is above 0.
df['Gold'] > 0
Out[11]:
In [12]:
# Boolean mask: where() keeps the shape of df and fills the rows for which the
# mask is False with NaN.
only_gold = df.where(df['Gold'] > 0)
only_gold.head()
Out[12]:
In [14]:
# Total of the Gold column for the rows that passed the mask; the NaN rows are
# skipped by sum().
only_gold['Gold'].sum()
Out[14]:
In [15]:
# count() tallies the non-null entries of the Gold column in the unfiltered df.
df['Gold'].count()
Out[15]:
In [16]:
# Remove every row that contains at least one NaN, i.e. the rows masked out above.
only_gold = only_gold.dropna()
only_gold.head()
Out[16]:
In [17]:
# Indexing with the Boolean mask keeps only the rows where it is True, so no
# NaN-filled rows are produced and no dropna() is needed.
only_gold = df[df['Gold'] > 0]
only_gold.head()
Out[17]:
In [18]:
# Number of rows where Gold or Gold.1 is above 0. '|' is the element-wise OR,
# and each comparison needs its own parentheses.
len(df[(df['Gold'] > 0) | (df['Gold.1'] > 0)])
Out[18]:
In [19]:
# Rows where Gold.1 is above 0 and Gold is exactly 0 ('&' is element-wise AND).
df[(df['Gold.1'] > 0) & (df['Gold'] == 0)]
Out[19]:
In [10]:
# Show the first rows of df and its current index before re-indexing.
df.head()
Out[10]:
In [11]:
# set_index replaces the current index and discards the old one, so first copy
# the existing index into a new 'country' column, then index the rows by 'Gold'.
df['country'] = df.index
df = df.set_index('Gold')
df.head()
Out[11]:
In [ ]:
# Move the current index back into an ordinary column and fall back to the
# default integer index.
df = df.reset_index()
df.head()
In [ ]:
# Load the census data with default options and preview the first rows.
df = pd.read_csv('census.csv')
df.head()
In [ ]:
# List the distinct values that appear in the SUMLEV column.
df['SUMLEV'].unique()
In [ ]:
# Keep only the rows whose SUMLEV equals 50.
# NOTE(review): presumably the county-level rows; verify against the census
# data dictionary.
df=df[df['SUMLEV'] == 50]
df.head()
In [ ]:
# Keep the two name columns plus the yearly BIRTHS and POPESTIMATE columns for
# 2010 through 2015, in that order.
estimate_years = range(2010, 2016)
columns_to_keep = (
    ['STNAME', 'CTYNAME']
    + ['BIRTHS{}'.format(year) for year in estimate_years]
    + ['POPESTIMATE{}'.format(year) for year in estimate_years]
)
df = df[columns_to_keep]
df.head()
In [ ]:
# Passing a list of columns builds a two-level index: STNAME is the outer
# level and CTYNAME the inner level.
df = df.set_index(['STNAME', 'CTYNAME'])
df.head()
In [ ]:
# With a two-level index, give one label per level (outer first) to select a
# single row.
df.loc['Michigan', 'Washtenaw County']
In [ ]:
# A list of (outer, inner) label tuples selects several rows at once.
df.loc[ [('Michigan', 'Washtenaw County'),
('Michigan', 'Wayne County')] ]
In [ ]:
# Load the log data. The bare df below displays the whole frame rather than a
# preview.
df = pd.read_csv('log.csv')
df
In [ ]:
# IPython help syntax: show the documentation for fillna.
df.fillna?
In [ ]:
# Index the log by its 'time' column and order the rows by that index.
df = df.set_index('time').sort_index()
df
In [ ]:
# Turn the current index back into a column, then build a two-level index from
# 'time' (outer) and 'user' (inner).
df = df.reset_index().set_index(['time', 'user'])
df
In [ ]:
# Forward-fill: each missing value takes the last non-missing value above it
# in the same column. fillna(method='ffill') is deprecated in recent pandas;
# DataFrame.ffill() is the direct equivalent.
df = df.ffill()
df.head()