In [3]:
import pandas as pd
In [9]:
# Load the dataset without index_col, so pandas assigns its default integer index,
# then preview the first three rows.
bond = pd.read_csv("data/jamesbond.csv")
bond.head(3)
Out[9]:
In [10]:
# Set the index on load: index_col makes the "Film" column the row index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
In [42]:
# Load first, then promote the "Film" column to the row index.
bond = pd.read_csv("data/jamesbond.csv").set_index("Film")
bond.head(3)
Out[42]:
In [39]:
# Delete the index: drop=True discards the current index labels instead of
# keeping them as a column. Without inplace=True this returns a new frame and
# leaves `bond` itself untouched.
bond.reset_index(drop=True)
Out[39]:
In [43]:
# Move the index back into a regular column (drop=False is the default) and
# fall back to the default integer index.
bond = bond.reset_index()
bond.head()
Out[43]:
In [44]:
# Use the "Year" column as the new row index.
bond = bond.set_index("Year")
bond.head(3)
Out[44]:
In [47]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
# Sorting the index matters on large datasets: with a sorted index pandas
# can locate labels and label ranges much faster.
Out[47]:
In [53]:
# Select a single row by its index label; a unique label returns a Series.
bond.loc['A View to a Kill']
Out[53]:
In [58]:
# Looking up a label that is not in the index raises KeyError.
try:
    bond.loc['blablabla']
except KeyError:
    # print() with a single argument behaves the same on Python 2 and Python 3.
    print("Index not found!")
In [60]:
# Slice by label --> DataFrame. Unlike positional slices, a .loc label slice
# includes both endpoints.
bond.loc['Diamonds Are Forever': 'Moonraker']
Out[60]:
In [62]:
# Get multiple rows by passing a list of index labels --> DataFrame
bond.loc[['Moonraker', 'Licence to Kill']]
Out[62]:
In [63]:
# Get multiple rows by a list of index labels, even if some do not exist.
# Passing a list with a missing label straight to .loc raises KeyError on
# pandas >= 1.0, so first keep only the labels that are present and then
# reindex to bring the missing ones back as all-NaN rows, in the requested order.
# (A plain bond.reindex(...) is not usable here because the Film index holds
# duplicate titles; reindex still raises if a *requested* label is duplicated.)
wanted = ['Moonraker', 'Licence to Kill', 'bla bla bla']
bond.loc[bond.index.intersection(wanted)].reindex(wanted)
Out[63]:
In [64]:
# Membership test against the index labels; evaluates to a bool.
'Moonraker' in bond.index
Out[64]:
In [70]:
# Select one row by integer position (0-based) --> Series
bond.iloc[15]
Out[70]:
In [71]:
# Select several rows by a list of integer positions --> DataFrame
bond.iloc[[15,20]]
Out[71]:
In [72]:
# Positional slice: rows 0 through 3 (the end position is excluded).
bond.iloc[:4]
Out[72]:
In [73]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[73]:
In [79]:
# Row lookup by label, then by integer position.
# NOTE: only the last expression of a cell is displayed, so the .loc result
# on the first line is computed and then discarded.
# NOTE(review): presumably position 20 is the 'GoldenEye' row of the sorted frame — confirm.
bond.loc['GoldenEye']
bond.iloc[20]
Out[79]:
In [81]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[81]:
In [82]:
# Label lookup with .loc (.ix was deprecated in pandas 0.20 and removed in
# 1.0; with a label key it behaved like .loc).
bond.loc['GoldenEye']
Out[82]:
In [84]:
# Projection: pick one row by label and only the listed columns --> Series
bond.loc['Moonraker', ['Actor', 'Budget']]
Out[84]:
In [89]:
# Positional projection: row 14 with a column slice, then with a column list.
# Only the second expression is displayed; the first result is discarded.
bond.iloc[14, 2:5]
bond.iloc[14, [1,2,5]]
Out[89]:
In [90]:
# Row by integer position, column by label. .iloc needs integer positions on
# both axes, so translate the column label with get_loc (.ix, which allowed
# mixing the two, was removed in pandas 1.0).
bond.iloc[20, bond.columns.get_loc('Budget')]
Out[90]:
In [95]:
# Row by label, first three columns by position: turn the positional slice
# into column labels so .loc can be used instead of the removed .ix accessor.
bond.loc['Moonraker', bond.columns[0:3]]
Out[95]:
In [96]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[96]:
In [98]:
# Inspect the row before modifying it (.loc replaces the .ix accessor that
# was removed in pandas 1.0).
bond.loc['Dr. No']
Out[98]:
In [99]:
# Overwrite a single cell, addressed by row label and column label
# (.loc replaces the .ix accessor that was removed in pandas 1.0).
bond.loc['Dr. No', 'Actor'] = 'Comu'
In [100]:
# Show the row again to confirm the assignment (.loc replaces the removed .ix).
bond.loc['Dr. No']
Out[100]:
In [101]:
# Single-cell assignment through .loc: row label first, column label second.
bond.loc['Dr. No', 'Actor'] = 'Vale'
In [102]:
# Show the row again to confirm the assignment (.loc replaces the removed .ix).
bond.loc['Dr. No']
Out[102]:
In [105]:
# Assign several columns of one row at once; the values are matched to the
# listed columns in order.
bond.loc['Dr. No', ['Actor', 'Director']] = ['Vale', 'Pippo']
In [106]:
# Show the row again to confirm the assignment (.loc replaces the removed .ix).
bond.loc['Dr. No']
Out[106]:
In [107]:
# Read back just the two columns of that row --> Series
bond.loc['Dr. No', ['Actor', 'Director']]
Out[107]:
In [129]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[129]:
In [118]:
# Change multiple values
# First version --> Very BAD!
In [111]:
# Boolean Series: True for the rows whose Actor is exactly 'Sean Connery'.
mask = bond['Actor'] == 'Sean Connery'
In [117]:
# Create a copy and change values
df2 = bond[mask]
df2['Actor'] = 'Sir Sean Connery'
In [ ]:
# Change multiple values
# Second version --> Much better!
In [130]:
# Build the row filter, then assign through .loc so the change lands in
# `bond` itself; the two values are matched to the two listed columns in order.
mask = bond['Actor'] == 'Sean Connery'
bond.loc[mask, ['Actor', 'Bond Actor Salary']] = ['pippo', 100000000]
bond.head(5)
Out[130]:
In [133]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[133]:
In [136]:
## Rename columns: the dict maps each old label to its new one.
bond = bond.rename(columns={"Year": "Release Date", "Budget": "Moneyyyy"})
bond.head()
Out[136]:
In [137]:
## Rename index labels: same dict form, applied to the row index.
bond = bond.rename(index={"A View to a Kill": "Bla bla bla"})
bond.head()
Out[137]:
In [141]:
# Rename all the columns at once, in order: the list must contain exactly one
# new label per existing column.
bond.columns = ['asd', '123', 'zxcaa', 'sdjkadkas', 'sdczaaa', 'Bla bla']
In [142]:
# Preview one row to check the new column labels.
bond.head(1)
Out[142]:
In [167]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[167]:
In [168]:
# Remove rows by index label: axis defaults to 0 (rows), and every row that
# carries the label is dropped.
bond = bond.drop(labels='Casino Royale')
bond.head(3)
Out[168]:
In [170]:
# axis=1 targets columns: remove the 'Year' column.
bond = bond.drop('Year', axis=1)
bond.head(2)
Out[170]:
In [171]:
# Several columns can be removed at once by passing a list of labels.
bond = bond.drop(['Actor', 'Director'], axis=1)
bond.head(2)
Out[171]:
In [177]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[177]:
In [178]:
# pop removes the column from `bond` and returns it as a Series.
actor = bond.pop('Actor')
actor
Out[178]:
In [179]:
# After the pop, 'Actor' is no longer among the columns.
bond.head(1)
Out[179]:
In [180]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[180]:
In [181]:
# del removes the column from `bond` in place; unlike pop, nothing is returned.
del bond['Director']
bond.head()
Out[181]:
In [182]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[182]:
In [193]:
# Three rows drawn at random; no random_state is given, so the selection
# changes on every run.
bond.sample(3)
Out[193]:
In [203]:
# frac samples a fraction of the rows instead of a fixed count (here 15%).
bond.sample(frac=.15)
Out[203]:
In [217]:
# axis=1 samples columns instead of rows: three random columns, first two rows.
bond.sample(n=3, axis=1).head(2)
Out[217]:
In [218]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[218]:
In [219]:
# Sort descending by 'Box Office' and keep the top three rows.
bond.sort_values("Box Office", ascending=False).head(3)
Out[219]:
In [229]:
# Top three rows by 'Box Office' --> DataFrame
# (max(bond['Box Office']) would only give the single largest value.)
bond.nlargest(3, 'Box Office')
Out[229]:
In [231]:
# Called on a single column, nlargest returns a Series (here the two largest values).
bond['Box Office'].nlargest(2)
Out[231]:
In [227]:
# Three rows with the smallest 'Box Office' --> DataFrame
# (min(bond['Box Office']) would only give the single smallest value.)
bond.nsmallest(3, 'Box Office')
Out[227]:
In [232]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[232]:
In [236]:
# where keeps the frame's shape: rows where the mask is False are filled with
# NaN instead of being removed.
mask = bond['Actor']=='Sean Connery'
bond.where(mask)
Out[236]:
In [237]:
# The condition can be written inline; rows that do not satisfy it become NaN.
bond.where(bond['Box Office']>800)
Out[237]:
In [239]:
# Combine boolean masks with & (and) and | (or); `mask` comes from an earlier cell.
# Only the last expression is displayed, so the & result is computed and discarded.
mask2 = bond['Box Office']>800
bond.where(mask & mask2)
bond.where(mask | mask2)
Out[239]:
In [240]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[240]:
In [241]:
# query() expressions are simpler when column labels contain no spaces,
# so swap every space for an underscore.
bond.columns = bond.columns.map(lambda label: label.replace(" ", "_"))
In [243]:
# Preview one row to check the underscore column labels.
bond.head(1)
Out[243]:
In [245]:
# query takes the filter as a string; column names are referenced directly
# and string values are quoted inside it.
bond.query('Actor == "Sean Connery"')
Out[245]:
In [251]:
# Numeric comparison inside the query string.
bond.query('Budget > 100')
Out[251]:
In [252]:
# Conditions can be combined with `and` / `or` inside the query string.
bond.query('Actor != "Sean Connery" and Budget > 100')
Out[252]:
In [255]:
# `in` tests membership against a list of values.
bond.query('Actor in ["Sean Connery", "Timothy Dalton"]')
Out[255]:
In [256]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[256]:
In [257]:
def my_func(number):
    """Return `number` rendered as text with the suffix " MILLIONS!".

    Parameters
    ----------
    number : any value accepted by str()
        The figure to label, e.g. 275.2.

    Returns
    -------
    str
        str(number) followed by " MILLIONS!", e.g. "275.2 MILLIONS!".
    """
    return str(number) + " MILLIONS!"
In [259]:
# apply calls my_func on every value of the column; the results (strings)
# replace the original column. Re-running this cell would append the suffix again.
bond['Box Office'] = bond['Box Office'].apply(my_func)
bond['Budget'] = bond['Budget'].apply(my_func)
In [260]:
# The two converted columns now hold strings ending in " MILLIONS!".
bond.head(2)
Out[260]:
In [261]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[261]:
In [267]:
## Automatic way: apply the same conversion to every listed column in a loop
## instead of writing one assignment per column.
columns = ['Box Office', 'Budget', 'Bond Actor Salary']
for col in columns:
    # Each column is replaced by its string-formatted version.
    bond[col] = bond[col].apply(my_func)
In [268]:
# All three converted columns now hold strings ending in " MILLIONS!".
bond.head(2)
Out[268]:
In [269]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[269]:
In [270]:
def good_movie(row):
    """Rate one film from its lead actor and budget.

    Parameters
    ----------
    row : pandas.Series
        One row of the frame, as passed by ``DataFrame.apply(..., axis=1)``.
        It must carry the 'Actor' and 'Budget' labels; its values look like
        [1985, Roger Moore, John Glen, 275.2, 54.5, 9.1].

    Returns
    -------
    str
        'The best' for Pierce Brosnan, 'Enjoyable' for Roger Moore with a
        budget above 40, and 'BHA!' for everything else.
    """
    # Read the fields by label rather than by position: integer keys on a
    # label-indexed Series only work through a positional fallback that
    # pandas 2.1 deprecated, and labels also survive column reordering.
    actor = row['Actor']
    budget = row['Budget']
    if actor == 'Pierce Brosnan':
        return 'The best'
    if actor == 'Roger Moore' and budget > 40:
        return 'Enjoyable'
    return 'BHA!'
In [273]:
# axis=1 calls good_movie once per row, passing the row as a Series; the
# returned labels become the new 'is_good' column.
bond['is_good'] = bond.apply(good_movie, axis=1)
In [274]:
# Preview the first rows together with the new 'is_good' column.
bond.head(5)
Out[274]:
In [275]:
# Fresh copy of the dataset: index by film title and order the rows by that index.
bond = pd.read_csv("data/jamesbond.csv", index_col="Film").sort_index()
bond.head(3)
Out[275]:
In [280]:
# .copy() builds a new, independent DataFrame; plain assignment only binds a
# second name to the same object. The identity checks therefore evaluate to
# (False, True).
copy_of_bond_by_value = bond.copy()
copy_of_bond_by_reference = bond
copy_of_bond_by_value is bond, copy_of_bond_by_reference is bond
Out[280]:
In [282]:
# Same comparison for a single column. The explicit .copy() is always a
# distinct object, so the first check is False.
# NOTE(review): the second check compares against a *fresh* bond['Box Office']
# lookup, so it is True only if pandas hands back the same cached Series object
# for repeated column access — that is version-dependent; do not rely on it.
series_copy_by_value = bond['Box Office'].copy()
series_copy_by_ref = bond['Box Office']
series_copy_by_value is bond['Box Office'], series_copy_by_ref is bond['Box Office']
Out[282]:
In [ ]: