Dataframe part 3

Let's play with some random stuff!


In [3]:
import pandas as pd

In [9]:
bond = pd.read_csv("data/jamesbond.csv")
bond.head(3)


Out[9]:
Film Year Actor Director Box Office Budget Bond Actor Salary
0 Dr. No 1962 Sean Connery Terence Young 448.8 7.0 0.6
1 From Russia with Love 1963 Sean Connery Terence Young 543.8 12.6 1.6
2 Goldfinger 1964 Sean Connery Guy Hamilton 820.4 18.6 3.2

In [10]:
# Set index on load:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")

Indexes

set_index() & reset_index()


In [42]:
bond = pd.read_csv("data/jamesbond.csv")
bond.set_index("Film", inplace=True)
bond.head(3)


Out[42]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
Dr. No 1962 Sean Connery Terence Young 448.8 7.0 0.6
From Russia with Love 1963 Sean Connery Terence Young 543.8 12.6 1.6
Goldfinger 1964 Sean Connery Guy Hamilton 820.4 18.6 3.2

In [39]:
# Delete Index
bond.reset_index(drop=True)


Out[39]:
Year Actor Director Box Office Budget Bond Actor Salary
0 1962 Sean Connery Terence Young 448.8 7.0 0.6
1 1963 Sean Connery Terence Young 543.8 12.6 1.6
2 1964 Sean Connery Guy Hamilton 820.4 18.6 3.2
3 1965 Sean Connery Terence Young 848.1 41.9 4.7
4 1967 David Niven Ken Hughes 315.0 85.0 NaN
5 1967 Sean Connery Lewis Gilbert 514.2 59.9 4.4
6 1969 George Lazenby Peter R. Hunt 291.5 37.3 0.6
7 1971 Sean Connery Guy Hamilton 442.5 34.7 5.8
8 1973 Roger Moore Guy Hamilton 460.3 30.8 NaN
9 1974 Roger Moore Guy Hamilton 334.0 27.7 NaN
10 1977 Roger Moore Lewis Gilbert 533.0 45.1 NaN
11 1979 Roger Moore Lewis Gilbert 535.0 91.5 NaN
12 1981 Roger Moore John Glen 449.4 60.2 NaN
13 1983 Sean Connery Irvin Kershner 380.0 86.0 NaN
14 1983 Roger Moore John Glen 373.8 53.9 7.8
15 1985 Roger Moore John Glen 275.2 54.5 9.1
16 1987 Timothy Dalton John Glen 313.5 68.8 5.2
17 1989 Timothy Dalton John Glen 250.9 56.7 7.9
18 1995 Pierce Brosnan Martin Campbell 518.5 76.9 5.1
19 1997 Pierce Brosnan Roger Spottiswoode 463.2 133.9 10.0
20 1999 Pierce Brosnan Michael Apted 439.5 158.3 13.5
21 2002 Pierce Brosnan Lee Tamahori 465.4 154.2 17.9
22 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
23 2008 Daniel Craig Marc Forster 514.2 181.4 8.1
24 2012 Daniel Craig Sam Mendes 943.5 170.2 14.5
25 2015 Daniel Craig Sam Mendes 726.7 206.3 NaN

In [43]:
bond.reset_index(drop=False, inplace=True)
bond.head()


Out[43]:
Film Year Actor Director Box Office Budget Bond Actor Salary
0 Dr. No 1962 Sean Connery Terence Young 448.8 7.0 0.6
1 From Russia with Love 1963 Sean Connery Terence Young 543.8 12.6 1.6
2 Goldfinger 1964 Sean Connery Guy Hamilton 820.4 18.6 3.2
3 Thunderball 1965 Sean Connery Terence Young 848.1 41.9 4.7
4 Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [44]:
bond.set_index("Year", inplace=True)
bond.head(3)


Out[44]:
Film Actor Director Box Office Budget Bond Actor Salary
Year
1962 Dr. No Sean Connery Terence Young 448.8 7.0 0.6
1963 From Russia with Love Sean Connery Terence Young 543.8 12.6 1.6
1964 Goldfinger Sean Connery Guy Hamilton 820.4 18.6 3.2

Get rows from Index


In [47]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)

# Sorting the index matters on big datasets: pandas can look up rows
# much faster when the index is sorted.


Out[47]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

loc()


In [53]:
# Get by Index --> Return Series
bond.loc['A View to a Kill']


Out[53]:
Year                        1985
Actor                Roger Moore
Director               John Glen
Box Office                 275.2
Budget                      54.5
Bond Actor Salary            9.1
Name: A View to a Kill, dtype: object

In [58]:
# .loc raises KeyError when the requested label is not in the index.
try:
    bond.loc['blablabla']
except KeyError:
    # print(...) with a single argument behaves the same on Python 2 and Python 3,
    # whereas the bare `print "..."` statement is a SyntaxError on Python 3.
    print("Index not found!")


Index not found!

In [60]:
# slice! --> DataFrame
bond.loc['Diamonds Are Forever': 'Moonraker']


Out[60]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
Diamonds Are Forever 1971 Sean Connery Guy Hamilton 442.5 34.7 5.8
Die Another Day 2002 Pierce Brosnan Lee Tamahori 465.4 154.2 17.9
Dr. No 1962 Sean Connery Terence Young 448.8 7.0 0.6
For Your Eyes Only 1981 Roger Moore John Glen 449.4 60.2 NaN
From Russia with Love 1963 Sean Connery Terence Young 543.8 12.6 1.6
GoldenEye 1995 Pierce Brosnan Martin Campbell 518.5 76.9 5.1
Goldfinger 1964 Sean Connery Guy Hamilton 820.4 18.6 3.2
Licence to Kill 1989 Timothy Dalton John Glen 250.9 56.7 7.9
Live and Let Die 1973 Roger Moore Guy Hamilton 460.3 30.8 NaN
Moonraker 1979 Roger Moore Lewis Gilbert 535.0 91.5 NaN

In [62]:
# Get multiple rows by List of Indexes
bond.loc[['Moonraker', 'Licence to Kill']]


Out[62]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
Moonraker 1979 Roger Moore Lewis Gilbert 535.0 91.5 NaN
Licence to Kill 1989 Timothy Dalton John Glen 250.9 56.7 7.9

In [63]:
# Get multiple rows by a list of index labels - even if one of them does not exist!
# With the pandas version used here the unknown label comes back as an all-NaN row.
# NOTE(review): recent pandas releases are documented to raise KeyError when a list
# passed to .loc contains a missing label - confirm against the installed version.
bond.loc[['Moonraker', 'Licence to Kill', 'bla bla bla']]


Out[63]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
Moonraker 1979.0 Roger Moore Lewis Gilbert 535.0 91.5 NaN
Licence to Kill 1989.0 Timothy Dalton John Glen 250.9 56.7 7.9
bla bla bla NaN NaN NaN NaN NaN NaN

In [64]:
'Moonraker' in bond.index


Out[64]:
True

iloc()

Index location (row position), independent of the index value


In [70]:
bond.iloc[15]


Out[70]:
Year                           1969
Actor                George Lazenby
Director              Peter R. Hunt
Box Office                    291.5
Budget                         37.3
Bond Actor Salary               0.6
Name: On Her Majesty's Secret Service, dtype: object

In [71]:
bond.iloc[[15,20]]


Out[71]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
On Her Majesty's Secret Service 1969 George Lazenby Peter R. Hunt 291.5 37.3 0.6
The Man with the Golden Gun 1974 Roger Moore Guy Hamilton 334.0 27.7 NaN

In [72]:
bond.iloc[:4]


Out[72]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN
Diamonds Are Forever 1971 Sean Connery Guy Hamilton 442.5 34.7 5.8

In [73]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[73]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [79]:
# Only the last expression of a cell is displayed: the .loc lookup is evaluated
# and discarded, and the output is the row at position 20.
bond.loc['GoldenEye']
bond.iloc[20]


Out[79]:
Year                         1974
Actor                 Roger Moore
Director             Guy Hamilton
Box Office                    334
Budget                       27.7
Bond Actor Salary             NaN
Name: The Man with the Golden Gun, dtype: object

ix[ ]

Deprecated method: use .loc for label-based indexing or .iloc for positional indexing instead.


In [81]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[81]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [82]:
bond.ix['GoldenEye']


/Users/alexcomu/Desktop/PLAYGROUND/python/pandas-for-dummies/env/lib/python2.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.
Out[82]:
Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

In [84]:
# Projection
bond.loc['Moonraker', ['Actor', 'Budget']]


Out[84]:
Actor     Roger Moore
Budget           91.5
Name: Moonraker, dtype: object

In [89]:
# Row at position 14, columns selected by a positional slice (end is exclusive).
# This result is not displayed because it is not the last expression of the cell.
bond.iloc[14, 2:5]
# Same row, columns selected by an explicit list of positions - this is the output.
bond.iloc[14, [1,2,5]]


Out[89]:
Actor                Roger Moore
Director               John Glen
Bond Actor Salary            7.8
Name: Octopussy, dtype: object

In [90]:
# .ix mixes both styles: 20 is used as a row position (the index holds film titles),
# while 'Budget' is a column label.
# Non-deprecated equivalent: bond.iloc[20]['Budget']
bond.ix[20, 'Budget']


Out[90]:
27.699999999999999

In [95]:
bond.ix['Moonraker', 0:3]


Out[95]:
Year                 1979
Actor         Roger Moore
Director    Lewis Gilbert
Name: Moonraker, dtype: object

Set new Value


In [96]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[96]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [98]:
# Label-based row lookup; .loc replaces the deprecated .ix accessor.
bond.loc['Dr. No']


Out[98]:
Year                          1962
Actor                 Sean Connery
Director             Terence Young
Box Office                   448.8
Budget                           7
Bond Actor Salary              0.6
Name: Dr. No, dtype: object

In [99]:
bond.ix['Dr. No', 'Actor'] = 'Comu'

In [100]:
# Check the updated row with a label-based lookup (.loc replaces the deprecated .ix).
bond.loc['Dr. No']


Out[100]:
Year                          1962
Actor                         Comu
Director             Terence Young
Box Office                   448.8
Budget                           7
Bond Actor Salary              0.6
Name: Dr. No, dtype: object

In [101]:
bond.loc['Dr. No', 'Actor'] = 'Vale'

In [102]:
# Check the updated row with a label-based lookup (.loc replaces the deprecated .ix).
bond.loc['Dr. No']


Out[102]:
Year                          1962
Actor                         Vale
Director             Terence Young
Box Office                   448.8
Budget                           7
Bond Actor Salary              0.6
Name: Dr. No, dtype: object

In [105]:
bond.loc['Dr. No', ['Actor', 'Director']] = ['Vale', 'Pippo']

In [106]:
# Check both updated columns with a label-based lookup (.loc replaces the deprecated .ix).
bond.loc['Dr. No']


Out[106]:
Year                  1962
Actor                 Vale
Director             Pippo
Box Office           448.8
Budget                   7
Bond Actor Salary      0.6
Name: Dr. No, dtype: object

In [107]:
bond.loc['Dr. No', ['Actor', 'Director']]


Out[107]:
Actor        Vale
Director    Pippo
Name: Dr. No, dtype: object

Set multiple new Values


In [129]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[129]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [118]:
# Change multiple values
# First version --> Very BAD!

In [111]:
mask = bond['Actor'] == 'Sean Connery'

In [117]:
# Anti-pattern, kept on purpose: bond[mask] builds a separate DataFrame with only
# the matching rows (pandas reports it as "a copy of a slice" in the warning).
df2 = bond[mask]
# Assigning a column on that derived frame triggers SettingWithCopyWarning.
# NOTE(review): bond itself is presumably left unchanged by this line - verify.
df2['Actor'] = 'Sir Sean Connery'


/Users/alexcomu/Desktop/PLAYGROUND/python/pandas-for-dummies/env/lib/python2.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until

In [ ]:
# Change multiple values
# Second version --> Much better!

In [130]:
mask = bond['Actor'] == 'Sean Connery'
bond.loc[mask, ['Actor', 'Bond Actor Salary']] = ['pippo', 100000000]
bond.head(5)


Out[130]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN
Diamonds Are Forever 1971 pippo Guy Hamilton 442.5 34.7 100000000.0
Die Another Day 2002 Pierce Brosnan Lee Tamahori 465.4 154.2 17.9

Rename Index Labels / Columns


In [133]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[133]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [136]:
## Rename Columns
bond.rename(columns={"Year": "Release Date", "Budget": "Moneyyyy"}, inplace=True)
bond.head()


Out[136]:
Release Date Actor Director Box Office Moneyyyy Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN
Diamonds Are Forever 1971 Sean Connery Guy Hamilton 442.5 34.7 5.8
Die Another Day 2002 Pierce Brosnan Lee Tamahori 465.4 154.2 17.9

In [137]:
## rename Indexes
bond.rename(index={"A View to a Kill": "Bla bla bla"}, inplace=True)
bond.head()


Out[137]:
Release Date Actor Director Box Office Moneyyyy Bond Actor Salary
Film
Bla bla bla 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN
Diamonds Are Forever 1971 Sean Connery Guy Hamilton 442.5 34.7 5.8
Die Another Day 2002 Pierce Brosnan Lee Tamahori 465.4 154.2 17.9

In [141]:
# Rename all the columns in order
bond.columns = ['asd', '123', 'zxcaa', 'sdjkadkas', 'sdczaaa', 'Bla bla']

In [142]:
bond.head(1)


Out[142]:
asd 123 zxcaa sdjkadkas sdczaaa Bla bla
Film
Bla bla bla 1985 Roger Moore John Glen 275.2 54.5 9.1

Delete Rows / Columns


In [167]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[167]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [168]:
# delete all occurrences
# Set axis = 0 to remove rows
bond.drop(labels='Casino Royale', inplace=True)
bond.head(3)


Out[168]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Diamonds Are Forever 1971 Sean Connery Guy Hamilton 442.5 34.7 5.8
Die Another Day 2002 Pierce Brosnan Lee Tamahori 465.4 154.2 17.9

In [170]:
# Set axis = 1 to remove columns
bond.drop('Year', axis=1, inplace=True)
bond.head(2)


Out[170]:
Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill Roger Moore John Glen 275.2 54.5 9.1
Diamonds Are Forever Sean Connery Guy Hamilton 442.5 34.7 5.8

In [171]:
bond.drop(['Actor', 'Director'], axis=1, inplace=True)
bond.head(2)


Out[171]:
Box Office Budget Bond Actor Salary
Film
A View to a Kill 275.2 54.5 9.1
Diamonds Are Forever 442.5 34.7 5.8

In [177]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[177]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [178]:
# pop an entire column
actor = bond.pop('Actor')
actor


Out[178]:
Film
A View to a Kill                      Roger Moore
Casino Royale                        Daniel Craig
Casino Royale                         David Niven
Diamonds Are Forever                 Sean Connery
Die Another Day                    Pierce Brosnan
Dr. No                               Sean Connery
For Your Eyes Only                    Roger Moore
From Russia with Love                Sean Connery
GoldenEye                          Pierce Brosnan
Goldfinger                           Sean Connery
Licence to Kill                    Timothy Dalton
Live and Let Die                      Roger Moore
Moonraker                             Roger Moore
Never Say Never Again                Sean Connery
Octopussy                             Roger Moore
On Her Majesty's Secret Service    George Lazenby
Quantum of Solace                    Daniel Craig
Skyfall                              Daniel Craig
Spectre                              Daniel Craig
The Living Daylights               Timothy Dalton
The Man with the Golden Gun           Roger Moore
The Spy Who Loved Me                  Roger Moore
The World Is Not Enough            Pierce Brosnan
Thunderball                          Sean Connery
Tomorrow Never Dies                Pierce Brosnan
You Only Live Twice                  Sean Connery
Name: Actor, dtype: object

In [179]:
bond.head(1)


Out[179]:
Year Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 John Glen 275.2 54.5 9.1

In [180]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[180]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [181]:
del bond['Director']
bond.head()


Out[181]:
Year Actor Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig 581.5 145.3 3.3
Casino Royale 1967 David Niven 315.0 85.0 NaN
Diamonds Are Forever 1971 Sean Connery 442.5 34.7 5.8
Die Another Day 2002 Pierce Brosnan 465.4 154.2 17.9

Create Random Sample


In [182]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[182]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [193]:
bond.sample(3)


Out[193]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
Octopussy 1983 Roger Moore John Glen 373.8 53.9 7.8
Dr. No 1962 Sean Connery Terence Young 448.8 7.0 0.6
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1

In [203]:
bond.sample(frac=.15)


Out[203]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
Never Say Never Again 1983 Sean Connery Irvin Kershner 380.0 86.0 NaN
Licence to Kill 1989 Timothy Dalton John Glen 250.9 56.7 7.9
You Only Live Twice 1967 Sean Connery Lewis Gilbert 514.2 59.9 4.4
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1

In [217]:
bond.sample(n=3, axis=1).head(2)


Out[217]:
Actor Year Budget
Film
A View to a Kill Roger Moore 1985 54.5
Casino Royale Daniel Craig 2006 145.3

nsmallest() / nlargest()


In [218]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[218]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [219]:
bond.sort_values("Box Office", ascending=False).head(3)


Out[219]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
Skyfall 2012 Daniel Craig Sam Mendes 943.5 170.2 14.5
Thunderball 1965 Sean Connery Terence Young 848.1 41.9 4.7
Goldfinger 1964 Sean Connery Guy Hamilton 820.4 18.6 3.2

In [229]:
# max(bond['Box Office'])
# Get Dataframe
bond.nlargest(3, 'Box Office')


Out[229]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
Skyfall 2012 Daniel Craig Sam Mendes 943.5 170.2 14.5
Thunderball 1965 Sean Connery Terence Young 848.1 41.9 4.7
Goldfinger 1964 Sean Connery Guy Hamilton 820.4 18.6 3.2

In [231]:
# Get Series
bond['Box Office'].nlargest(2)


Out[231]:
Film
Skyfall        943.5
Thunderball    848.1
Name: Box Office, dtype: float64

In [227]:
# min(bond['Box Office'])
bond.nsmallest(3, 'Box Office')


Out[227]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
Licence to Kill 1989 Timothy Dalton John Glen 250.9 56.7 7.9
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
On Her Majesty's Secret Service 1969 George Lazenby Peter R. Hunt 291.5 37.3 0.6

Filtering -> Where()


In [232]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[232]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [236]:
mask = bond['Actor']=='Sean Connery'
bond.where(mask)


Out[236]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill NaN NaN NaN NaN NaN NaN
Casino Royale NaN NaN NaN NaN NaN NaN
Casino Royale NaN NaN NaN NaN NaN NaN
Diamonds Are Forever 1971.0 Sean Connery Guy Hamilton 442.5 34.7 5.8
Die Another Day NaN NaN NaN NaN NaN NaN
Dr. No 1962.0 Sean Connery Terence Young 448.8 7.0 0.6
For Your Eyes Only NaN NaN NaN NaN NaN NaN
From Russia with Love 1963.0 Sean Connery Terence Young 543.8 12.6 1.6
GoldenEye NaN NaN NaN NaN NaN NaN
Goldfinger 1964.0 Sean Connery Guy Hamilton 820.4 18.6 3.2
Licence to Kill NaN NaN NaN NaN NaN NaN
Live and Let Die NaN NaN NaN NaN NaN NaN
Moonraker NaN NaN NaN NaN NaN NaN
Never Say Never Again 1983.0 Sean Connery Irvin Kershner 380.0 86.0 NaN
Octopussy NaN NaN NaN NaN NaN NaN
On Her Majesty's Secret Service NaN NaN NaN NaN NaN NaN
Quantum of Solace NaN NaN NaN NaN NaN NaN
Skyfall NaN NaN NaN NaN NaN NaN
Spectre NaN NaN NaN NaN NaN NaN
The Living Daylights NaN NaN NaN NaN NaN NaN
The Man with the Golden Gun NaN NaN NaN NaN NaN NaN
The Spy Who Loved Me NaN NaN NaN NaN NaN NaN
The World Is Not Enough NaN NaN NaN NaN NaN NaN
Thunderball 1965.0 Sean Connery Terence Young 848.1 41.9 4.7
Tomorrow Never Dies NaN NaN NaN NaN NaN NaN
You Only Live Twice 1967.0 Sean Connery Lewis Gilbert 514.2 59.9 4.4

In [237]:
bond.where(bond['Box Office']>800)


Out[237]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill NaN NaN NaN NaN NaN NaN
Casino Royale NaN NaN NaN NaN NaN NaN
Casino Royale NaN NaN NaN NaN NaN NaN
Diamonds Are Forever NaN NaN NaN NaN NaN NaN
Die Another Day NaN NaN NaN NaN NaN NaN
Dr. No NaN NaN NaN NaN NaN NaN
For Your Eyes Only NaN NaN NaN NaN NaN NaN
From Russia with Love NaN NaN NaN NaN NaN NaN
GoldenEye NaN NaN NaN NaN NaN NaN
Goldfinger 1964.0 Sean Connery Guy Hamilton 820.4 18.6 3.2
Licence to Kill NaN NaN NaN NaN NaN NaN
Live and Let Die NaN NaN NaN NaN NaN NaN
Moonraker NaN NaN NaN NaN NaN NaN
Never Say Never Again NaN NaN NaN NaN NaN NaN
Octopussy NaN NaN NaN NaN NaN NaN
On Her Majesty's Secret Service NaN NaN NaN NaN NaN NaN
Quantum of Solace NaN NaN NaN NaN NaN NaN
Skyfall 2012.0 Daniel Craig Sam Mendes 943.5 170.2 14.5
Spectre NaN NaN NaN NaN NaN NaN
The Living Daylights NaN NaN NaN NaN NaN NaN
The Man with the Golden Gun NaN NaN NaN NaN NaN NaN
The Spy Who Loved Me NaN NaN NaN NaN NaN NaN
The World Is Not Enough NaN NaN NaN NaN NaN NaN
Thunderball 1965.0 Sean Connery Terence Young 848.1 41.9 4.7
Tomorrow Never Dies NaN NaN NaN NaN NaN NaN
You Only Live Twice NaN NaN NaN NaN NaN NaN

In [239]:
# `mask` is the 'Sean Connery' filter created in an earlier cell of this section.
mask2 = bond['Box Office']>800
# AND: keep rows matching both conditions (evaluated, but not displayed).
bond.where(mask & mask2)
# OR: keep rows matching at least one condition (last expression, so this is the output).
bond.where(mask | mask2)


Out[239]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill NaN NaN NaN NaN NaN NaN
Casino Royale NaN NaN NaN NaN NaN NaN
Casino Royale NaN NaN NaN NaN NaN NaN
Diamonds Are Forever 1971.0 Sean Connery Guy Hamilton 442.5 34.7 5.8
Die Another Day NaN NaN NaN NaN NaN NaN
Dr. No 1962.0 Sean Connery Terence Young 448.8 7.0 0.6
For Your Eyes Only NaN NaN NaN NaN NaN NaN
From Russia with Love 1963.0 Sean Connery Terence Young 543.8 12.6 1.6
GoldenEye NaN NaN NaN NaN NaN NaN
Goldfinger 1964.0 Sean Connery Guy Hamilton 820.4 18.6 3.2
Licence to Kill NaN NaN NaN NaN NaN NaN
Live and Let Die NaN NaN NaN NaN NaN NaN
Moonraker NaN NaN NaN NaN NaN NaN
Never Say Never Again 1983.0 Sean Connery Irvin Kershner 380.0 86.0 NaN
Octopussy NaN NaN NaN NaN NaN NaN
On Her Majesty's Secret Service NaN NaN NaN NaN NaN NaN
Quantum of Solace NaN NaN NaN NaN NaN NaN
Skyfall 2012.0 Daniel Craig Sam Mendes 943.5 170.2 14.5
Spectre NaN NaN NaN NaN NaN NaN
The Living Daylights NaN NaN NaN NaN NaN NaN
The Man with the Golden Gun NaN NaN NaN NaN NaN NaN
The Spy Who Loved Me NaN NaN NaN NaN NaN NaN
The World Is Not Enough NaN NaN NaN NaN NaN NaN
Thunderball 1965.0 Sean Connery Terence Young 848.1 41.9 4.7
Tomorrow Never Dies NaN NaN NaN NaN NaN NaN
You Only Live Twice 1967.0 Sean Connery Lewis Gilbert 514.2 59.9 4.4

Filtering -> query()


In [240]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[240]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [241]:
# we need to avoid space in columns!
bond.columns = [x.replace(" ", "_") for x in bond.columns]

In [243]:
bond.head(1)


Out[243]:
Year Actor Director Box_Office Budget Bond_Actor_Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1

In [245]:
bond.query('Actor == "Sean Connery"')


Out[245]:
Year Actor Director Box_Office Budget Bond_Actor_Salary
Film
Diamonds Are Forever 1971 Sean Connery Guy Hamilton 442.5 34.7 5.8
Dr. No 1962 Sean Connery Terence Young 448.8 7.0 0.6
From Russia with Love 1963 Sean Connery Terence Young 543.8 12.6 1.6
Goldfinger 1964 Sean Connery Guy Hamilton 820.4 18.6 3.2
Never Say Never Again 1983 Sean Connery Irvin Kershner 380.0 86.0 NaN
Thunderball 1965 Sean Connery Terence Young 848.1 41.9 4.7
You Only Live Twice 1967 Sean Connery Lewis Gilbert 514.2 59.9 4.4

In [251]:
bond.query('Budget > 100')


Out[251]:
Year Actor Director Box_Office Budget Bond_Actor_Salary
Film
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Die Another Day 2002 Pierce Brosnan Lee Tamahori 465.4 154.2 17.9
Quantum of Solace 2008 Daniel Craig Marc Forster 514.2 181.4 8.1
Skyfall 2012 Daniel Craig Sam Mendes 943.5 170.2 14.5
Spectre 2015 Daniel Craig Sam Mendes 726.7 206.3 NaN
The World Is Not Enough 1999 Pierce Brosnan Michael Apted 439.5 158.3 13.5
Tomorrow Never Dies 1997 Pierce Brosnan Roger Spottiswoode 463.2 133.9 10.0

In [252]:
bond.query('Actor != "Sean Connery" and Budget > 100')


Out[252]:
Year Actor Director Box_Office Budget Bond_Actor_Salary
Film
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Die Another Day 2002 Pierce Brosnan Lee Tamahori 465.4 154.2 17.9
Quantum of Solace 2008 Daniel Craig Marc Forster 514.2 181.4 8.1
Skyfall 2012 Daniel Craig Sam Mendes 943.5 170.2 14.5
Spectre 2015 Daniel Craig Sam Mendes 726.7 206.3 NaN
The World Is Not Enough 1999 Pierce Brosnan Michael Apted 439.5 158.3 13.5
Tomorrow Never Dies 1997 Pierce Brosnan Roger Spottiswoode 463.2 133.9 10.0

In [255]:
bond.query('Actor in ["Sean Connery", "Timothy Dalton"]')


Out[255]:
Year Actor Director Box_Office Budget Bond_Actor_Salary
Film
Diamonds Are Forever 1971 Sean Connery Guy Hamilton 442.5 34.7 5.8
Dr. No 1962 Sean Connery Terence Young 448.8 7.0 0.6
From Russia with Love 1963 Sean Connery Terence Young 543.8 12.6 1.6
Goldfinger 1964 Sean Connery Guy Hamilton 820.4 18.6 3.2
Licence to Kill 1989 Timothy Dalton John Glen 250.9 56.7 7.9
Never Say Never Again 1983 Sean Connery Irvin Kershner 380.0 86.0 NaN
The Living Daylights 1987 Timothy Dalton John Glen 313.5 68.8 5.2
Thunderball 1965 Sean Connery Terence Young 848.1 41.9 4.7
You Only Live Twice 1967 Sean Connery Lewis Gilbert 514.2 59.9 4.4

apply() on columns


In [256]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[256]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [257]:
def my_func(number):
    """Return ``number`` converted to text and followed by the " MILLIONS!" label."""
    label = " MILLIONS!"
    return "".join([str(number), label])

In [259]:
bond['Box Office'] = bond['Box Office'].apply(my_func)
bond['Budget'] = bond['Budget'].apply(my_func)

In [260]:
bond.head(2)


Out[260]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 MILLIONS! 54.5 MILLIONS! 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 MILLIONS! 145.3 MILLIONS! 3.3

In [261]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[261]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [267]:
## Automatic Way
# Apply my_func to several columns with a loop instead of one statement per column.
# Careful: every run of this cell overwrites the columns in place, so running it
# twice appends the suffix twice (see the doubled "MILLIONS!" in the output of the
# next cell). Reload the CSV before re-running.
columns = ['Box Office', 'Budget', 'Bond Actor Salary']
for col in columns:
    bond[col] = bond[col].apply(my_func)

In [268]:
bond.head(2)


Out[268]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 MILLIONS! MILLIONS! 54.5 MILLIONS! MILLIONS! 9.1 MILLIONS! MILLIONS!
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 MILLIONS! MILLIONS! 145.3 MILLIONS! MILLIONS! 3.3 MILLIONS! MILLIONS!

apply() on rows


In [269]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[269]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [270]:
def good_movie(row):
    """Rate a film from its lead actor and budget.

    Parameters
    ----------
    row : pandas.Series
        One row of the bond DataFrame, as passed by ``bond.apply(..., axis=1)``,
        e.g. [1985, Roger Moore, John Glen, 275.2, 54.5, 9.1] labelled by column
        name. It must provide the 'Actor' and 'Budget' labels.

    Returns
    -------
    str
        'The best' for Pierce Brosnan films, 'Enjoyable' for Roger Moore films
        with a budget above 40, and 'BHA!' for everything else.
    """
    # Look the fields up by column label rather than by position: the rating keeps
    # working when columns are added, dropped or reordered, and it avoids
    # integer-position lookups on a label-indexed Series.
    actor = row['Actor']
    budget = row['Budget']
    if actor == 'Pierce Brosnan':
        return 'The best'
    elif actor == 'Roger Moore' and budget > 40:
        return 'Enjoyable'
    return 'BHA!'

In [273]:
bond['is_good'] = bond.apply(good_movie, axis=1)

In [274]:
bond.head(5)


Out[274]:
Year Actor Director Box Office Budget Bond Actor Salary is_good
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1 Enjoyable
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3 BHA!
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN BHA!
Diamonds Are Forever 1971 Sean Connery Guy Hamilton 442.5 34.7 5.8 BHA!
Die Another Day 2002 Pierce Brosnan Lee Tamahori 465.4 154.2 17.9 The best

copy()


In [275]:
bond = pd.read_csv("data/jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head(3)


Out[275]:
Year Actor Director Box Office Budget Bond Actor Salary
Film
A View to a Kill 1985 Roger Moore John Glen 275.2 54.5 9.1
Casino Royale 2006 Daniel Craig Martin Campbell 581.5 145.3 3.3
Casino Royale 1967 David Niven Ken Hughes 315.0 85.0 NaN

In [280]:
# .copy() builds a new DataFrame object, while plain assignment only binds a second
# name to the same object. The identity checks on the last line show the difference.
copy_of_bond_by_value = bond.copy()
copy_of_bond_by_reference = bond
copy_of_bond_by_value is bond, copy_of_bond_by_reference is bond


Out[280]:
(False, True)

In [282]:
series_copy_by_value = bond['Box Office'].copy()
series_copy_by_ref = bond['Box Office']
series_copy_by_value is bond['Box Office'], series_copy_by_ref is bond['Box Office']


Out[282]:
(False, True)

In [ ]: