Ten Jupyter/IPython essentials

Using IPython as an extended shell


In [1]:
%pwd


Out[1]:
'/home/cyrille/minibook/chapter1'

In [2]:
!wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/facebook.zip

In [3]:
%ls


Out[3]:
facebook.zip  [...]

In [4]:
!unzip facebook.zip

In [5]:
%ls


Out[5]:
facebook  facebook.zip  [...]

In [6]:
%cd facebook


Out[6]:
/home/cyrille/minibook/chapter1/facebook

In [7]:
%bookmark fbdata

In [8]:
%ls


Out[8]:
0.circles    1684.circles  3437.circles  3980.circles  686.circles
0.edges      1684.edges    3437.edges    3980.edges    686.edges
107.circles  1912.circles  348.circles   414.circles   698.circles
107.edges    1912.edges    348.edges     414.edges     698.edges

In [9]:
files = !ls -1 -S | grep .edges

In [10]:
files


Out[10]:
['1912.edges',
 '107.edges',
 '1684.edges',
 '3437.edges',
 '348.edges',
 '0.edges',
 '414.edges',
 '686.edges',
 '698.edges',
 '3980.edges']

In [11]:
import os
from operator import itemgetter

# Build the list of .edges file names found in the current directory,
# ordered from the largest file to the smallest. Each name is paired with
# its size in bytes, the pairs are sorted on the size field (descending,
# ties keep their directory-listing order), and only the names are kept.
files = [
    name
    for name, _size in sorted(
        ((entry, os.path.getsize(entry))
         for entry in os.listdir('.')
         if entry.endswith('.edges')),
        key=itemgetter(1),
        reverse=True,
    )
]

In [12]:
!head -n5 {files[0]}


Out[12]:
2290 2363
2346 2025
2140 2428
2201 2506
2425 2557

Learning magic commands


In [13]:
%lsmagic


Out[13]:
Available line magics:
%alias  %alias_magic  %autocall  %automagic  %autosave  %bookmark  %cat  %cd  %clear  %colors  %config  %connect_info  %cp  %debug  %dhist  %dirs  %doctest_mode  %ed  %edit  %env  %gui  %hist  %history  %install_default_config  %install_ext  %install_profiles  %killbgscripts  %ldir  %less  %lf  %lk  %ll  %load  %load_ext  %loadpy  %logoff  %logon  %logstart  %logstate  %logstop  %ls  %lsmagic  %lx  %macro  %magic  %man  %matplotlib  %mkdir  %more  %mv  %notebook  %page  %pastebin  %pdb  %pdef  %pdoc  %pfile  %pinfo  %pinfo2  %popd  %pprint  %precision  %profile  %prun  %psearch  %psource  %pushd  %pwd  %pycat  %pylab  %qtconsole  %quickref  %recall  %rehashx  %reload_ext  %rep  %rerun  %reset  %reset_selective  %rm  %rmdir  %run  %save  %sc  %set_env  %store  %sx  %system  %tb  %time  %timeit  %unalias  %unload_ext  %who  %who_ls  %whos  %xdel  %xmode

Available cell magics:
%%!  %%HTML  %%SVG  %%bash  %%capture  %%debug  %%file  %%html  %%javascript  %%latex  %%perl  %%prun  %%pypy  %%python  %%python2  %%python3  %%ruby  %%script  %%sh  %%svg  %%sx  %%system  %%time  %%timeit  %%writefile

Automagic is ON, % prefix IS NOT needed for line magics.

In [14]:
%history?

In [15]:
%history -l 5


Out[15]:
files = !ls -1 -S | grep .edges
files
!head -n5 {files[0]}
%lsmagic
%history?

In [16]:
# how many minutes in a day?
24 * 60


Out[16]:
1440

In [17]:
# and in a year?
_ * 365


Out[17]:
525600

In [18]:
%%capture output
%ls

In [19]:
output.stdout


Out[19]:
0.circles    1684.circles  3437.circles  3980.circles  686.circles
0.edges      1684.edges    3437.edges    3980.edges    686.edges
107.circles  1912.circles  348.circles   414.circles   698.circles
107.edges    1912.edges    348.edges     414.edges     698.edges

In [20]:
%%bash
cd ..
touch _HEY
ls
rm _HEY
cd facebook


Out[20]:
_HEY
facebook
facebook.zip
[...]

In [21]:
%%script ghci
putStrLn "Hello world!"


Out[21]:
GHCi, version 7.6.3: http://www.haskell.org/ghc/  :? for help
Loading package ghc-prim ... linking ... done.
Loading package integer-gmp ... linking ... done.
Loading package base ... linking ... done.
Prelude> Hello world!
Prelude> Leaving GHCi.

In [22]:
%%writefile myfile.txt
Hello world!


Out[22]:
Writing myfile.txt

In [23]:
!more myfile.txt


Out[23]:
Hello world!

In [24]:
!rm myfile.txt

Mastering tab completion


In [25]:
%cd fbdata
%ls


Out[25]:
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook
/home/cyrille/minibook/chapter1/facebook
0.circles    1684.circles  3437.circles  3980.circles  686.circles
0.edges      1684.edges    3437.edges    3980.edges    686.edges
107.circles  1912.circles  348.circles   414.circles   698.circles
107.edges    1912.edges    348.edges     414.edges     698.edges

Writing interactive documents in the Notebook with Markdown

Creating interactive widgets in the Notebook


In [26]:
from IPython.display import YouTubeVideo
YouTubeVideo('j9YpkSX7NNM')

In [27]:
from ipywidgets import interact  # IPython.html.widgets before IPython 4.0
@interact(x=(0, 10))
def square(x):
    """Return a sentence stating the square of `x`.

    `interact` is given the range (0, 10) for the `x` argument.
    """
    squared = x ** 2
    return "The square of %d is %d." % (x, squared)


Out[27]:
'The square of 7 is 49.'

Running Python scripts from IPython


In [28]:
%cd fbdata
%cd ..


Out[28]:
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook
/home/cyrille/minibook/chapter1/facebook
/home/cyrille/minibook/chapter1

In [29]:
%%writefile egos.py
import sys
import os
# We retrieve the folder as the first positional argument
# to the command-line call. When no argument is given, `folder`
# is left untouched: it must then already exist in the namespace
# the script runs in (e.g. with `%run -i`), otherwise the
# os.listdir() call below raises a NameError.
if len(sys.argv) > 1:
    folder = sys.argv[1]
# We list all files in the specified folder
files = os.listdir(folder)
# identifiers contains the list of identifiers: the integer
# found before the first dot of every file name
identifiers = [int(file.split('.')[0]) for file in files]
# Finally, we remove duplicates with set(), and sort the list
# with sorted().
ids = sorted(set(identifiers))


Out[29]:
Overwriting egos.py

In [30]:
%run egos.py facebook

In [31]:
ids


Out[31]:
[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]

In [32]:
folder = 'facebook'

In [33]:
%run egos.py

In [34]:
%run -i egos.py

In [35]:
ids


Out[35]:
[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]

Introspecting Python objects


In [36]:
import networkx

In [37]:
networkx.Graph?

Debugging Python code

Benchmarking Python code


In [38]:
%cd fbdata


Out[38]:
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook
/home/cyrille/minibook/chapter1/facebook

In [39]:
import networkx

In [40]:
graph = networkx.read_edgelist('107.edges')

In [41]:
len(graph.nodes()), len(graph.edges())


Out[41]:
(1034, 26749)

In [42]:
networkx.is_connected(graph)


Out[42]:
True

In [43]:
%timeit networkx.is_connected(graph)


Out[43]:
100 loops, best of 3: 5.92 ms per loop

Profiling Python code


In [44]:
import networkx

In [45]:
def ncomponents(file):
    """Return the number of connected components of the graph in `file`.

    `file` is handed to networkx.read_edgelist, and the resulting graph
    to networkx.number_connected_components.
    """
    return networkx.number_connected_components(
        networkx.read_edgelist(file))

In [46]:
import glob
def ncomponents_files():
    """Return (file name, component count) pairs for all *.edges files.

    The files are looked up in the current directory and processed in
    sorted (string) order of their names.
    """
    results = []
    for path in sorted(glob.glob('*.edges')):
        results.append((path, ncomponents(path)))
    return results

In [47]:
# One line per file: the name left-aligned in a 12-character column,
# followed by its number of connected components.
for file, n in ncomponents_files():
    print('%-12s' % file, n, 'component(s)')


Out[47]:
0.edges      5 component(s)
107.edges    1 component(s)
1684.edges   4 component(s)
1912.edges   2 component(s)
3437.edges   2 component(s)
348.edges    1 component(s)
3980.edges   4 component(s)
414.edges    2 component(s)
686.edges    1 component(s)
698.edges    3 component(s)

In [48]:
%timeit ncomponents_files()


Out[48]:
1 loops, best of 3: 634 ms per loop

In [49]:
%prun -s cumtime ncomponents_files()


Out[49]:
2391070 function calls in 1.038 seconds

Ordered by: cumulative time

ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1    0.000    0.000    1.038    1.038 {built-in method exec}
     1    0.000    0.000    1.038    1.038 <string>:1(<module>)
    10    0.000    0.000    0.995    0.100 <string>:1(read_edgelist)
    10    0.000    0.000    0.995    0.100 decorators.py:155(_open_file)
    10    0.376    0.038    0.995    0.099 edgelist.py:174(parse_edgelist)
170174    0.279    0.000    0.350    0.000 graph.py:648(add_edge)
170184    0.059    0.000    0.095    0.000 edgelist.py:366(<genexpr>)
    10    0.000    0.000    0.021    0.002 connected.py:98(number_connected_components)
    35    0.001    0.000    0.021    0.001 connected.py:22(connected_components)