In [1]:
%pwd
Out[1]:
In [2]:
# -nc (no-clobber): skip the download when facebook.zip is already present,
# so re-running this cell does not save a second copy as facebook.zip.1.
!wget -nc https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/facebook.zip
In [3]:
%ls
Out[3]:
In [4]:
# -o: overwrite already-extracted files without prompting. Without it, a
# re-run stops at unzip's interactive "replace?" question, which cannot be
# answered from a notebook cell.
!unzip -o facebook.zip
In [5]:
%ls
Out[5]:
In [6]:
%cd facebook
Out[6]:
In [7]:
%bookmark fbdata
In [8]:
%ls
Out[8]:
In [9]:
# List files by decreasing size and keep the names ending in ".edges".
# The dot is escaped and the pattern anchored so that grep matches the
# literal extension rather than "any character followed by edges".
files = !ls -1 -S | grep '\.edges$'
In [10]:
files
Out[10]:
In [11]:
import os
from operator import itemgetter
# Build the list of .edges file names found in the current directory,
# ordered from the largest file to the smallest:
#   1. pair every matching name with its size in bytes,
#   2. sort the pairs on the size (second item), largest first,
#   3. keep only the name from each pair, preserving that order.
files = [
    name
    for name, _size in sorted(
        ((entry, os.stat(entry).st_size)
         for entry in os.listdir('.')
         if entry.endswith('.edges')),
        key=itemgetter(1),
        reverse=True,
    )
]
In [12]:
!head -n5 {files[0]}
Out[12]:
In [13]:
%lsmagic
Out[13]:
In [14]:
%history?
In [15]:
%history -l 5
Out[15]:
In [16]:
# how many minutes in a day?
24 * 60
Out[16]:
In [17]:
# and in a year?
# `_` holds the output of the most recently executed cell, so this result is
# only meaningful when the cell is run right after the one computing 24 * 60.
_ * 365
Out[17]:
In [18]:
%%capture output
%ls
In [19]:
output.stdout
Out[19]:
In [20]:
%%bash
cd ..
touch _HEY
ls
rm _HEY
cd facebook
Out[20]:
In [21]:
%%script ghci
putStrLn "Hello world!"
Out[21]:
In [22]:
%%writefile myfile.txt
Hello world!
Out[22]:
In [23]:
!more myfile.txt
Out[23]:
In [24]:
!rm myfile.txt
In [25]:
%cd fbdata
%ls
Out[25]:
In [26]:
from IPython.display import YouTubeVideo
YouTubeVideo('j9YpkSX7NNM')
In [27]:
from ipywidgets import interact # IPython.html.widgets before IPython 4.0
@interact(x=(0, 10))
def square(x):
    """Return a sentence stating the square of ``x``.

    The ``interact`` decorator is given ``x=(0, 10)``; it builds a control
    for ``x`` from that range and calls this function with the selected
    value.
    """
    return "The square of %d is %d." % (x, x**2)
Out[27]:
In [28]:
%cd fbdata
%cd ..
Out[28]:
In [29]:
%%writefile egos.py
import sys
import os

# The folder to scan is the first positional command-line argument.
# When no argument is given, `folder` is intentionally not assigned here:
# it must then already exist in the namespace the script runs in
# (e.g. when the script is launched with `%run -i`).
if len(sys.argv) > 1:
    folder = sys.argv[1]

# List all files in the specified folder.
files = os.listdir(folder)

# `identifiers` collects the integer prefix (the text before the first dot)
# of every file name. Entries whose prefix is not an integer, such as hidden
# files, are skipped instead of aborting the script with a ValueError.
identifiers = []
for file in files:
    try:
        identifiers.append(int(file.split('.')[0]))
    except ValueError:
        continue

# Finally, remove duplicates with set() and sort the result with sorted().
ids = sorted(set(identifiers))
Out[29]:
In [30]:
%run egos.py facebook
In [31]:
ids
Out[31]:
In [32]:
folder = 'facebook'
In [33]:
%run egos.py
In [34]:
%run -i egos.py
In [35]:
ids
Out[35]:
In [36]:
import networkx
In [37]:
networkx.Graph?
In [38]:
%cd fbdata
Out[38]:
In [39]:
import networkx
In [40]:
graph = networkx.read_edgelist('107.edges')
In [41]:
len(graph.nodes()), len(graph.edges())
Out[41]:
In [42]:
networkx.is_connected(graph)
Out[42]:
In [43]:
%timeit networkx.is_connected(graph)
Out[43]:
In [44]:
import networkx
In [45]:
def ncomponents(file):
    """Return the number of connected components of the graph stored in `file`.

    `file` is passed unchanged to networkx.read_edgelist(); the graph it
    returns is handed to networkx.number_connected_components().
    """
    graph = networkx.read_edgelist(file)
    return networkx.number_connected_components(graph)
In [46]:
import glob
def ncomponents_files():
    """Return a list of (file name, component count) pairs.

    Every file matching '*.edges' in the current working directory is
    processed with ncomponents(), in sorted file-name order.
    """
    return [(file, ncomponents(file))
            for file in sorted(glob.glob('*.edges'))]
In [47]:
# Print one line per .edges file: the name padded to 12 columns, then the
# number of connected components found in that file's graph.
for file, n in ncomponents_files():
    print(file.ljust(12), n, 'component(s)')
Out[47]:
In [48]:
%timeit ncomponents_files()
Out[48]:
In [49]:
%prun -s cumtime ncomponents_files()
Out[49]: