Instructor: Luca Foschini (email: luca@evidation.com) (twitter: @calimagna)
Format: Lecture and hands-on
It's safe to say that every internet service has an API for Python:
Examples:
Everything is nicely integrated in notebooks and can easily be turned into slides.
In [5]:
# Example 1:
# do something fun with the weather API
Introduction: http://pandas.pydata.org/pandas-docs/stable/10min.html
Tutorial on data wrangling:
In [3]:
# Run some exploration on tutorial
%matplotlib inline
import pandas as pd
import matplotlib
matplotlib.style.use('ggplot')
#montreal weather
weather_url = "https://raw.githubusercontent.com/jvns/pandas-cookbook/master/data/weather_2012.csv"
weather_2012_final = pd.read_csv(weather_url, parse_dates='Date/Time', index_col='Date/Time')
weather_2012_final['Temp (C)'].plot(figsize=(15, 6))
#weather_2012_final.to_hdf()
Out[3]:
In [18]:
# Median temperature for hours reported as 'Cloudy' vs. 'Snow'.
# .loc selects rows and column in one step instead of chained indexing, and
# print() with a single argument behaves the same on Python 2 and Python 3.
is_cloudy = weather_2012_final['Weather'] == 'Cloudy'
is_snow = weather_2012_final['Weather'] == 'Snow'
print(weather_2012_final.loc[is_cloudy, 'Temp (C)'].median())
print(weather_2012_final.loc[is_snow, 'Temp (C)'].median())

# Persist the frame to HDF5. to_hdf requires a `key` naming the dataset inside
# the file, and compression is configured through `complib`/`complevel`
# (there is no `compression` argument; 'blocs' was a typo for 'blosc').
weather_2012_final.to_hdf('ciao.h5', key='weather_2012', complib='blosc', complevel=9)
In [24]:
# Run the example above
def closest(position, positions):
    """Return the index of the point in ``positions`` nearest to ``position``.

    Pure-Python reference implementation (one interpreted loop iteration per
    point), used as the slow baseline for the vectorized version.

    Parameters
    ----------
    position : pair (x0, y0)
        The query point.
    positions : iterable of pairs (x, y)
        Candidate points.

    Returns
    -------
    int or None
        Index of the nearest candidate; on ties the earliest index wins.
        ``None`` when ``positions`` is empty.
    """
    x0, y0 = position
    dbest, ibest = None, None
    for i, (x, y) in enumerate(positions):
        # Squared Euclidean distance: ordering is the same as for the true
        # distance, so the square root is skipped.
        d = (x - x0) ** 2 + (y - y0) ** 2
        # Strict '<' keeps the first of several equally distant points.
        if dbest is None or d < dbest:
            dbest, ibest = d, i
    return ibest
In [26]:
import random

# Ten million random (x, y) points as a plain Python list of tuples.
# `range` replaces the Python-2-only `xrange`, which is a NameError on Python 3.
positions = [(random.random(), random.random()) for _ in range(10000000)]
In [27]:
%timeit closest((.5, .5), positions)
In [37]:
# numpy is not imported anywhere earlier in the notebook, so import it here to
# keep this cell runnable on a fresh kernel.
import numpy as np

# Same experiment as the list version, but as one (10000000, 2) float array of
# uniform samples in [0, 1).
positions = np.random.rand(10000000, 2)
In [38]:
# Column 0 holds the x coordinates, column 1 the y coordinates.
x = positions[:, 0]
y = positions[:, 1]
In [39]:
# Squared distance from every point to (0.5, 0.5), computed element-wise on
# the whole coordinate arrays at once.
distances = (x - 0.5) ** 2 + (y - 0.5) ** 2
In [40]:
%timeit exec In[39]
In [8]:
from IPython.display import Image

# Embed the remote picture in the cell output (the Image object is the cell's
# last expression, so the notebook renders it).
image_url = 'http://i.imgur.com/k0t1e.png'
Image(url=image_url)
Out[8]:
In [14]:
!pip install ipyparallel
In [2]:
# Example: Run some parallel code
# NOTE(review): presumably a cluster must already be running under the
# 'mycluster' IPython profile before this cell is executed -- confirm.
from ipyparallel import Client
# Connect a client using the 'mycluster' profile.
client = Client(profile='mycluster')
# %px is an ipyparallel line magic; it is expected to execute the statement on
# the cluster engines rather than in the local kernel -- TODO confirm it is
# registered by the Client creation above in the installed version.
%px print("Hello from the cluster engines!")
Intro and examples here
In [41]:
%matplotlib inline
import networkx as nx
import matplotlib.pyplot as plt
from IPython.display import Image
n = 10
m = 20
rgraph1 = nx.gnm_random_graph(n,m)
print "Nodes: ", rgraph1.nodes()
print "Edges: ", rgraph1.edges()
In [42]:
# Report whether every node of the random graph is reachable from every other.
# Written with print() calls so the cell runs on Python 3 as well as Python 2.
if nx.is_connected(rgraph1):
    print("Graph is connected")
else:
    print("Graph is not connected")
In [43]:
# nx.diameter raises NetworkXError on a disconnected graph, and a random graph
# with 10 nodes and 20 edges is not guaranteed to be connected, so only compute
# the diameter when it is defined.
if nx.is_connected(rgraph1):
    print("Diameter of graph is  {}".format(nx.diameter(rgraph1)))
else:
    print("Graph is not connected, so its diameter is undefined")

# Quick default drawing of the graph.
nx.draw(rgraph1)
plt.draw()
In [ ]:
import os

# Split the edges into two groups by the sum of their endpoint labels so that
# each group can be drawn with its own style.
elarge = [(u, v) for (u, v) in rgraph1.edges() if u + v >= 9]
esmall = [(u, v) for (u, v) in rgraph1.edges() if u + v < 9]

pos = nx.spring_layout(rgraph1)  # positions for all nodes

# nodes
nx.draw_networkx_nodes(rgraph1, pos, node_size=700)

# edges: "large" edges solid red, "small" edges dashed, semi-transparent blue
nx.draw_networkx_edges(rgraph1, pos, edgelist=elarge,
                       width=6, edge_color='r')
nx.draw_networkx_edges(rgraph1, pos, edgelist=esmall,
                       width=6, alpha=0.5, edge_color='b', style='dashed')

# labels
nx.draw_networkx_labels(rgraph1, pos, font_size=20)

plt.axis('off')

# savefig does not create missing directories, so make sure data/ exists
# before writing the PNG (isdir + makedirs works on Python 2 and 3).
if not os.path.isdir("data"):
    os.makedirs("data")
plt.savefig("data/weighted_graph.png")  # save as png
plt.show()  # display
In [44]:
# Build the depth-first and breadth-first search trees rooted at node 0 and
# list their edges. print() with a pre-formatted string replaces the Python 2
# print statement; list() keeps the output a plain list of edge tuples.
T = nx.dfs_tree(rgraph1, 0)
print("DFS Tree edges :  {}".format(list(T.edges())))
T = nx.bfs_tree(rgraph1, 0)
print("BFS Tree edges :  {}".format(list(T.edges())))
Extend the analysis provided here:
In [ ]: