In [1]:
import plotly as py
from plotly import plotly
import pandas as pd
import numpy as np
from plotly.graph_objs import Scatter, Layout, Data, Figure, Annotation, Scatter3d
import plotly.figure_factory as ff
# Initialise Plotly's offline notebook mode once, before any py.offline.iplot call below.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook -- confirm against the plotly.offline docs.
py.offline.init_notebook_mode(connected=True)
Below we will examine the different aspects/objects that define a plot in Plotly. These are: traces, Data, Layout, and Figure.
We will start with a few examples to showcase Plotly.
In [2]:
# Smallest possible figure: a plain dict with a "data" list and a "layout" entry.
hello_trace = Scatter(x=[1, 2, 3, 4], y=[4, 3, 2, 1])
hello_layout = Layout(title="hello world")
py.offline.iplot({"data": [hello_trace], "layout": hello_layout})
In [3]:
# Render a DataFrame as a Plotly table through the figure factory.
SCHOOL_EARNINGS_URL = "https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv"
df = pd.read_csv(SCHOOL_EARNINGS_URL)
table = ff.create_table(df)
py.offline.iplot(table, filename="jupyter/table1")
Underneath every graph is a JSON object, which is a dictionary-like data structure. Simply by changing the values of some of its keywords, we get different plots.
In [4]:
# Build the plot in the usual order: trace -> data -> layout -> figure.
trace1 = Scatter(
    x=[1, 2, 3],
    y=[4, 5, 6],
    # marker.size is a numeric attribute; the string "10" is not a valid value for it.
    marker={"color": "red", "symbol": 104, "size": 10},
    mode="markers+lines",
    text=["one", "two", "three"],  # one label per point
    name="1st Trace",
)

# Data wraps the list of traces that make up the figure.
data = Data([trace1])

# Layout carries everything that is not data: here the plot and axis titles.
layout = Layout(
    title="First Plot",
    xaxis={"title": "x1"},
    yaxis={"title": "x2"},
)

figure = Figure(data=data, layout=layout)
py.offline.iplot(figure)
In [5]:
# Load the life-expectancy-per-GDP table and carve out one frame per continent of interest.
df = pd.read_csv(
    "https://raw.githubusercontent.com/yankev/test/master/life-expectancy-per-GDP-2007.csv"
)
is_americas = df["continent"] == "Americas"
is_europe = df["continent"] == "Europe"
americas = df[is_americas]
europe = df[is_europe]
In [6]:
def _continent_trace(frame, label, color):
    """Return a markers-only Scatter of life expectancy against GDP per capita.

    frame -- DataFrame providing the gdp_percap, life_exp and country columns
    label -- legend entry for the trace
    color -- marker fill colour
    """
    return Scatter(
        x=frame.gdp_percap,
        y=frame.life_exp,
        mode="markers",
        marker=dict(size=12, line=dict(width=1), color=color),
        name=label,
        text=frame.country,  # country name attached to each point
    )


# One trace per continent so each gets its own colour and legend entry.
trace_comp0 = _continent_trace(americas, "Americas", "navy")
trace_comp1 = _continent_trace(europe, "Europe", "orange")
data = [trace_comp0, trace_comp1]

layout = Layout(
    # Descriptive title in place of the previous joke placeholder.
    title="Life Expectancy vs. GDP per Capita",
    hovermode="closest",
    xaxis=dict(
        title="GDP per capita (2000 dollars)",
        ticklen=5,
        zeroline=False,
        gridwidth=2,
    ),
    yaxis=dict(title="Life expectancy (years)"),
)

fig = Figure(data=data, layout=layout)
py.offline.iplot(fig)
We see that data is actually a list object in Python. It contains all the traces that you wish to plot. Now the question may be: what is a trace? A trace is just the name we give to a collection of data together with the specification of how we want that data plotted. Notice that a trace is also an object itself, and it is named according to how you want the data displayed on the plotting surface.
In [7]:
# Sample sin and cos at 100 evenly spaced points between 0 and 8*pi.
x = np.linspace(0, 8 * np.pi, 100)
y = np.sin(x)
z = np.cos(x)

# Sine: line only, navy.
trace1 = Scatter(
    x=x,
    y=y,
    mode="lines",
    marker={"size": 8, "color": "navy"},
    name="Sin(x)",
)

# Cosine: line with markers, red and half transparent.
trace2 = Scatter(
    x=x,
    y=z,
    mode="markers+lines",
    marker={"size": 8, "color": "red"},
    name="Cos(x)",
    opacity=0.5,
)

layout = Layout(
    title="My First Plotly Graph",
    xaxis={"title": "x"},
    yaxis={"title": "sin(x)"},
)

# Assemble traces and layout into a figure, then render it inline.
data = Data([trace1, trace2])
fig = Figure(data=data, layout=layout)
py.offline.iplot(fig)
In [8]:
# Two traces with different marker styles; hover over the points to see their text labels.
x = np.arange(1, 3.2, 0.2)
y = 6 * np.sin(x)

layout = Layout(
    title="My Second Plotly Graph",
    xaxis=dict(title="x"),
    yaxis=dict(title="6 * sin(x)"),
)

trace1 = Scatter(
    x=[1, 2, 3],
    y=[4, 5, 6],
    # marker.size is a numeric attribute; the string "10" is not a valid value for it.
    marker={"color": "red", "symbol": 104, "size": 10},
    mode="markers+lines",
    text=["one", "two", "three"],
    name="first trace",
)

trace2 = Scatter(
    x=x,
    y=y,
    marker={"color": "blue", "symbol": "star", "size": 10},
    mode="markers",
    name="2nd trace",
)

data = Data([trace1, trace2])
fig = Figure(data=data, layout=layout)
py.offline.iplot(fig)
The Layout object defines the look of the plot and the plot features that are unrelated to the data. It lets us change things like the title, axis titles, spacing and fonts, and even draw shapes on top of the plot!
In [9]:
# Display the current Layout object as the cell output.
layout
Out[9]:
In [10]:
# Label the point (3, 6) as the highest point, then redraw the figure.
highest_point = Annotation(text="Highest Point", x=3, y=6)
layout.update({"annotations": [highest_point]})

annotated_fig = Figure(data=data, layout=layout)
py.offline.iplot(annotated_fig, filename="pyguide_4")
In [11]:
# Swap the annotation list for a single label at (1, 4), then redraw the figure.
lowest_point = Annotation(text="lowest point", x=1, y=4)
layout.update({"annotations": [lowest_point]})

annotated_fig = Figure(data=data, layout=layout)
py.offline.iplot(annotated_fig)
In [12]:
# Put the "Highest Point" label back and shade the x-range 1..2 with a translucent rectangle.
layout.update(dict(
    annotations=[Annotation(text="Highest Point", x=3, y=6)],
    shapes=[
        {
            "type": "rect",
            # Both references use data coordinates: x0/x1 are x-axis values
            # and y0/y1 are y-axis values (not paper fractions).
            "xref": "x",
            "yref": "y",
            # Numeric corners; the x axis is numeric, so plain numbers replace the strings '1'/'2'.
            "x0": 1,
            "y0": 0,
            "x1": 2,
            "y1": 7,
            "fillcolor": "#d3d3d3",
            "opacity": 0.2,
            # Zero-width outline so only the fill is visible.
            "line": {"width": 0},
        }
    ],
))
py.offline.iplot(Figure(data=data, layout=layout), filename="pyguide_4")
In [13]:
# Random data for the colour scatter plot.
# A dedicated, seeded RandomState keeps the figure reproducible across kernel restarts
# without touching NumPy's global random state.
rng = np.random.RandomState(42)

x = rng.randint(0, 100, 100)
# Add an independent integer offset in [-100, 100) to every x value. Vectorised, and it
# no longer reuses the name `x` as a loop variable.
y = x + rng.randint(-100, 100, size=x.shape)
# Class label in {0, 1, 2}, used to colour the points.
z = rng.randint(0, 3, 100)
In [14]:
layout = Layout(
    title="Color Scatter Plot",
    xaxis=dict(title="x"),
    yaxis=dict(title="y"),
)

# "heatmap-discrete-colorscale" is not a Plotly colorscale name. z only takes the values
# 0, 1 and 2, so spell out a piecewise-constant scale with one flat colour band per value.
discrete_colorscale = [
    [0.0, "orange"], [1 / 3.0, "orange"],
    [1 / 3.0, "red"], [2 / 3.0, "red"],
    [2 / 3.0, "blue"], [1.0, "blue"],
]

trace1 = Scatter(
    x=x,
    y=y,
    mode="markers",
    marker=dict(
        size=12,
        color=z,
        colorscale=discrete_colorscale,
        # Pin the colour range so 0, 1 and 2 always land in the first, middle and last band.
        cmin=0,
        cmax=2,
        showscale=True,
    ),
)

data = Data([trace1])
fig = Figure(data=data, layout=layout)
py.offline.iplot(fig)
In [15]:
# A cleaner approach is one trace per colour group, so gather the points into a frame first
# and check how many points fall into each z group.
df = pd.DataFrame(dict(x=x, y=y, z=z))
df["z"].value_counts()
Out[15]:
In [16]:
def _z_group_trace(frame, z_value, color):
    """Return a markers-only Scatter for the rows of `frame` whose z equals `z_value`.

    frame   -- DataFrame with x, y and z columns
    z_value -- group to select; also used in the legend entry ("Z = <z_value>")
    color   -- marker colour for the whole group
    """
    # Filter once and reuse the subset for both coordinates.
    group = frame[frame["z"] == z_value]
    return Scatter(
        x=group["x"],
        y=group["y"],
        mode="markers",
        marker=dict(size=12, color=color),
        name="Z = {}".format(z_value),
    )


layout = Layout(
    title="Color Scatter Plot (Improved)",
    xaxis=dict(title="x"),
    yaxis=dict(title="y"),
)

# One trace per z value, so each group gets its own colour and legend entry.
trace1 = _z_group_trace(df, 0, "orange")
trace2 = _z_group_trace(df, 1, "red")
trace3 = _z_group_trace(df, 2, "blue")

data = Data([trace1, trace2, trace3])
fig = Figure(data=data, layout=layout)
py.offline.iplot(fig)
In [17]:
# 3D scatter plot of random integer points, coloured by their z value.
# A dedicated, seeded RandomState keeps the figure reproducible across kernel restarts.
rng = np.random.RandomState(0)
x = rng.randint(0, 100, 100)
y = rng.randint(0, 100, 100)
z = rng.randint(0, 10, 100)

layout = Layout(
    title="3d Scatter Plot",
    # 3D traces are drawn in a scene, so their axis titles live under layout.scene;
    # a top-level layout.xaxis only applies to 2D cartesian plots.
    scene=dict(
        xaxis=dict(title="X"),
    ),
)

trace0 = Scatter3d(
    x=x,
    y=y,
    z=z,
    mode="markers",
    marker=dict(
        size=6,
        color=z,
        colorscale="Plasma",
        opacity=0.6,
    ),
)

data = Data([trace0])
fig = Figure(data=data, layout=layout)
py.offline.iplot(fig)
In [18]:
# Input for the 3D bubble chart: the Pokemon dataset.
# Source: https://www.kaggle.com/rounakbanik/pokemon/data
# The CSV is read from the current working directory.
POKEMON_CSV = "pokemon.csv"
dataset = pd.read_csv(POKEMON_CSV)
dataset.dtypes
In [ ]:
# 3D bubble chart: primary type on x, defense on y, attack on z;
# bubble size follows weight and bubble colour follows HP.
bubble_marker = {
    # NOTE(review): rows with a missing weight_kg would give NaN sizes -- confirm the column is complete.
    "size": dataset.weight_kg / 10,
    "opacity": 0.5,
    "color": dataset.hp,
    "colorscale": "Viridis",
    "showscale": True,
    "colorbar": {"title": "HP"},
}

trace0 = Scatter3d(
    x=dataset.type1,
    y=dataset.defense,
    z=dataset.attack,
    text=dataset.name,
    mode="markers",
    marker=bubble_marker,
)

# Fixed 1000x1000 canvas; the 3D axis titles are set on the scene.
scene_axes = {
    "zaxis": {"title": "Attack"},
    "yaxis": {"title": "Defense"},
    "xaxis": {"title": "Type 1 Class."},
}
layout = Layout(
    title="Pokemon!",
    autosize=False,
    width=1000,
    height=1000,
    scene=scene_axes,
)

data = Data([trace0])
fig = Figure(data=data, layout=layout)
py.offline.iplot(fig)
In [ ]: