In [1]:

    
import plotly as py
from plotly import plotly
import pandas as pd
import numpy as np
from plotly.graph_objs import Scatter, Layout, Data, Figure, Annotation, Scatter3d
import plotly.figure_factory as ff

py.offline.init_notebook_mode(connected=True)

Below we will examine the different aspects/objects that define a plot in Plotly. These are:

Data
Layout
Figure

We will first follow with a few examples to showcase Plotly.



In [2]:

    
py.offline.iplot({
    "data": [Scatter(x=[1, 2, 3, 4], y=[4, 3, 2, 1])],
    "layout": Layout(title="hello world")
})



In [3]:

    
# do a table
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv")

table = ff.create_table(df)
py.offline.iplot(table, filename='jupyter/table1')

Under every graph is a JSON object, which is a dictionary like data structure. Simply changing values of some keywords and we get different plots.



In [4]:

    
# follows trace - data - layout - figure semantic

trace1 = Scatter(
    x = [1,2,3],
    y = [4,5,6],
    marker = {'color':'red', 'symbol':104, 'size':"10"},
    mode = "markers+lines",
    text = ['one','two','three'],
    name = '1st Trace'
)

data = Data([trace1])

layout = Layout(
    title="First Plot",
    xaxis={'title':'x1'},
    
    yaxis ={'title':'x2'}
)

figure=Figure(data=data, layout=layout)
py.offline.iplot(figure)



In [5]:

    
df = pd.read_csv('https://raw.githubusercontent.com/yankev/test/master/life-expectancy-per-GDP-2007.csv')
americas = df[(df.continent=='Americas')]
europe = df[(df.continent=='Europe')]



In [6]:

    
trace_comp0 = Scatter(
    x = americas.gdp_percap,
    y=americas.life_exp,
    mode='markers',
    marker=dict(size = 12,
               line = dict(width=1),
               color="navy"),
    name = "Americas",
    text=americas.country,
)

trace_comp1 = Scatter(
    x = europe.gdp_percap,
    y=europe.life_exp,
    mode='markers',
    marker=dict(size = 12,
               line = dict(width=1),
               color="orange"),
    name = "Europe",
    text=europe.country,
)

data = [trace_comp0, trace_comp1]

layout = Layout(
    title="YOUR MUM", # sorry
    hovermode="closest",
    xaxis=dict(
        title='GDP per capita (2000 dollars)',
        ticklen=5,
        zeroline=False,
        gridwidth=2,
    ),
    yaxis=dict(
        title="Life expectancy (years)"
    )

)

fig = Figure(data=data, layout=layout)
py.offline.iplot(fig)

Data

We see that data is actually a list object in Python. Data will actually contain all the traces that you wish to plot. Now the question may be, what is a trace? A trace is just the name we give a collection of data and the specifications of which we want that data plotted. Notice that a trace will also be an object itself, and these will be named according to how you want the data displayed on the plotting surface.



In [7]:

    
# generate data
x = np.linspace(0,np.pi*8,100)
y = np.sin(x)
z = np.cos(x)

layout = Layout(
    title="My First Plotly Graph",
    xaxis = dict(
        title="x"),
    yaxis = dict(title="sin(x)")
)

trace1 = Scatter(
    x = x,
    y = y,
    mode = "lines",
    marker = dict(
        size=8,
        color="navy"
    ),
    name="Sin(x)"
)

trace2 = Scatter(
    x = x,
    y = z,
    mode = "markers+lines",
    marker = dict(
        size=8,
        color="red"
    ),
    name="Cos(x)",
    opacity=0.5
)

# load data and fig with nec
data = Data([trace1,trace2])
fig = Figure(data=data, layout=layout)

#plot
py.offline.iplot(fig)



In [8]:

    
# look at hover text

x = np.arange(1,3.2,0.2)
y = 6*np.sin(x)

layout = Layout(
    title="My Second Plotly Graph",
    xaxis = dict(
        title="x"),
    yaxis = dict(title="6 * sin(x)")
)

trace1 = Scatter(
    x=[1,2,3], 
    y=[4,5,6], 
    marker={'color': 'red', 'symbol': 104, 'size': "10"},                 
    mode="markers+lines",  
    text=["one","two","three"],
name="first trace")

trace2 = Scatter(x=x, 
                 y=y, 
                 marker={'color': 'blue', 'symbol': 'star', 'size': 10}, 
                 mode='markers', 
                 name='2nd trace')

data = Data([trace1,trace2])
fig = Figure(data=data,layout=layout)

py.offline.iplot(fig)

Layout

The Layout object will define the look of the plot, and plot features which are unrelated to the data. So we will be able to change things like the title, axis titles, spacing, font and even draw shapes on top of your plot!



In [9]:

    
layout









    Out[9]:





{'title': 'My Second Plotly Graph',
 'xaxis': {'title': 'x'},
 'yaxis': {'title': '6 * sin(x)'}}

Annotations

We added a plot title as well as titles for all the axes. For fun we could add some text annotation as well in order to indicate the maximum point that's been plotted on the current plotting surface.



In [10]:

    
# highest point
layout.update(dict(
    annotations=[Annotation(
        text="Highest Point", 
        x=3, 
        y=6)]
)
             )
py.offline.iplot(Figure(data=data, layout=layout), filename='pyguide_4')



In [11]:

    
#lowest point
layout.update(dict(
annotations = [Annotation(
text = "lowest point",
x=1,
y=4)]))

py.offline.iplot(Figure(data=data,layout=layout))

Shapes

Let's add a rectangular block to highlight the section where trace 1 is above trace2.



In [12]:

    
layout.update(dict(
    annotations=[Annotation(
        text="Highest Point", 
        x=3, 
        y=6)],
    shapes = [
        # 1st highlight during Feb 4 - Feb 6
        {
            'type': 'rect',
            # x-reference is assigned to the x-values
            'xref': 'x',
            # y-reference is assigned to the plot paper [0,1]
            'yref': 'y',
            'x0': '1',
            'y0': 0,
            'x1': '2',
            'y1': 7,
            'fillcolor': '#d3d3d3',
            'opacity': 0.2,
            'line': {
                'width': 0,
            }
        }]
)
             )
py.offline.iplot(Figure(data=data, layout=layout), filename='pyguide_4')



In [13]:

    
# plot scatter with color

x = np.random.randint(0,100,100)
y = [x + np.random.randint(-100,100) for x in x]
z = np.random.randint(0,3,100)



In [14]:

    
layout = Layout(
    title = "Color Scatter Plot",
    xaxis = dict(title="x"),
    yaxis = dict(title="y")
)

trace1 = Scatter(
    x = x,
    y = y,
    mode="markers",
    marker=dict(
        size = 12,
        color = z,
        colorscale = "heatmap-discrete-colorscale",
        showscale=True
    )
)

data = Data([trace1])
fig = Figure(data=data,layout=layout)

py.offline.iplot(fig)



In [15]:

    
# a better implementation would be to use different traces for different colors

df = pd.DataFrame({
    'x':x,
    'y':y,
    'z':z
})
df.z.value_counts()









    Out[15]:





2    34
1    34
0    32
Name: z, dtype: int64



In [16]:

    
layout = Layout(
    title = "Color Scatter Plot (Improved)",
    xaxis = dict(title="x"),
    yaxis = dict(title="y")
)

trace1 = Scatter(
    x = df.query('z==0')['x'],
    y = df.query('z==0')['y'],
    mode="markers",
    marker=dict(
        size = 12,
        color = "orange",
    ),
    name = "Z = 0"
)
    
trace2 = Scatter(
    x = df.query('z==1')['x'],
    y = df.query('z==1')['y'],
    mode="markers",
    marker=dict(
        size = 12,
        color = "red",
    ),
    name="Z = 1"
)

trace3 = Scatter(
    x = df.query('z==2')['x'],
    y = df.query('z==2')['y'],
    mode="markers",
    marker=dict(
        size = 12,
        color = "blue",
    ),
    name="Z = 2"
)

data = Data([trace1, trace2,trace3])
fig = Figure(data=data,layout=layout)

py.offline.iplot(fig)



In [17]:

    
# 3d scatter plot

x = np.random.randint(0,100,100)
y = np.random.randint(0,100,100)
z = np.random.randint(0,10,100)

layout = Layout(
    title="3d Scatter Plot",
    xaxis = dict(
        title = "X"
    )
)

trace0 = Scatter3d(
    x=x,
    y=y,
    z=z,
    mode="markers",
    marker = dict(
        size=6,
        color=z,
        colorscale="Plasma",
        opacity=0.6
    )
)

data = Data([trace0])

fig = Figure(data=data, layout=layout)

py.offline.iplot(fig)



In [18]:

    
# 3d bubble charts using pokemon data
# URL: https://www.kaggle.com/rounakbanik/pokemon/data

dataset = pd.read_csv("pokemon.csv")
dataset.dtypes









    



---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-18-1f0812017b9f> in <module>()
      2 # URL: https://www.kaggle.com/rounakbanik/pokemon/data
      3 
----> 4 dataset = pd.read_csv("pokemon.csv")
      5 dataset.dtypes

/Users/tobias/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    653                     skip_blank_lines=skip_blank_lines)
    654 
--> 655         return _read(filepath_or_buffer, kwds)
    656 
    657     parser_f.__name__ = name

/Users/tobias/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    403 
    404     # Create the parser.
--> 405     parser = TextFileReader(filepath_or_buffer, **kwds)
    406 
    407     if chunksize or iterator:

/Users/tobias/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    760             self.options['has_index_names'] = kwds['has_index_names']
    761 
--> 762         self._make_engine(self.engine)
    763 
    764     def close(self):

/Users/tobias/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
    964     def _make_engine(self, engine='c'):
    965         if engine == 'c':
--> 966             self._engine = CParserWrapper(self.f, **self.options)
    967         else:
    968             if engine == 'python':

/Users/tobias/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1580         kwds['allow_leading_cols'] = self.index_col is not False
   1581 
-> 1582         self._reader = parsers.TextReader(src, **kwds)
   1583 
   1584         # XXX

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__ (pandas/_libs/parsers.c:4209)()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source (pandas/_libs/parsers.c:8873)()

FileNotFoundError: File b'pokemon.csv' does not exist



In [ ]:

    
layout = Layout(
    title="Pokemon!",
    autosize = False,
    width= 1000,
    height= 1000,
    scene = dict(
        zaxis=dict(title="Attack"),
        yaxis=dict(title="Defense"),
        xaxis=dict(title="Type 1 Class.")
    )
)

trace0 = Scatter3d(
    z = dataset.attack,
    y = dataset.defense,
    x = dataset.type1,
    text = dataset.name,
    mode = "markers",
    marker = dict(
        size = dataset.weight_kg/10,
        opacity = 0.5,
        color = dataset.hp,
        colorscale = 'Viridis',
        showscale=True,
        colorbar=dict(title="HP")
    )
)

data = Data([trace0])

fig = Figure(data=data, layout=layout)

py.offline.iplot(fig)



In [ ]: