Watch Me Code 2: Plotly and Chart Studio

  • Plotly is a cloud-based plotting service. It uses the popular JavaScript library D3.js.
  • Plotly is simple to use

In [ ]:
!pip install chart-studio plotly --upgrade

In [ ]:
import os
from getpass import getpass

import chart_studio as plotly
import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from IPython.display import HTML, display

To use plot.ly, you need to sign up for a free account and then get your API credentials from https://chart-studio.plot.ly/settings/api


In [ ]:
# Set up the Chart Studio credentials.
# GET YOUR OWN CREDENTIALS at https://chart-studio.plot.ly/settings/api
#
# Never hardcode an API key in a notebook: anyone who can read the file can use it.
# The values are read from the CHART_STUDIO_USERNAME / CHART_STUDIO_API_KEY
# environment variables; if either is unset you are prompted for it instead
# (the API key prompt does not echo what you type).
chart_studio_username = os.environ.get('CHART_STUDIO_USERNAME') or input('Chart Studio username: ')
chart_studio_api_key = os.environ.get('CHART_STUDIO_API_KEY') or getpass('Chart Studio API key: ')

plotly.tools.set_credentials_file(username=chart_studio_username, api_key=chart_studio_api_key)

In [ ]:
# Build a small pandas DataFrame: one row per subject with its grade
subject_names = ['Mathematics', 'English', 'History', 'Science', 'Arts']
subject_scores = [67, 60, 36, 61, 58]
grades = dict(subjects=subject_names, grades=subject_scores)
grades_df = pd.DataFrame(grades)
grades_df

To plot with plotly, we need:

  • Create a figure
  • Add a series (trace)
  • Label the axes
  • Show the plot

In [ ]:
# Build the chart step by step: make the trace, create the figure,
# add the trace, label everything, then show it.
grade_data = go.Bar(x=grades_df['subjects'], y=grades_df['grades'])
figure = go.Figure()
figure.add_trace(grade_data)
# Subjects are plotted along x and grades along y, so the axis titles
# must be labelled in that order.
figure.update_layout(title='My Grades R Awesum!',
                     xaxis_title='Subjects',
                     yaxis_title='Grades')
figure.show()

Plotly Express makes it easy to plot a DataFrame.


In [ ]:
# Plotly Express draws the same bar chart in a single call on the DataFrame
px.bar(
    grades_df,
    x='subjects',
    y='grades',
    title='My Grades R Awesum',
)

How about an example with multiple series? For that we need to pull in another dataset


In [ ]:
# Load the Syracuse weather history and keep only the rows whose EST date starts with "2015-"
weather_url = 'https://raw.githubusercontent.com/mafudge/datasets/master/weather/syracuse-ny.csv'
weather_all_years = pd.read_csv(weather_url)
is_2015 = weather_all_years['EST'].str.startswith('2015-')
cuse_weather_df = weather_all_years[is_2015]
cuse_weather_df.head(5)

In [ ]:
# Color settings handed to each trace through its `marker` argument
r, g, b = (dict(color=shade) for shade in ('red', 'green', 'blue'))

# One row per series: (DataFrame column, legend name, draw mode, marker settings)
series_specs = [
    ('Max TemperatureF', "Max Temp", "lines", r),
    ('Mean TemperatureF', "Mean Temp", "lines+markers", g),
    ('Min TemperatureF', "Min Temp", "lines", b),
]

# Every series shares the same x values (the EST column)
weather_data = [
    go.Scatter(x=cuse_weather_df['EST'], y=cuse_weather_df[column],
               mode=draw_mode, name=legend_name, marker=marker_settings)
    for column, legend_name, draw_mode, marker_settings in series_specs
]

# Create the figure with all three traces, label it, and show it
figure = go.Figure(data=weather_data)
figure.update_layout(
    title='Syracuse Weather 2015',
    xaxis_title='Day of the Year',
    yaxis_title='Temperature Deg F',
)
figure.show()

Here's another example, this time with the Exam Scores dataset. It shows how much more expressive plot.ly can be.


In [ ]:
# Load the exam scores, sort them by Student_Score (ascending), and preview the first six rows
scores_url = 'https://raw.githubusercontent.com/mafudge/datasets/master/exam-scores/exam-scores.csv'
scores_df = pd.read_csv(scores_url).sort_values(by='Student_Score')
scores_df.iloc[:6]

In [ ]:
# Bubble chart: letter grade on x, completion time on y,
# and each marker's diameter taken from the student's score.
bubble_marker = dict(size=scores_df['Student_Score'], sizemode='diameter', sizeref=1.0)
bubble_trace = go.Scatter(
    x=scores_df['Letter_Grade'],
    y=scores_df['Completion_Time'],
    mode="markers",
    marker=bubble_marker,
)

f = go.Figure(data=[bubble_trace])
f.show()

In [ ]:
# Heatmap: exam version on x, completion time on y, student score as the cell value (z)
heatmap_trace = go.Heatmap(
    x=scores_df['Exam_Version'],
    y=scores_df['Completion_Time'],
    z=scores_df['Student_Score'],
)
f = go.Figure(data=[heatmap_trace])
f.show()

In [ ]:
# A hand-built example: plain Python lists work as trace data, so pandas is optional.

# Both series are plotted against the same x positions
x_values = [1, 2, 3, 4, 5, 6, 7, 8]

trace0 = go.Scatter(x=x_values, y=[10, 15, 13, 17, 15, 12, 10, 18], mode="markers", name="series 2")
trace1 = go.Scatter(x=x_values, y=[16, 5, 11, 9, 16, 10, 14, 12], mode="lines", name="series 1")

# Create the figure with both traces, label it, and show it
f = go.Figure(data=[trace0, trace1])
f.update_layout(
    title='Sample Chart',
    xaxis_title='X-Axis',
    yaxis_title='Y-Axis',
)
f.show()

In [ ]:
# Load the gapminder 2007 dataset
gapminder_url = 'https://raw.githubusercontent.com/plotly/datasets/master/gapminder2007.csv'
df = pd.read_csv(gapminder_url)

# Build the figure: GDP per capita vs. life expectancy, with the country as each point's text
gapminder_trace = go.Scatter(
    x=df['gdpPercap'],
    y=df['lifeExp'],
    text=df['country'],
    mode='markers',
    name='2007',
)
fig = go.Figure(gapminder_trace)
fig.update_xaxes(title_text='GDP per Capita', type='log')  # logarithmic x axis
fig.update_yaxes(title_text='Life Expectancy')

# Save it under a specific name via chart_studio.plotly
py.plot(fig, filename='pandas-multiple-scatter')
# Show it in the notebook
fig.show()

In [ ]:
# Example using Plotly Express together with IPython's display(): HTML headings
# and figures are emitted explicitly, so one cell can show several outputs.
# `display` and `HTML` come from the notebook's import cell at the top, which
# keeps all dependencies in one place instead of re-importing them here.

display(HTML("<h1> CHART 1</h1>"))
# text='country' draws each country's name on its point
display(px.scatter(df, x='gdpPercap', y='lifeExp', text='country'))
display(HTML("<h1> CHART 2</h1>"))
# text=None draws the same points without labels
display(px.scatter(df, x='gdpPercap', y='lifeExp', text=None))

In [ ]: