In [1]:

    
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd

%matplotlib inline

example 1



In [41]:

    
# Small demo dataset, written row-wise: one tuple per person.
df = pd.DataFrame(
    [
        ('john', 23, 'M', 'california', 2, 5),
        ('mary', 78, 'F', 'dc', 0, 1),
        ('peter', 22, 'M', 'california', 0, 0),
        ('jeff', 19, 'M', 'dc', 3, 5),
        ('bill', 45, 'M', 'california', 2, 2),
        ('lisa', 33, 'F', 'texas', 1, 2),
        ('jose', 20, 'M', 'texas', 4, 3),
    ],
    columns=['name', 'age', 'gender', 'state', 'num_children', 'num_pets'],
)

# Show the frame with an explicit column order.
df.loc[:, ['name', 'age', 'gender', 'state', 'num_children', 'num_pets']]









    Out[41]:







  
    
      
      name
      age
      gender
      state
      num_children
      num_pets
    
  
  
    
      0
      john
      23
      M
      california
      2
      5
    
    
      1
      mary
      78
      F
      dc
      0
      1
    
    
      2
      peter
      22
      M
      california
      0
      0
    
    
      3
      jeff
      19
      M
      dc
      3
      5
    
    
      4
      bill
      45
      M
      california
      2
      2
    
    
      5
      lisa
      33
      F
      texas
      1
      2
    
    
      6
      jose
      20
      M
      texas
      4
      3



In [42]:

    
# Scatter plot of pets against children, one red marker per person.
df.plot.scatter(x='num_children', y='num_pets', color='red')









    Out[42]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f0221647390>



In [43]:

    
# One bar per person, bar height = age.
df.plot.bar(x='name', y='age')









    Out[43]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f0221689860>



In [44]:

    
# Start from a cleared figure and take its axes so that both series are
# drawn on the same plot (gca = "get current axes").
plt.clf()
ax = plt.gca()

# Both calls target the shared axes; the second line is drawn in red.
df.plot.line(x='name', y='num_children', ax=ax)
df.plot.line(x='name', y='num_pets', color='red', ax=ax)

plt.show()



In [45]:

    
# Number of rows per state, drawn as one bar per state.
df.groupby('state').size().plot.bar()









    Out[45]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f0221500ef0>



In [46]:

    
# Count rows per (state, gender); unstacking gender gives one bar per state
# with one stacked segment per gender.
(
    df.groupby(['state', 'gender'])
    .size()
    .unstack()
    .plot.bar(stacked=True)
)
plt.show()



In [47]:

    
# Count rows per (gender, state); unstacking state gives one bar per gender
# with one stacked segment per state.
#
# No plt.clf() here: DataFrame.plot() without `ax` creates its own figure, so
# a preceding plt.clf() only opens a second, empty figure that is rendered as
# a stray "<Figure ... with 0 Axes>" output. The size is passed via `figsize`
# instead of resizing the current figure afterwards.
df.groupby(['gender','state']).size().unstack().plot(
    kind='bar', stacked=True, figsize=(7, 4)
)
plt.legend(loc='lower right')
plt.show()









    





<Figure size 432x288 with 0 Axes>

plot histogram for dataframe column



In [48]:

    
# Age histogram with fixed 20-year bins; rwidth < 1 leaves a gap between bars.
df[['age']].plot.hist(bins=[0, 20, 40, 60, 80, 100], rwidth=0.9)









    Out[48]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f022152df28>

stacked bar plot



In [53]:

    
# Add a constant 'dummy' key so that every row falls into a single group
# (one bar), then count rows per state. Unstacking 'state' yields one column
# per state, i.e. one stacked segment per state.
state_counts = df.assign(dummy=1).groupby(['dummy', 'state']).size().unstack()

# The legend is disabled here because it is rebuilt below in reversed order.
ax = state_counts.plot(kind='bar', stacked=True, legend=False, figsize=(7, 4))

ax.set_title('Number of records by State')

# Otherwise the x axis label would show up as 'dummy'.
ax.set_xlabel('state')

# Disable ticks on the x axis (there is only one bar).
ax.set_xticks([])

# Rebuild the legend in reversed order. Handles AND labels are both taken
# from the axes so each label always belongs to its handle; labelling from
# df['state'].unique() (order of appearance) can disagree with the sorted
# order that groupby uses for the plotted columns.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='lower right')

plt.show()

Stacked bar plot percentages



In [11]:

    
# Add a constant 'dummy' key so that every row falls into a single group
# (one bar), then count rows per state.
state_counts = df.assign(dummy=1).groupby(['dummy', 'state']).size()

# Convert counts to percentages of the group total. transform() returns a
# result aligned to the original index on every pandas version, whereas
# groupby(...).apply() with a like-indexed result may prepend the group key
# to the index on newer pandas.
group_totals = state_counts.groupby(level='dummy').transform('sum')
state_pct = (100 * state_counts / group_totals).unstack()

# The legend is disabled here because it is rebuilt below in reversed order.
ax = state_pct.plot(kind='bar', stacked=True, legend=False, figsize=(7, 4))

ax.set_title('Amount of records by State, normalized')

# Otherwise the x axis label would show up as 'dummy'.
ax.set_xlabel('state')

# Disable ticks on the x axis (there is only one bar).
ax.set_xticks([])

# Rebuild the legend in reversed order. Handles AND labels are both taken
# from the axes so each label always belongs to its handle; labelling from
# df['state'].unique() (order of appearance) can disagree with the sorted
# order that groupby uses for the plotted columns.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='lower right')

# Render y tick values as percentages.
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
plt.show()

percent, two-level group by



In [12]:

    
# Count rows per (gender, state).
gender_state_counts = df.groupby(['gender', 'state']).size()

# Percentage of each state within its gender. transform() returns a result
# aligned to the original (gender, state) index on every pandas version,
# whereas groupby(...).apply() with a like-indexed result may prepend the
# group key on newer pandas, which would alter the index used for the bars.
gender_totals = gender_state_counts.groupby(level='gender').transform('sum')
gender_state_pct = 100 * gender_state_counts / gender_totals

# One bar per gender, one stacked segment per state; legend='reverse' lists
# the legend entries in reversed order.
ax = gender_state_pct.unstack().plot(
    kind='bar', stacked=True, legend='reverse', figsize=(7, 4)
)

# Render y tick values as percentages.
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.set_title('Amount of records by Gender and State, normalized')
plt.show()

date histograms



In [13]:

    
# Second demo dataset: names with birth dates as MM/DD/YYYY strings.
# Note that this rebinds `df`, replacing the earlier people frame.
df = pd.DataFrame(
    [
        ('john', '01/21/1988'),
        ('lisa', '03/10/1977'),
        ('peter', '07/25/1999'),
        ('carl', '01/22/1977'),
        ('linda', '09/30/1968'),
        ('betty', '09/15/1970'),
    ],
    columns=['name', 'date_of_birth'],
)

df









    Out[13]:







  
    
      
      name
      date_of_birth
    
  
  
    
      0
      john
      01/21/1988
    
    
      1
      lisa
      03/10/1977
    
    
      2
      peter
      07/25/1999
    
    
      3
      carl
      01/22/1977
    
    
      4
      linda
      09/30/1968
    
    
      5
      betty
      09/15/1970



In [14]:

    
# Parse the MM/DD/YYYY strings into datetimes. An explicit format replaces
# the deprecated `infer_datetime_format` flag and removes any month/day
# ambiguity from format guessing.
df['date_of_birth'] = pd.to_datetime(df['date_of_birth'], format='%m/%d/%Y')



In [15]:

    
# Inspect the column dtypes (date_of_birth should now be a datetime column).
df.dtypes









    Out[15]:





name                     object
date_of_birth    datetime64[ns]
dtype: object



In [16]:

    
# Histogram of birth months. The vectorized .dt accessor extracts the month
# number (1-12) without a per-element Python lambda.
plt.clf()
df['date_of_birth'].dt.month.plot(kind='hist')
plt.xlabel('Month number')
plt.show()



In [26]:

    
# NOTE(review): `df6` is not defined in any visible cell of this notebook, so
# this cell fails with NameError on a fresh kernel. It presumably refers to
# the people frame with a 'state' column - confirm and define it explicitly.

# Add a constant 'dummy' key so that every row falls into a single group
# (one bar), then count rows per state. Unstacking 'state' yields one column
# per state, i.e. one stacked segment per state.
state_counts = df6.assign(dummy=1).groupby(['dummy', 'state']).size().unstack()

# The legend is disabled here because it is rebuilt below in reversed order.
ax = state_counts.plot(kind='bar', stacked=True, legend=False, figsize=(7, 4))

ax.set_title('Number of records by State')

# Otherwise the x axis label would show up as 'dummy'.
ax.set_xlabel('state')

# Disable ticks on the x axis (there is only one bar).
ax.set_xticks([])

# Rebuild the legend in reversed order. Handles AND labels are both taken
# from the axes so each label always belongs to its handle; labelling from
# df6['state'].unique() (order of appearance) can disagree with the sorted
# order that groupby uses for the plotted columns.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='lower right')

plt.show()

	name	age	gender	state	num_children	num_pets
0	john	23	M	california	2	5
1	mary	78	F	dc	0	1
2	peter	22	M	california	0	0
3	jeff	19	M	dc	3	5
4	bill	45	M	california	2	2
5	lisa	33	F	texas	1	2
6	jose	20	M	texas	4	3

	name	date_of_birth
0	john	01/21/1988
1	lisa	03/10/1977
2	peter	07/25/1999
3	carl	01/22/1977
4	linda	09/30/1968
5	betty	09/15/1970