In [2]:
"""
A simple example of an animated plot... In 3D!
"""
%matplotlib inline
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt

In [3]:
data = load_iris()

In [4]:
from pandas import DataFrame, Series
import pandas as pd

In [11]:
df = DataFrame(data.data)

In [12]:
df.head()


Out[12]:
0 1 2 3
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2

5 rows × 4 columns


In [13]:
df.columns = ['a','b','c','d']

In [14]:
target = Series(data.target)

In [15]:
target


Out[15]:
0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
...
135    2
136    2
137    2
138    2
139    2
140    2
141    2
142    2
143    2
144    2
145    2
146    2
147    2
148    2
149    2
Length: 150, dtype: int64

In [28]:
plt.figure(1)
colors = {1:'r',0:'b',2:'g'}
for i in target.unique():
    mask = target == i
    plt.scatter(df['a'][mask], df['b'][mask], c = colors[i], label = i)
plt.xlabel("X")
plt.ylabel("Y")
plt.legend()
plt.show()



In [29]:
def scatter_df(df, x_name, y_name, groupby, colour_dict, size=(10,10)):
    """
    df - DataFrame with columns name
    x_name - the x column name
    y_name - the y column name
    groupby - the categorical column Series
    size - the plot size
    """
    plt.figure(1, figsize=size)
    for item in groupby.unique():
        mask_s = groupby == item
        plt.scatter(df[x_name][mask], df[y_name][mask], c = colour_dict[item], label = item)
    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.legend()
    plt.show

In [32]:
test_df = DataFrame(np.random.rand(500,2))

In [33]:
test_df.head()


Out[33]:
0 1
0 0.769329 0.986802
1 0.116339 0.473210
2 0.122275 0.049640
3 0.936482 0.804029
4 0.350899 0.594228

5 rows × 2 columns


In [39]:
t = np.random.random_integers(5, size=(500))

In [41]:
color = {1:'r',2:'b',3:'g',4:'k',5:'y'}

In [49]:
group = Series(t)

In [43]:
test_df.columns = ['x_value','y_value']

In [45]:
scatter_df(test_df, 'x_value','y_value',group,color)


---------------------------------------------------------------------------
IndexingError                             Traceback (most recent call last)
<ipython-input-45-2e09f31af1ee> in <module>()
----> 1 scatter_df(test_df, 'x_value','y_value',group,color)

<ipython-input-29-1475de3bee7e> in scatter_df(df, x_name, y_name, groupby, colour_dict, size)
     10     for item in groupby.unique():
     11         mask_s = groupby == item
---> 12         plt.scatter(df[x_name][mask], df[y_name][mask], c = colour_dict[item], label = item)
     13     plt.xlabel(x_name)
     14     plt.ylabel(y_name)

/usr/lib/python2.7/dist-packages/pandas/core/series.pyc in __getitem__(self, key)
    518 
    519         if _is_bool_indexer(key):
--> 520             key = _check_bool_indexer(self.index, key)
    521 
    522         return self._get_with(key)

/usr/lib/python2.7/dist-packages/pandas/core/indexing.pyc in _check_bool_indexer(ax, key)
   1379         mask = com.isnull(result.values)
   1380         if mask.any():
-> 1381             raise IndexingError('Unalignable boolean Series key provided')
   1382 
   1383         result = result.astype(bool).values

IndexingError: Unalignable boolean Series key provided
<matplotlib.figure.Figure at 0x7f47f53ff1d0>

In [46]:
group.shape


Out[46]:
(500,)

In [47]:
test_df.shape


Out[47]:
(500, 2)

In [50]:
group == 1


Out[50]:
0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13     True
14    False
...
485    False
486    False
487    False
488    False
489    False
490    False
491    False
492    False
493    False
494    False
495    False
496    False
497    False
498    False
499    False
Length: 500, dtype: bool

In [ ]: