In [1]:
import matplotlib.pyplot as plt

In [2]:
plt.plot(xrange(10))


Out[2]:
[<matplotlib.lines.Line2D at 0x957e36c>]

In [3]:
import numpy as np

In [4]:
import pandas

In [5]:
plt.ylabel('some numbers')


Out[5]:
<matplotlib.text.Text at 0x987634c>

In [7]:
plt.scatter(np.arange(1,10,.5), np.arange(10,20,.5))


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-1e772bce63b6> in <module>()
----> 1 plt.scatter(np.arange(1,10,.5), np.arange(10,20,.5))

/usr/lib/pymodules/python2.7/matplotlib/pyplot.pyc in scatter(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, faceted, verts, hold, **kwargs)
   2555         ax.hold(hold)
   2556     try:
-> 2557         ret = ax.scatter(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, faceted, verts, **kwargs)
   2558         draw_if_interactive()
   2559     finally:

/usr/lib/pymodules/python2.7/matplotlib/axes.pyc in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, faceted, verts, **kwargs)
   5793         y = np.ma.ravel(y)
   5794         if x.size != y.size:
-> 5795             raise ValueError("x and y must be the same size")
   5796 
   5797         s = np.ma.ravel(s)  # This doesn't have to match x, y in size.

ValueError: x and y must be the same size

In [8]:
np.arange(1,10,.5)


Out[8]:
array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  5.5,  6. ,
        6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5])

In [9]:
len(np.arange(1,10,.5))


Out[9]:
18

In [11]:
len(np.arange(10,20,.5))


Out[11]:
20

In [13]:
plt.plot([x for x in np.arange(1,10,.5)], [x*x for x in np.arange(1,10,.5)])
plt.xlabel("Numbers:1-10")
plt.ylabel("Squares of numbers")


Out[13]:
<matplotlib.text.Text at 0x9ce9acc>

In [19]:
plt.plot([x for x in xrange(10)] , [np.exp(x) for x in xrange(10)], 'rs--')


Out[19]:
[<matplotlib.lines.Line2D at 0xa47e64c>]

In [24]:
t = np.arange(0., 5, .2)

In [25]:
plt.plot(t, t, 'r--', t, t**2, 'bs', t, t**3, 'g^')


Out[25]:
[<matplotlib.lines.Line2D at 0xa84768c>,
 <matplotlib.lines.Line2D at 0xa847a4c>,
 <matplotlib.lines.Line2D at 0xa847e2c>]

In [35]:
plt.plot([1,2,3,4], [1,4,9,16], 'g^--')
plt.axis([0, 8, 0, 25])
plt.show()



In [36]:
def f(t):
    return np.exp(-t) * np.cos(2*np.pi*t)

In [37]:
t1 = np.arange(0.0, 5.0, 0.1)
t2 = np.arange(0.0, 5.0, 0.02)

In [38]:
t1


Out[38]:
array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,
        1.1,  1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,
        2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,
        3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,
        4.4,  4.5,  4.6,  4.7,  4.8,  4.9])

In [40]:
len(t2)


Out[40]:
250

In [59]:
plt.figure(1)
plt.subplot(211)
plt.plot(t1, f(t1), 'bo', t2, f(t2), 'k')

plt.subplot(212)
plt.plot(t2, np.cos(2*np.pi*t2), 'r--')
plt.show()



In [46]:
plt.figure(2)
plt.subplot(1,2,1)
plt.plot(t2, np.exp(-t2), 'gd')


Out[46]:
[<matplotlib.lines.Line2D at 0xaa510ac>]

In [55]:
plt.figure(1)
plt.subplot(311)
plt.plot(t1, f(t1), 'bo', t2, f(t2), 'k')

plt.subplot(3,1,2)
plt.plot(t2, np.cos(2*np.pi*t2), 'r--')


plt.subplot(3,1,3)
plt.plot(t2, np.exp(-t2), 'gd')

plt.show()



In [60]:
plt.figure(1)                # the first figure
plt.subplot(211)             # the first subplot in the first figure
plt.plot([1,2,3])
plt.subplot(212)             # the second subplot in the first figure
plt.plot([4,5,6])


plt.figure(2)                # a second figure
plt.plot([4,5,6])            # creates a subplot(111) by default

plt.figure(1)                # figure 1 current; subplot(212) still current
plt.subplot(211)             # make subplot(211) in figure1 current
plt.title('Easy as 1,2,3')   # subplot 211 title


Out[60]:
<matplotlib.text.Text at 0xaa510cc>

In [61]:
mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)

In [62]:
x


Out[62]:
array([ 100.4411446 ,  121.48777778,  120.46366068, ...,  119.20778103,
        105.32686251,  108.53001066])

In [63]:
len(x)
x.shape


Out[63]:
(10000,)

In [64]:
len(x)


Out[64]:
10000

In [65]:
# the histogram of the data
n, bins, patches = plt.hist(x, 50, normed=1, facecolor='g', alpha=0.75)



In [66]:
n


Out[66]:
array([ 0.00003995,  0.        ,  0.00003995,  0.        ,  0.0000799 ,
        0.00011986,  0.00023971,  0.00031962,  0.00039952,  0.00063923,
        0.00131842,  0.0018378 ,  0.0027567 ,  0.00347584,  0.00531364,
        0.00663206,  0.00978828,  0.01214546,  0.01406316,  0.01781867,
        0.02009594,  0.02201365,  0.02181389,  0.02632848,  0.02788662,
        0.02712753,  0.02696772,  0.02544953,  0.02181389,  0.01897728,
        0.01701963,  0.01530168,  0.01342393,  0.01006795,  0.00858972,
        0.00595287,  0.00407512,  0.00335598,  0.00227727,  0.00151818,
        0.00079904,  0.00047943,  0.00043947,  0.00027967,  0.00003995,
        0.00019976,  0.00011986,  0.        ,  0.        ,  0.0000799 ])

In [67]:
len(n)


Out[67]:
50

In [68]:
n.shape


Out[68]:
(50,)

In [69]:
bins.shape


Out[69]:
(51,)

In [79]:
n, bins, patches = plt.hist(x, 50, normed=1, facecolor='c', alpha=0.75)

plt.xlabel('Smarts')
plt.ylabel('Probability')
plt.title('Histogram of IQ')
plt.text(60, .025, r'$\mu=100,\ \sigma=15$')
plt.axis([40, 160, 0, 0.03])
plt.grid(True)
plt.show()



In [81]:
ax = plt.subplot(111)

t = np.arange(0.0, 5.0, 0.01)
s = np.cos(2*np.pi*t)
line, = plt.plot(t, s, lw=2)

plt.annotate('local max', xy=(2, 1), xytext=(3, 1.5),
            arrowprops=dict(facecolor='green', shrink=0.05),
            )

plt.ylim(-2,2)
plt.show()



In [1]:
from sklearn.datasets import make_classification

In [2]:
from sklearn.cross_validation import cross_val_score

In [3]:
from sklearn import svm, metrics

In [4]:
X,y = make_classification(n_samples=5000, n_features=5, random_state=0)

In [5]:
X.shape, y.shape


Out[5]:
((5000, 5), (5000,))

In [6]:
y


Out[6]:
array([ 1.,  0.,  1., ...,  1.,  0.,  0.])

In [7]:
clf = svm.SVC()

In [11]:
cross_val_score(clf, X, y, cv=10)


Out[11]:
array([ 0.924,  0.914,  0.926,  0.918,  0.932,  0.928,  0.916,  0.918,
        0.912,  0.906])

In [12]:
clf = svm.SVC(kernel='rbf')

In [13]:
cross_val_score(clf, X, y, cv=10)


Out[13]:
array([ 0.924,  0.914,  0.926,  0.918,  0.932,  0.928,  0.916,  0.918,
        0.912,  0.906])

In [8]:
X.shape


Out[8]:
(5000, 5)
Caching the list of root modules, please wait!
(This will only be done once - type '%rehashx' to reset cache!)


In [9]:
from sklearn.decomposition import RandomizedPCA

In [10]:
X_pca = RandomizedPCA(n_components=2).fit_transform(X)

In [11]:
X_pca.shape


Out[11]:
(5000, 2)

In [12]:
import pylab as pl

In [26]:
pl.scatter(X_pca[y==0,0], X_pca[y==0,1], c='b')


Out[26]:
<matplotlib.collections.PathCollection at 0xd2a4bac>

In [27]:
pl.scatter(X_pca[y==1,0], X_pca[y==1, 1], c='g')


Out[27]:
<matplotlib.collections.PathCollection at 0xd92df4c>

In [25]:
pl.scatter(X_pca[y==0,0], X_pca[y==0,1], c='b')
pl.scatter(X_pca[y==1,0], X_pca[y==1, 1], c='g')


Out[25]:
<matplotlib.collections.PathCollection at 0xbb5bfcc>

In [24]:
color = ['b','g']
from itertools import cycle
for i,c in zip(np.unique(y),cycle(color)):
    pl.scatter(X_pca[y==i,0], X_pca[y==i, 1], c=c, alpha=0.8)



In [29]:
from sklearn.lda import LDA

In [34]:
clf = LDA().fit(X,y)

In [35]:
clf


Out[35]:
LDA(n_components=None, priors=None)

In [33]:
y.shape, X.shape


Out[33]:
((5000,), (5000, 5))

In [36]:
X[0]


Out[36]:
array([-1.43375112, -0.53563349, -0.60931745, -1.69068084, -1.49991083])

In [37]:
clf.predict(X[0])


Out[37]:
array([ 1.])

In [38]:
y[1]


Out[38]:
0.0

In [39]:
y[0]


Out[39]:
1.0

In [43]:
X_lda=LDA(n_components=2).fit_transform(X,y)

In [44]:
X_lda.shape


Out[44]:
(5000, 2)

In [69]:
np.unique(y)


Out[69]:
array([ 0.,  1.])

In [42]:
lda.shap


Out[42]:
(5000, 1)

In [48]:
for i,c in zip(np.unique(y), cycle(color)):
        pl.scatter(X_lda[y==i, 0], X_lda[y==i, 1], c=c, alpha=.8)



In [49]:
pl.scatter(X_lda[y==0, 0], X_lda[y==0, 1], c='b', alpha=.8)


Out[49]:
<matplotlib.collections.PathCollection at 0xd3f4d4c>

In [50]:
pl.scatter(X_lda[y==1, 0], X_lda[y==1, 1], c='g', alpha=.8)


Out[50]:
<matplotlib.collections.PathCollection at 0xd3ea14c>

In [51]:
from sklearn import datasets

In [52]:
iris = datasets.load_iris()

In [55]:
iris.DESCR


Out[55]:
'Iris Plants Database\n\nNotes\n-----\nData Set Characteristics:\n    :Number of Instances: 150 (50 in each of three classes)\n    :Number of Attributes: 4 numeric, predictive attributes and the class\n    :Attribute Information:\n        - sepal length in cm\n        - sepal width in cm\n        - petal length in cm\n        - petal width in cm\n        - class:\n                - Iris-Setosa\n                - Iris-Versicolour\n                - Iris-Virginica\n    :Summary Statistics:\n    ============== ==== ==== ======= ===== ====================\n                    Min  Max   Mean    SD   Class Correlation\n    ============== ==== ==== ======= ===== ====================\n    sepal length:   4.3  7.9   5.84   0.83    0.7826\n    sepal width:    2.0  4.4   3.05   0.43   -0.4194\n    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\n    petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)\n    ============== ==== ==== ======= ===== ====================\n    :Missing Attribute Values: None\n    :Class Distribution: 33.3% for each of 3 classes.\n    :Creator: R.A. Fisher\n    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n    :Date: July, 1988\n\nThis is a copy of UCI ML iris datasets.\nhttp://archive.ics.uci.edu/ml/datasets/Iris\n\nThe famous Iris database, first used by Sir R.A Fisher\n\nThis is perhaps the best known database to be found in the\npattern recognition literature.  Fisher\'s paper is a classic in the field and\nis referenced frequently to this day.  (See Duda & Hart, for example.)  The\ndata set contains 3 classes of 50 instances each, where each class refers to a\ntype of iris plant.  One class is linearly separable from the other 2; the\nlatter are NOT linearly separable from each other.\n\nReferences\n----------\n   - Fisher,R.A. "The use of multiple measurements in taxonomic problems"\n     Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to\n     Mathematical Statistics" (John Wiley, NY, 1950).\n   - Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.\n     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\n   - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System\n     Structure and Classification Rule for Recognition in Partially Exposed\n     Environments".  IEEE Transactions on Pattern Analysis and Machine\n     Intelligence, Vol. PAMI-2, No. 1, 67-71.\n   - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE Transactions\n     on Information Theory, May 1972, 431-433.\n   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al"s AUTOCLASS II\n     conceptual clustering system finds 3 classes in the data.\n   - Many, many more ...\n'

In [56]:
iris.data.shape


Out[56]:
(150, 4)

In [57]:
iris.target.shape


Out[57]:
(150,)

In [58]:
clf = svm.SVC(kernel='rbf').fit(iris.data, iris.target)

In [59]:
clf


Out[59]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, shrinking=True, tol=0.001,
  verbose=False)

In [61]:
clf.predict(iris.data[:5])


Out[61]:
array([0, 0, 0, 0, 0])

In [62]:
iris.target_names


Out[62]:
array(['setosa', 'versicolor', 'virginica'], 
      dtype='|S10')

In [63]:
iris_pca = RandomizedPCA(n_components=2).fit_transform(iris.data)

In [80]:
iris_pca.shape


Out[80]:
(150, 2)

In [81]:
color = ['b', 'g', 'r']
for i, c in zip(np.unique(iris.target), cycle(color)):
    pl.scatter(iris_pca[iris.target==i, 0], iris_pca[iris.target==0, 1], c=c, alpha=.8)
    print i


0
1
2

In [75]:
np.unique(iris.target)


Out[75]:
array([0, 1, 2])

In [85]:
from sklearn.decomposition import PCA
iris_pca_basic = PCA(n_components=2).fit_transform(iris.data)

In [86]:
iris_pca_basic.shape


Out[86]:
(150, 2)

In [88]:
color =['b', 'g', 'r']
for i, c in zip(np.unique(iris.target), cycle(color)):
    pl.scatter(iris_pca_basic[iris.target==i,0], iris_pca_basic[iris.target==i,1],
               c=c, alpha=.8)



In [89]:
from sklearn.decomposition import ProbabilisticPCA, KernelPCA
iris_pca_probalistic = KernelPCA(n_components=2).fit_transform(iris.data)

In [91]:
iris_pca_probalistic.shape


Out[91]:
(150, 2)

In [93]:
X_p= iris_pca_probalistic
for i, c in zip(np.unique(iris.target), cycle(color)):
    pl.scatter(X_p[iris.target==i,0], X_p[iris.target==i,1], c=c, alpha=.7)



In [ ]: