notebook.community

Edit and run



In [1]:

    
import matplotlib.pyplot as plt



In [2]:

    
plt.plot(xrange(10))









    Out[2]:





[<matplotlib.lines.Line2D at 0x957e36c>]



In [3]:

    
import numpy as np



In [4]:

    
import pandas



In [5]:

    
plt.ylabel('some numbers')









    Out[5]:





<matplotlib.text.Text at 0x987634c>



In [7]:

    
plt.scatter(np.arange(1,10,.5), np.arange(10,20,.5))









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-1e772bce63b6> in <module>()
----> 1 plt.scatter(np.arange(1,10,.5), np.arange(10,20,.5))

/usr/lib/pymodules/python2.7/matplotlib/pyplot.pyc in scatter(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, faceted, verts, hold, **kwargs)
   2555         ax.hold(hold)
   2556     try:
-> 2557         ret = ax.scatter(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, faceted, verts, **kwargs)
   2558         draw_if_interactive()
   2559     finally:

/usr/lib/pymodules/python2.7/matplotlib/axes.pyc in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, faceted, verts, **kwargs)
   5793         y = np.ma.ravel(y)
   5794         if x.size != y.size:
-> 5795             raise ValueError("x and y must be the same size")
   5796 
   5797         s = np.ma.ravel(s)  # This doesn't have to match x, y in size.

ValueError: x and y must be the same size



In [8]:

    
np.arange(1,10,.5)









    Out[8]:





array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  5.5,  6. ,
        6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5])



In [9]:

    
len(np.arange(1,10,.5))









    Out[9]:





18



In [11]:

    
len(np.arange(10,20,.5))









    Out[11]:





20



In [13]:

    
plt.plot([x for x in np.arange(1,10,.5)], [x*x for x in np.arange(1,10,.5)])
plt.xlabel("Numbers:1-10")
plt.ylabel("Squares of numbers")









    Out[13]:





<matplotlib.text.Text at 0x9ce9acc>



In [19]:

    
plt.plot([x for x in xrange(10)] , [np.exp(x) for x in xrange(10)], 'rs--')









    Out[19]:





[<matplotlib.lines.Line2D at 0xa47e64c>]



In [24]:

    
t = np.arange(0., 5, .2)



In [25]:

    
plt.plot(t, t, 'r--', t, t**2, 'bs', t, t**3, 'g^')









    Out[25]:





[<matplotlib.lines.Line2D at 0xa84768c>,
 <matplotlib.lines.Line2D at 0xa847a4c>,
 <matplotlib.lines.Line2D at 0xa847e2c>]



In [35]:

    
plt.plot([1,2,3,4], [1,4,9,16], 'g^--')
plt.axis([0, 8, 0, 25])
plt.show()



In [36]:

    
def f(t):
    return np.exp(-t) * np.cos(2*np.pi*t)



In [37]:

    
t1 = np.arange(0.0, 5.0, 0.1)
t2 = np.arange(0.0, 5.0, 0.02)



In [38]:

    
t1









    Out[38]:





array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,
        1.1,  1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,
        2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,
        3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,
        4.4,  4.5,  4.6,  4.7,  4.8,  4.9])



In [40]:

    
len(t2)









    Out[40]:





250



In [59]:

    
plt.figure(1)
plt.subplot(211)
plt.plot(t1, f(t1), 'bo', t2, f(t2), 'k')

plt.subplot(212)
plt.plot(t2, np.cos(2*np.pi*t2), 'r--')
plt.show()



In [46]:

    
plt.figure(2)
plt.subplot(1,2,1)
plt.plot(t2, np.exp(-t2), 'gd')









    Out[46]:





[<matplotlib.lines.Line2D at 0xaa510ac>]



In [55]:

    
plt.figure(1)
plt.subplot(311)
plt.plot(t1, f(t1), 'bo', t2, f(t2), 'k')

plt.subplot(3,1,2)
plt.plot(t2, np.cos(2*np.pi*t2), 'r--')


plt.subplot(3,1,3)
plt.plot(t2, np.exp(-t2), 'gd')

plt.show()



In [60]:

    
plt.figure(1)                # the first figure
plt.subplot(211)             # the first subplot in the first figure
plt.plot([1,2,3])
plt.subplot(212)             # the second subplot in the first figure
plt.plot([4,5,6])


plt.figure(2)                # a second figure
plt.plot([4,5,6])            # creates a subplot(111) by default

plt.figure(1)                # figure 1 current; subplot(212) still current
plt.subplot(211)             # make subplot(211) in figure1 current
plt.title('Easy as 1,2,3')   # subplot 211 title









    Out[60]:





<matplotlib.text.Text at 0xaa510cc>



In [61]:

    
mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)



In [62]:

    
x









    Out[62]:





array([ 100.4411446 ,  121.48777778,  120.46366068, ...,  119.20778103,
        105.32686251,  108.53001066])



In [63]:

    
len(x)
x.shape









    Out[63]:





(10000,)



In [64]:

    
len(x)









    Out[64]:





10000



In [65]:

    
# the histogram of the data
n, bins, patches = plt.hist(x, 50, normed=1, facecolor='g', alpha=0.75)



In [66]:

    
n









    Out[66]:





array([ 0.00003995,  0.        ,  0.00003995,  0.        ,  0.0000799 ,
        0.00011986,  0.00023971,  0.00031962,  0.00039952,  0.00063923,
        0.00131842,  0.0018378 ,  0.0027567 ,  0.00347584,  0.00531364,
        0.00663206,  0.00978828,  0.01214546,  0.01406316,  0.01781867,
        0.02009594,  0.02201365,  0.02181389,  0.02632848,  0.02788662,
        0.02712753,  0.02696772,  0.02544953,  0.02181389,  0.01897728,
        0.01701963,  0.01530168,  0.01342393,  0.01006795,  0.00858972,
        0.00595287,  0.00407512,  0.00335598,  0.00227727,  0.00151818,
        0.00079904,  0.00047943,  0.00043947,  0.00027967,  0.00003995,
        0.00019976,  0.00011986,  0.        ,  0.        ,  0.0000799 ])



In [67]:

    
len(n)









    Out[67]:





50



In [68]:

    
n.shape









    Out[68]:





(50,)



In [69]:

    
bins.shape









    Out[69]:





(51,)



In [79]:

    
n, bins, patches = plt.hist(x, 50, normed=1, facecolor='c', alpha=0.75)

plt.xlabel('Smarts')
plt.ylabel('Probability')
plt.title('Histogram of IQ')
plt.text(60, .025, r'$\mu=100,\ \sigma=15$')
plt.axis([40, 160, 0, 0.03])
plt.grid(True)
plt.show()



In [81]:

    
ax = plt.subplot(111)

t = np.arange(0.0, 5.0, 0.01)
s = np.cos(2*np.pi*t)
line, = plt.plot(t, s, lw=2)

plt.annotate('local max', xy=(2, 1), xytext=(3, 1.5),
            arrowprops=dict(facecolor='green', shrink=0.05),
            )

plt.ylim(-2,2)
plt.show()



In [1]:

    
from sklearn.datasets import make_classification



In [2]:

    
from sklearn.cross_validation import cross_val_score



In [3]:

    
from sklearn import svm, metrics



In [4]:

    
X,y = make_classification(n_samples=5000, n_features=5, random_state=0)



In [5]:

    
X.shape, y.shape









    Out[5]:





((5000, 5), (5000,))



In [6]:

    
y









    Out[6]:





array([ 1.,  0.,  1., ...,  1.,  0.,  0.])



In [7]:

    
clf = svm.SVC()



In [11]:

    
cross_val_score(clf, X, y, cv=10)









    Out[11]:





array([ 0.924,  0.914,  0.926,  0.918,  0.932,  0.928,  0.916,  0.918,
        0.912,  0.906])



In [12]:

    
clf = svm.SVC(kernel='rbf')



In [13]:

    
cross_val_score(clf, X, y, cv=10)









    Out[13]:





array([ 0.924,  0.914,  0.926,  0.918,  0.932,  0.928,  0.916,  0.918,
        0.912,  0.906])



In [8]:

    
X.shape









    Out[8]:





(5000, 5)






    



Caching the list of root modules, please wait!
(This will only be done once - type '%rehashx' to reset cache!)



In [9]:

    
from sklearn.decomposition import RandomizedPCA



In [10]:

    
X_pca = RandomizedPCA(n_components=2).fit_transform(X)



In [11]:

    
X_pca.shape









    Out[11]:





(5000, 2)



In [12]:

    
import pylab as pl



In [26]:

    
pl.scatter(X_pca[y==0,0], X_pca[y==0,1], c='b')









    Out[26]:





<matplotlib.collections.PathCollection at 0xd2a4bac>



In [27]:

    
pl.scatter(X_pca[y==1,0], X_pca[y==1, 1], c='g')









    Out[27]:





<matplotlib.collections.PathCollection at 0xd92df4c>



In [25]:

    
pl.scatter(X_pca[y==0,0], X_pca[y==0,1], c='b')
pl.scatter(X_pca[y==1,0], X_pca[y==1, 1], c='g')









    Out[25]:





<matplotlib.collections.PathCollection at 0xbb5bfcc>



In [24]:

    
color = ['b','g']
from itertools import cycle
for i,c in zip(np.unique(y),cycle(color)):
    pl.scatter(X_pca[y==i,0], X_pca[y==i, 1], c=c, alpha=0.8)



In [29]:

    
from sklearn.lda import LDA



In [34]:

    
clf = LDA().fit(X,y)



In [35]:

    
clf









    Out[35]:





LDA(n_components=None, priors=None)



In [33]:

    
y.shape, X.shape









    Out[33]:





((5000,), (5000, 5))



In [36]:

    
X[0]









    Out[36]:





array([-1.43375112, -0.53563349, -0.60931745, -1.69068084, -1.49991083])



In [37]:

    
clf.predict(X[0])









    Out[37]:





array([ 1.])



In [38]:

    
y[1]









    Out[38]:





0.0



In [39]:

    
y[0]









    Out[39]:





1.0



In [43]:

    
X_lda=LDA(n_components=2).fit_transform(X,y)



In [44]:

    
X_lda.shape









    Out[44]:





(5000, 2)



In [69]:

    
np.unique(y)









    Out[69]:





array([ 0.,  1.])



In [42]:

    
lda.shap









    Out[42]:





(5000, 1)



In [48]:

    
for i,c in zip(np.unique(y), cycle(color)):
        pl.scatter(X_lda[y==i, 0], X_lda[y==i, 1], c=c, alpha=.8)



In [49]:

    
pl.scatter(X_lda[y==0, 0], X_lda[y==0, 1], c='b', alpha=.8)









    Out[49]:





<matplotlib.collections.PathCollection at 0xd3f4d4c>



In [50]:

    
pl.scatter(X_lda[y==1, 0], X_lda[y==1, 1], c='g', alpha=.8)









    Out[50]:





<matplotlib.collections.PathCollection at 0xd3ea14c>



In [51]:

    
from sklearn import datasets



In [52]:

    
iris = datasets.load_iris()



In [55]:

    
iris.DESCR









    Out[55]:





'Iris Plants Database\n\nNotes\n-----\nData Set Characteristics:\n    :Number of Instances: 150 (50 in each of three classes)\n    :Number of Attributes: 4 numeric, predictive attributes and the class\n    :Attribute Information:\n        - sepal length in cm\n        - sepal width in cm\n        - petal length in cm\n        - petal width in cm\n        - class:\n                - Iris-Setosa\n                - Iris-Versicolour\n                - Iris-Virginica\n    :Summary Statistics:\n    ============== ==== ==== ======= ===== ====================\n                    Min  Max   Mean    SD   Class Correlation\n    ============== ==== ==== ======= ===== ====================\n    sepal length:   4.3  7.9   5.84   0.83    0.7826\n    sepal width:    2.0  4.4   3.05   0.43   -0.4194\n    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\n    petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)\n    ============== ==== ==== ======= ===== ====================\n    :Missing Attribute Values: None\n    :Class Distribution: 33.3% for each of 3 classes.\n    :Creator: R.A. Fisher\n    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n    :Date: July, 1988\n\nThis is a copy of UCI ML iris datasets.\nhttp://archive.ics.uci.edu/ml/datasets/Iris\n\nThe famous Iris database, first used by Sir R.A Fisher\n\nThis is perhaps the best known database to be found in the\npattern recognition literature.  Fisher\'s paper is a classic in the field and\nis referenced frequently to this day.  (See Duda & Hart, for example.)  The\ndata set contains 3 classes of 50 instances each, where each class refers to a\ntype of iris plant.  One class is linearly separable from the other 2; the\nlatter are NOT linearly separable from each other.\n\nReferences\n----------\n   - Fisher,R.A. "The use of multiple measurements in taxonomic problems"\n     Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to\n     Mathematical Statistics" (John Wiley, NY, 1950).\n   - Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.\n     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\n   - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System\n     Structure and Classification Rule for Recognition in Partially Exposed\n     Environments".  IEEE Transactions on Pattern Analysis and Machine\n     Intelligence, Vol. PAMI-2, No. 1, 67-71.\n   - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE Transactions\n     on Information Theory, May 1972, 431-433.\n   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al"s AUTOCLASS II\n     conceptual clustering system finds 3 classes in the data.\n   - Many, many more ...\n'



In [56]:

    
iris.data.shape









    Out[56]:





(150, 4)



In [57]:

    
iris.target.shape









    Out[57]:





(150,)



In [58]:

    
clf = svm.SVC(kernel='rbf').fit(iris.data, iris.target)



In [59]:

    
clf









    Out[59]:





SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, shrinking=True, tol=0.001,
  verbose=False)



In [61]:

    
clf.predict(iris.data[:5])









    Out[61]:





array([0, 0, 0, 0, 0])



In [62]:

    
iris.target_names









    Out[62]:





array(['setosa', 'versicolor', 'virginica'], 
      dtype='|S10')



In [63]:

    
iris_pca = RandomizedPCA(n_components=2).fit_transform(iris.data)



In [80]:

    
iris_pca.shape









    Out[80]:





(150, 2)



In [81]:

    
color = ['b', 'g', 'r']
for i, c in zip(np.unique(iris.target), cycle(color)):
    pl.scatter(iris_pca[iris.target==i, 0], iris_pca[iris.target==0, 1], c=c, alpha=.8)
    print i



In [75]:

    
np.unique(iris.target)









    Out[75]:





array([0, 1, 2])



In [85]:

    
from sklearn.decomposition import PCA
iris_pca_basic = PCA(n_components=2).fit_transform(iris.data)



In [86]:

    
iris_pca_basic.shape









    Out[86]:





(150, 2)



In [88]:

    
color =['b', 'g', 'r']
for i, c in zip(np.unique(iris.target), cycle(color)):
    pl.scatter(iris_pca_basic[iris.target==i,0], iris_pca_basic[iris.target==i,1],
               c=c, alpha=.8)



In [89]:

    
from sklearn.decomposition import ProbabilisticPCA, KernelPCA
iris_pca_probalistic = KernelPCA(n_components=2).fit_transform(iris.data)



In [91]:

    
iris_pca_probalistic.shape









    Out[91]:





(150, 2)



In [93]:

    
X_p= iris_pca_probalistic
for i, c in zip(np.unique(iris.target), cycle(color)):
    pl.scatter(X_p[iris.target==i,0], X_p[iris.target==i,1], c=c, alpha=.7)



In [ ]: