In [2]:
import numpy as np
import pandas as pd
import sklearn as sk
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
labels = pd.read_csv('trainLabels.csv', header=True)

In [7]:
labels.head()


Out[7]:
circle0 0
0 circle1 0
1 circle2 0
2 circle3 0
3 circle4 0
4 circle5 0

In [4]:
data = pd.read_csv('layer2.csv', header=False)

In [6]:
data.head()


Out[6]:
5.667200088500976562e-01 7.970024645328521729e-02 2.027546428143978119e-02 2.690069973468780518e-01 1.842850148677825928e-01 2.951270341873168945e-01 1.841731136664748192e-03 4.442757666110992432e-01 3.289907574653625488e-01 2.179550230503082275e-01 ... 9.691107869148254395e-01 9.359015151858329773e-03 1.865154504776000977e-02 1.093757271766662598e+00 6.872798316180706024e-03 5.407567620277404785e-01 3.315060734748840332e-01 1.024074077606201172e+00 3.251272141933441162e-01 7.077471613883972168e-01
0 0.359596 0.030360 0.042114 0.557201 0.292468 0.576244 0.040323 0.235649 0.360628 0.095723 ... 0.973673 0.005746 0.189654 1.126736 0.018448 0.063515 0.795952 0.191133 0.985414 0.511179
1 0.384189 0.026059 0.013829 0.598136 0.185632 0.534398 0.062535 0.373995 0.421869 0.087321 ... 0.974974 0.026356 0.253659 1.079788 0.021054 0.043340 0.797725 0.295483 0.835673 0.682395
2 0.616859 0.059887 0.021190 0.369321 0.141765 0.648689 0.008564 0.287725 0.474470 0.107217 ... 1.012346 0.000000 0.100739 1.074590 0.006698 0.154095 0.485839 0.569061 0.434044 0.669629
3 0.531459 0.035479 0.030622 0.283461 0.167785 0.549207 0.000000 0.317763 0.365595 0.166181 ... 0.925976 0.000000 0.021144 1.099319 0.022268 0.292321 0.480380 0.889247 0.325880 0.815859
4 0.680941 0.090619 0.000000 0.237210 0.148403 0.351544 0.003641 0.384987 0.401681 0.143233 ... 0.953336 0.001316 0.013883 1.183898 0.011917 0.555546 0.251101 1.116223 0.196840 0.753510

5 rows × 2048 columns


In [8]:
from sklearn.manifold import TSNE
X = data.values
model = TSNE(n_components=2, random_state=0)
vals = model.fit_transform(X)

In [9]:
vals.shape


Out[9]:
(799, 2)

In [10]:
labels.values.shape


Out[10]:
(799, 2)

In [12]:
forGraph = np.hstack([vals,labels])

In [13]:
header = ["x","y","name","class"]

In [15]:
forSns = pd.DataFrame(forGraph, columns = header)

In [28]:
rightBlob = forSns[forSns['y'] >-8]

In [29]:
rightBlob[rightBlob['class'] == 1]


Out[29]:
x y name class
204 1.49786 -7.183717 cross5 1
259 2.383465 -6.620029 cross60 1
275 1.489795 -7.538162 cross76 1
316 -0.1864092 -6.578962 cross117 1
353 3.208965 -7.425173 cross154 1
355 -0.03169287 -7.425762 cross156 1

In [19]:
forSns.shape


Out[19]:
(799, 4)

In [20]:
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")


sns.lmplot('x', 'y',
           data=forSns,
           hue="class",fit_reg=False)


Out[20]:
<seaborn.axisgrid.FacetGrid at 0x10f183890>

In [35]:
model2 = TSNE(n_components=2, random_state=1)
vals2 = model2.fit_transform(X) 
forGraph2 = np.hstack([vals2,labels])
forSns2 = pd.DataFrame(forGraph2, columns = header)
sns.lmplot('x', 'y',
           data=forSns2,
           hue="class",fit_reg=False)


Out[35]:
<seaborn.axisgrid.FacetGrid at 0x1037bb250>

In [37]:
blob = forSns2[forSns2['y'] >0]
blob[blob['class'] == 2]


Out[37]:
x y name class
399 -2.056647 15.89713 square0 2
400 -4.813116 14.95008 square1 2
402 1.756608 8.049464 square3 2
403 -2.481747 14.25869 square4 2
404 -3.306489 16.29915 square5 2
405 0.5664254 9.20581 square6 2
407 0.1484442 9.563039 square8 2
408 -6.303011 14.5425 square9 2
412 -6.579982 16.04453 square13 2
415 -9.384882 13.29236 square16 2
418 -1.133297 10.93039 square19 2
419 0.3598716 8.027057 square20 2
420 -10.43315 13.60498 square21 2
423 -2.221226 11.12305 square24 2
424 -2.735218 16.40695 square25 2
427 -7.488565 12.00093 square28 2
428 1.048198 9.539005 square29 2
429 -3.563986 15.57099 square30 2
430 -4.052325 16.19847 square31 2
431 -1.844176 14.77474 square32 2
433 0.587014 8.544686 square34 2
434 0.1744594 8.441482 square35 2
435 1.339437 9.025338 square36 2
436 1.642125 8.220306 square37 2
438 -7.344256 14.07951 square39 2
439 1.811777 7.774795 square40 2
440 -1.596161 11.28753 square41 2
442 -8.05571 13.83245 square43 2
443 -0.0375234 10.38117 square44 2
444 -2.685741 14.99218 square45 2
... ... ... ... ...
548 -2.223571 14.05082 square149 2
549 -3.274544 14.84027 square150 2
550 -9.815501 13.67347 square151 2
551 -7.180059 12.96168 square152 2
554 -1.801794 15.25396 square155 2
555 -11.10398 13.55253 square156 2
557 -11.62535 13.87594 square158 2
561 -5.640322 14.65102 square162 2
564 -3.537676 11.38061 square165 2
566 -3.620265 11.5668 square167 2
567 -1.628158 14.50681 square168 2
568 -1.316024 13.02494 square169 2
569 -6.988426 12.50407 square170 2
570 1.167869 8.524993 square171 2
573 -7.112928 15.65675 square174 2
574 -0.3289048 13.77844 square175 2
576 -6.188864 14.25138 square177 2
577 -7.597609 12.674 square178 2
580 -11.67328 14.485 square181 2
583 -9.916943 13.75809 square184 2
585 -10.07133 14.16314 square186 2
586 0.0646655 9.340726 square187 2
587 0.9374826 9.668843 square188 2
588 -1.671148 11.07228 square189 2
589 -7.491778 15.71663 square190 2
591 -7.407856 15.83671 square192 2
592 -2.26066 16.12686 square193 2
593 1.927207 7.607041 square194 2
594 -2.466143 12.93632 square195 2
597 -9.615267 13.52132 square198 2

110 rows × 4 columns


In [38]:
X.shape


Out[38]:
(799, 2048)

In [ ]: