In [2]:
from sklearn import decomposition
from sklearn import datasets

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [4]:
np.random.seed(5)

centers = [[1, 1], [-1, -1], [1, -1]]
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [5]:
X


Out[5]:
array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2],
       [ 5.4,  3.9,  1.7,  0.4],
       [ 4.6,  3.4,  1.4,  0.3],
       [ 5. ,  3.4,  1.5,  0.2],
       [ 4.4,  2.9,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5.4,  3.7,  1.5,  0.2],
       [ 4.8,  3.4,  1.6,  0.2],
       [ 4.8,  3. ,  1.4,  0.1],
       [ 4.3,  3. ,  1.1,  0.1],
       [ 5.8,  4. ,  1.2,  0.2],
       [ 5.7,  4.4,  1.5,  0.4],
       [ 5.4,  3.9,  1.3,  0.4],
       [ 5.1,  3.5,  1.4,  0.3],
       [ 5.7,  3.8,  1.7,  0.3],
       [ 5.1,  3.8,  1.5,  0.3],
       [ 5.4,  3.4,  1.7,  0.2],
       [ 5.1,  3.7,  1.5,  0.4],
       [ 4.6,  3.6,  1. ,  0.2],
       [ 5.1,  3.3,  1.7,  0.5],
       [ 4.8,  3.4,  1.9,  0.2],
       [ 5. ,  3. ,  1.6,  0.2],
       [ 5. ,  3.4,  1.6,  0.4],
       [ 5.2,  3.5,  1.5,  0.2],
       [ 5.2,  3.4,  1.4,  0.2],
       [ 4.7,  3.2,  1.6,  0.2],
       [ 4.8,  3.1,  1.6,  0.2],
       [ 5.4,  3.4,  1.5,  0.4],
       [ 5.2,  4.1,  1.5,  0.1],
       [ 5.5,  4.2,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5. ,  3.2,  1.2,  0.2],
       [ 5.5,  3.5,  1.3,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 4.4,  3. ,  1.3,  0.2],
       [ 5.1,  3.4,  1.5,  0.2],
       [ 5. ,  3.5,  1.3,  0.3],
       [ 4.5,  2.3,  1.3,  0.3],
       [ 4.4,  3.2,  1.3,  0.2],
       [ 5. ,  3.5,  1.6,  0.6],
       [ 5.1,  3.8,  1.9,  0.4],
       [ 4.8,  3. ,  1.4,  0.3],
       [ 5.1,  3.8,  1.6,  0.2],
       [ 4.6,  3.2,  1.4,  0.2],
       [ 5.3,  3.7,  1.5,  0.2],
       [ 5. ,  3.3,  1.4,  0.2],
       [ 7. ,  3.2,  4.7,  1.4],
       [ 6.4,  3.2,  4.5,  1.5],
       [ 6.9,  3.1,  4.9,  1.5],
       [ 5.5,  2.3,  4. ,  1.3],
       [ 6.5,  2.8,  4.6,  1.5],
       [ 5.7,  2.8,  4.5,  1.3],
       [ 6.3,  3.3,  4.7,  1.6],
       [ 4.9,  2.4,  3.3,  1. ],
       [ 6.6,  2.9,  4.6,  1.3],
       [ 5.2,  2.7,  3.9,  1.4],
       [ 5. ,  2. ,  3.5,  1. ],
       [ 5.9,  3. ,  4.2,  1.5],
       [ 6. ,  2.2,  4. ,  1. ],
       [ 6.1,  2.9,  4.7,  1.4],
       [ 5.6,  2.9,  3.6,  1.3],
       [ 6.7,  3.1,  4.4,  1.4],
       [ 5.6,  3. ,  4.5,  1.5],
       [ 5.8,  2.7,  4.1,  1. ],
       [ 6.2,  2.2,  4.5,  1.5],
       [ 5.6,  2.5,  3.9,  1.1],
       [ 5.9,  3.2,  4.8,  1.8],
       [ 6.1,  2.8,  4. ,  1.3],
       [ 6.3,  2.5,  4.9,  1.5],
       [ 6.1,  2.8,  4.7,  1.2],
       [ 6.4,  2.9,  4.3,  1.3],
       [ 6.6,  3. ,  4.4,  1.4],
       [ 6.8,  2.8,  4.8,  1.4],
       [ 6.7,  3. ,  5. ,  1.7],
       [ 6. ,  2.9,  4.5,  1.5],
       [ 5.7,  2.6,  3.5,  1. ],
       [ 5.5,  2.4,  3.8,  1.1],
       [ 5.5,  2.4,  3.7,  1. ],
       [ 5.8,  2.7,  3.9,  1.2],
       [ 6. ,  2.7,  5.1,  1.6],
       [ 5.4,  3. ,  4.5,  1.5],
       [ 6. ,  3.4,  4.5,  1.6],
       [ 6.7,  3.1,  4.7,  1.5],
       [ 6.3,  2.3,  4.4,  1.3],
       [ 5.6,  3. ,  4.1,  1.3],
       [ 5.5,  2.5,  4. ,  1.3],
       [ 5.5,  2.6,  4.4,  1.2],
       [ 6.1,  3. ,  4.6,  1.4],
       [ 5.8,  2.6,  4. ,  1.2],
       [ 5. ,  2.3,  3.3,  1. ],
       [ 5.6,  2.7,  4.2,  1.3],
       [ 5.7,  3. ,  4.2,  1.2],
       [ 5.7,  2.9,  4.2,  1.3],
       [ 6.2,  2.9,  4.3,  1.3],
       [ 5.1,  2.5,  3. ,  1.1],
       [ 5.7,  2.8,  4.1,  1.3],
       [ 6.3,  3.3,  6. ,  2.5],
       [ 5.8,  2.7,  5.1,  1.9],
       [ 7.1,  3. ,  5.9,  2.1],
       [ 6.3,  2.9,  5.6,  1.8],
       [ 6.5,  3. ,  5.8,  2.2],
       [ 7.6,  3. ,  6.6,  2.1],
       [ 4.9,  2.5,  4.5,  1.7],
       [ 7.3,  2.9,  6.3,  1.8],
       [ 6.7,  2.5,  5.8,  1.8],
       [ 7.2,  3.6,  6.1,  2.5],
       [ 6.5,  3.2,  5.1,  2. ],
       [ 6.4,  2.7,  5.3,  1.9],
       [ 6.8,  3. ,  5.5,  2.1],
       [ 5.7,  2.5,  5. ,  2. ],
       [ 5.8,  2.8,  5.1,  2.4],
       [ 6.4,  3.2,  5.3,  2.3],
       [ 6.5,  3. ,  5.5,  1.8],
       [ 7.7,  3.8,  6.7,  2.2],
       [ 7.7,  2.6,  6.9,  2.3],
       [ 6. ,  2.2,  5. ,  1.5],
       [ 6.9,  3.2,  5.7,  2.3],
       [ 5.6,  2.8,  4.9,  2. ],
       [ 7.7,  2.8,  6.7,  2. ],
       [ 6.3,  2.7,  4.9,  1.8],
       [ 6.7,  3.3,  5.7,  2.1],
       [ 7.2,  3.2,  6. ,  1.8],
       [ 6.2,  2.8,  4.8,  1.8],
       [ 6.1,  3. ,  4.9,  1.8],
       [ 6.4,  2.8,  5.6,  2.1],
       [ 7.2,  3. ,  5.8,  1.6],
       [ 7.4,  2.8,  6.1,  1.9],
       [ 7.9,  3.8,  6.4,  2. ],
       [ 6.4,  2.8,  5.6,  2.2],
       [ 6.3,  2.8,  5.1,  1.5],
       [ 6.1,  2.6,  5.6,  1.4],
       [ 7.7,  3. ,  6.1,  2.3],
       [ 6.3,  3.4,  5.6,  2.4],
       [ 6.4,  3.1,  5.5,  1.8],
       [ 6. ,  3. ,  4.8,  1.8],
       [ 6.9,  3.1,  5.4,  2.1],
       [ 6.7,  3.1,  5.6,  2.4],
       [ 6.9,  3.1,  5.1,  2.3],
       [ 5.8,  2.7,  5.1,  1.9],
       [ 6.8,  3.2,  5.9,  2.3],
       [ 6.7,  3.3,  5.7,  2.5],
       [ 6.7,  3. ,  5.2,  2.3],
       [ 6.3,  2.5,  5. ,  1.9],
       [ 6.5,  3. ,  5.2,  2. ],
       [ 6.2,  3.4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]])

In [7]:
y


Out[7]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [9]:
import pandas as pd

In [10]:
adult=pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",header=None)

In [11]:
adult.columns=["age ",
"workclass ",
"fnlwgt",
"education ",
"education-num",
"marital-status",
"occupation",
"relationship",
"race",
"sex",
"capital-gain",
"capital-loss",
"hours-per-week",
"native-country",
"income",
]

In [16]:
adult.dtypes


Out[16]:
age                int64
workclass         object
fnlwgt             int64
education         object
education-num      int64
marital-status    object
occupation        object
relationship      object
race              object
sex               object
capital-gain       int64
capital-loss       int64
hours-per-week     int64
native-country    object
income            object
dtype: object

In [13]:
y=adult.income.values
y


Out[13]:
array([' <=50K', ' <=50K', ' <=50K', ..., ' <=50K', ' <=50K', ' >50K'], dtype=object)

In [17]:
#Only numeric data for PCA
X=adult[["age ","fnlwgt","education-num","capital-gain","capital-loss","hours-per-week"]].values
X


Out[17]:
array([[    39,  77516,     13,   2174,      0,     40],
       [    50,  83311,     13,      0,      0,     13],
       [    38, 215646,      9,      0,      0,     40],
       ..., 
       [    58, 151910,      9,      0,      0,     40],
       [    22, 201490,      9,      0,      0,     20],
       [    52, 287927,      9,  15024,      0,     40]], dtype=int64)

fig = plt.figure(1, figsize=(4, 3)) plt.clf() ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134) plt.cla()


In [18]:
pca = decomposition.PCA(n_components=3)
pca.fit(X)
X = pca.transform(X)

In [19]:
X


Out[19]:
array([[ -1.12262330e+05,   1.09991722e+03,  -8.97964130e+01],
       [ -1.06467396e+05,  -1.07425781e+03,  -9.33540711e+01],
       [  2.58676042e+04,  -1.07828362e+03,  -8.81578195e+01],
       ..., 
       [ -3.78683960e+04,  -1.07634102e+03,  -9.06102320e+01],
       [  1.17116044e+04,  -1.07785772e+03,  -8.87784724e+01],
       [  9.81490602e+04,   1.39434958e+04,  -5.93208645e+01]])

for name, label in [('Setosa', 0), ('Versicolour', 1), ('Virginica', 2)]: ax.text3D(X[y == label, 0].mean(), X[y == label, 1].mean() + 1.5, X[y == label, 2].mean(), name, horizontalalignment='center', bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))

Reorder the labels to have colors matching the cluster results

y = np.choose(y, [1, 2, 0]).astype(np.float) ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.spectral, edgecolor='k')

ax.w_xaxis.set_ticklabels([]) ax.w_yaxis.set_ticklabels([]) ax.w_zaxis.set_ticklabels([])


In [20]:
%matplotlib inline

In [21]:
import seaborn as sns
iris = sns.load_dataset("iris")
sns.pairplot(iris)


Out[21]:
<seaborn.axisgrid.PairGrid at 0x1df792df160>

In [22]:
import matplotlib 
import matplotlib.pyplot as plt

In [23]:
x=[1,2,3]
y=[5,6,9]

In [24]:
plt.plot(x,y)


Out[24]:
[<matplotlib.lines.Line2D at 0x1df7ccf2908>]

In [28]:
plt.plot(x,y)
plt.xlabel('Customers')
plt.ylabel('Age')
plt.title('First Graph')


Out[28]:
<matplotlib.text.Text at 0x1df7d260c50>

In [29]:
iris=pd.read_csv('https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/datasets/iris.csv')

In [30]:
iris.columns


Out[30]:
Index(['Unnamed: 0', 'Sepal.Length', 'Sepal.Width', 'Petal.Length',
       'Petal.Width', 'Species'],
      dtype='object')

In [39]:
iris.columns=['Unnamed: 0', 'Sepal_Length', 'Sepal_Width', 'Petal_Length','Petal_Width', 'Species']

In [45]:
y=iris.groupby('Species').Sepal_Length.mean().reset_index()

In [49]:
y.Sepal_Length


Out[49]:
0    5.006
1    5.936
2    6.588
Name: Sepal_Length, dtype: float64

In [50]:
x=iris.Species

In [55]:
plt.plot(iris.Sepal_Length,iris.Petal_Length)


Out[55]:
[<matplotlib.lines.Line2D at 0x1df7e890c88>]

In [59]:
plt.bar(iris.Sepal_Length,iris.Petal_Length,label='bar1',color='blue')


Out[59]:
<Container object of 150 artists>

In [62]:
iris.plot(x='Sepal_Length',y='Petal_Length')


Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df7f270a20>

In [67]:
import seaborn as sns

In [69]:
def sinplot(flip=1):
    x = np.linspace(0, 14, 100)
    for i in range(1, 7):
        plt.plot(x, np.sin(x + i * .5) * (7 - i) * flip)

In [70]:
sinplot()



In [71]:
diamonds=pd.read_csv('https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/Ecdat/Diamond.csv')

In [72]:
diamonds.head()


Out[72]:
Unnamed: 0 carat colour clarity certification price
0 1 0.30 D VS2 GIA 1302
1 2 0.30 E VS1 GIA 1510
2 3 0.30 G VVS1 GIA 1510
3 4 0.30 G VS1 GIA 1260
4 5 0.31 D VS1 GIA 1641

In [75]:
diamonds=diamonds.drop( 'Unnamed: 0',1)

In [76]:
diamonds.head()


Out[76]:
carat colour clarity certification price
0 0.30 D VS2 GIA 1302
1 0.30 E VS1 GIA 1510
2 0.30 G VVS1 GIA 1510
3 0.30 G VS1 GIA 1260
4 0.31 D VS1 GIA 1641

In [77]:
sns.distplot(diamonds.price,kde=True,rug=True)


Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df7f723780>

In [78]:
sns.kdeplot(diamonds.price,shade=True)


Out[78]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df7fd90470>

In [79]:
import os as os

In [80]:
os.getcwd()


Out[80]:
'C:\\Users\\KOGENTIX'

In [81]:
os.chdir('C:\\Users\\KOGENTIX\\Desktop\\trainingWeek2')

In [83]:
diamonds.to_csv("diamonds2.csv")

In [84]:
sns.jointplot(x='price',y='carat',data=diamonds)


Out[84]:
<seaborn.axisgrid.JointGrid at 0x1df7fdc51d0>

In [86]:
sns.jointplot(x='price',y='carat',kind="hex",data=diamonds)


Out[86]:
<seaborn.axisgrid.JointGrid at 0x1df7f79e390>

In [87]:
sns.jointplot(x='price',y='carat',kind="kde",data=diamonds)


Out[87]:
<seaborn.axisgrid.JointGrid at 0x1df002a26a0>

In [88]:
sns.pairplot(iris)


Out[88]:
<seaborn.axisgrid.PairGrid at 0x1df01467390>

In [89]:
sns.pairplot(diamonds)


Out[89]:
<seaborn.axisgrid.PairGrid at 0x1df02465748>

In [90]:
sns.regplot(x='price',y='carat',data=diamonds)


Out[90]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df02eba470>

In [91]:
sns.lmplot(x='price',y='carat',data=diamonds)


Out[91]:
<seaborn.axisgrid.FacetGrid at 0x1df02f70cf8>

In [93]:
sns.lmplot(x='price',y='carat',hue='colour',data=diamonds)


Out[93]:
<seaborn.axisgrid.FacetGrid at 0x1df02e91128>

In [94]:
sns.stripplot(x='colour',y='carat',data=diamonds)


Out[94]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df03187f28>

In [95]:
sns.stripplot(x='colour',y='price',data=diamonds)


Out[95]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df7ff01550>

In [98]:
sns.stripplot(x='clarity',y='price',data=diamonds)


Out[98]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df7fff26d8>

In [97]:
diamonds.head()


Out[97]:
carat colour clarity certification price
0 0.30 D VS2 GIA 1302
1 0.30 E VS1 GIA 1510
2 0.30 G VVS1 GIA 1510
3 0.30 G VS1 GIA 1260
4 0.31 D VS1 GIA 1641

In [99]:
sns.swarmplot(x='clarity',y='price',data=diamonds)


Out[99]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df0003ebe0>

In [100]:
sns.swarmplot(x='clarity',y='price',hue='colour',data=diamonds)


Out[100]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df041e7278>

In [102]:
sns.boxplot(x='clarity',y='price',hue='colour',data=diamonds)


Out[102]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df04306518>

In [103]:
sns.boxplot(hue='clarity',y='price',x='colour',data=diamonds)


Out[103]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df047a2e80>

In [104]:
sns.boxplot(y='price',x='colour',data=diamonds)


Out[104]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df04c1fa58>

In [105]:
sns.violinplot(hue='clarity',y='price',x='colour',data=diamonds)


Out[105]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df04d66a58>

In [106]:
sns.violinplot(y='price',x='colour',data=diamonds)


Out[106]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df04fa60b8>

In [107]:
sns.barplot(hue='clarity',y='price',x='colour',data=diamonds)


Out[107]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df050f9ba8>

In [108]:
sns.barplot(y='price',x='colour',data=diamonds)


Out[108]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df051e0d30>

In [109]:
sns.countplot(x='colour',data=diamonds)


Out[109]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df0537ebe0>

In [110]:
sns.pointplot(hue='clarity',y='price',x='colour',data=diamonds)


Out[110]:
<matplotlib.axes._subplots.AxesSubplot at 0x1df04325be0>

In [112]:
sns.factorplot(hue='clarity',x='colour',y='price',col='certification',data=diamonds)


Out[112]:
<seaborn.axisgrid.FacetGrid at 0x1df06afdda0>

In [113]:
sns.factorplot(hue='clarity',x='colour',y='price',col='certification',data=diamonds,kind='bar')


Out[113]:
<seaborn.axisgrid.FacetGrid at 0x1df06e52048>

In [ ]: