In [47]:
import pandas as pd
import numpy as np
import sklearn as sk
In [16]:
data = pd.read_csv("QuercusOaxaca_amb.txt")
In [17]:
data.head()
Out[17]:
In [18]:
data = data.drop(['FID','FUENTE'], axis=1)
In [19]:
data.head()
Out[19]:
In [28]:
data["Nombre"] = data["Seccion"] + " " +data["Especie"]
In [29]:
data.head()
Out[29]:
In [36]:
cols = data.columns.tolist()
In [31]:
cols
Out[31]:
In [37]:
cols = [cols[-1]] + cols[2:-1]
In [38]:
cols
Out[38]:
In [ ]:
In [39]:
df = data
In [41]:
df = df.reindex(cols, axis=1)
In [43]:
features = cols[1:]
In [44]:
features
Out[44]:
In [56]:
df.head()
Out[56]:
In [45]:
# Feature matrix as a plain NumPy array: one row per row of `df`, one column
# per entry of `features`, in that order.
x = df.loc[:, features].values
In [46]:
x
Out[46]:
In [48]:
from sklearn.preprocessing import StandardScaler
In [49]:
# Standardize every feature column (StandardScaler's defaults remove the mean
# and scale to unit variance). `x` is rebound to the scaled array from here on.
# NOTE(review): presumably done so that no single variable dominates the
# components because of its units; confirm this is the intent.
x = StandardScaler().fit_transform(x)
In [50]:
x
Out[50]:
In [51]:
from sklearn.decomposition import PCA
In [52]:
# Keep only the first two principal components.
pca = PCA(n_components=2)
In [53]:
# Fit the PCA on the standardized matrix and project every row onto the two
# retained components.
principalComponents = pca.fit_transform(x)
In [54]:
# Wrap the scores in a DataFrame with named columns. No index is passed, so the
# frame gets a default 0..n-1 index.
pcaDF = pd.DataFrame(data = principalComponents, columns = ['pca_1', 'pca_2'])
In [55]:
pcaDF
Out[55]:
In [57]:
finalDF = pd.concat([df[['Nombre']], pcaDF], axis = 1)
In [58]:
finalDF.head()
Out[58]:
In [60]:
cols = finalDF.columns.tolist()
In [62]:
cols = [cols[-1]] + cols[0:-1]
In [63]:
cols
Out[63]:
In [65]:
finalDF = finalDF.reindex(cols, axis=1)
In [67]:
finalDF.to_csv("QuercusPCA.txt")
In [ ]:
# (Unfinished scratch cell: a stray, half-typed statement was removed here
# because it raised NameError under Restart Kernel -> Run All.)