In [24]:
import sklearn
import os
import csv
import pandas as pd
In [105]:
# Move into the subject/session data directory so relative reads (e.g. 's01.txt') work.
# NOTE(review): hardcoded absolute local path — prefer a configurable DATA_DIR.
os.chdir('/home/mancube/Downloads/data/a10/p1a/')
#os.listdir('/home/mancube/Downloads/data/a10')
In [26]:
# 45 sensor channels: 5 units (T=torso, RA/LA=right/left arm, RL/LL=right/left leg),
# each with 9 signals (x/y/z accelerometer, gyroscope, magnetometer).
header_list = (
    'T1_xacc,T2_yacc,T3_zacc,T4_xgyro,T5_ygyro,T6_zgyro,T7_xmag,T8_ymag,T9_zmag,RA_xacc,RA_yacc,RA_zacc,RA_xgyro,RA_ygyro,RA_zgyro,RA_xmag,RA_ymag,RA_zmag,LA_xacc,LA_yacc,LA_zacc,LA_xgyro,LA_ygyro,LA_zgyro,LA_xmag,LA_ymag,LA_zmag,RL_xacc,RL_yacc,RL_zacc,RL_xgyro,RL_ygyro,RL_zgyro,RL_xmag,RL_ymag,RL_zmag,LL_xacc,LL_yacc,LL_zacc,LL_xgyro,LL_ygyro,LL_zgyro,LL_xmag,LL_ymag,LL_zmag'
).split(',')
columns = header_list
In [145]:
# Load one subject's recording session.
# BUG FIX: pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in
# 1.0. read_csv with index_col=0 / parse_dates=True reproduces its defaults.
# NOTE(review): the raw sensor .txt files typically have no header row —
# TODO confirm whether header=None, names=columns was actually intended here.
a = pd.read_csv('s01.txt', index_col=0, parse_dates=True)
#df['0'] = range(125)
#print a[a.columns[0]]
#df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=list('ABCD'))
In [124]:
# Summary statistics for the torso x-axis accelerometer channel.
a['T1_xacc'].describe()
Out[124]:
In [142]:
# Plot the torso z-axis accelerometer trace.
# NOTE(review): plt is only imported in a LATER cell (In [143]) — this cell
# fails under Restart & Run All; move all imports to the top of the notebook.
plt.figure(); a['T3_zacc'].plot();
plt.show()
In [143]:
%pylab inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [ ]:
In [8]:
import sklearn
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
In [9]:
# Generate a toy binary-classification dataset for the logistic-regression demo.
# BUG FIX: the original line was left incomplete (`X,y =`), a SyntaxError;
# restored from the commented-out call directly above it.
X, y = make_classification(n_samples=1000, n_features=4)
In [11]:
from sklearn.linear_model import LogisticRegression
# Plain logistic regression with default hyperparameters.
lr = LogisticRegression()
In [12]:
# Hold out the last 200 samples for testing (deterministic split, no shuffle).
X_train, X_test = X[:-200], X[-200:]
y_train, y_test = y[:-200], y[-200:]
In [13]:
# Fit on the training split, then predict on both splits to compare accuracy.
lr.fit(X_train, y_train)
y_train_predictions = lr.predict(X_train)
y_test_predictions = lr.predict(X_test)
In [19]:
# Training accuracy: mean of the boolean match array equals sum/​count as a float.
(y_train_predictions == y_train).mean()
Out[19]:
In [20]:
# Held-out test accuracy, computed the same way as the training accuracy.
(y_test_predictions == y_test).mean()
Out[20]:
In [21]:
# Imbalanced dataset: `weights=[.95]` puts ~95% of samples in class 0.
X, y = make_classification(n_samples=5000, n_features=4, weights=[.95])
In [22]:
# Fraction of positive-class samples — confirms the ~5% minority class.
float(sum(y)) / len(y)
Out[22]:
In [79]:
# Three Gaussian blobs (500 points) for the clustering demo.
# NOTE(review): no random_state — blob positions differ between runs.
from sklearn.datasets import make_blobs
blobs, classes = make_blobs(500, centers=3)
import matplotlib.pyplot as plt
In [85]:
# Scatter the blobs, coloring each point by its generated cluster label.
# BUG FIX: `rgb` was referenced one line BEFORE it was assigned (NameError on
# a fresh kernel) — define the color lookup array first.
rgb = np.array(['r', 'g', 'b'])
f, ax = plt.subplots(figsize=(7.5, 7.5))
ax.scatter(blobs[:, 0], blobs[:, 1], color=rgb[classes])
ax.set_title("Blobs")
plt.show()
In [93]:
from sklearn.cluster import KMeans

# Fit K-means with the known number of blob centers.
kmean = KMeans(n_clusters=3)
kmean.fit(blobs)

# BUG FIX: the original pasted the fitted-model repr (and a list of center
# coordinates) back in as code, then indexed the KMeans *class* itself
# (`KMeans[:, 0]` — a TypeError). The learned centers live on the fitted
# estimator as `cluster_centers_`.
centers = kmean.cluster_centers_

f, ax = plt.subplots(figsize=(7.5, 7.5))
ax.scatter(blobs[:, 0], blobs[:, 1], color=rgb[classes])
ax.scatter(centers[:, 0], centers[:, 1], marker='*', s=250, color='black', label='Centers')
ax.set_title("Blobs")
ax.legend(loc='best')
In [104]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Toy data: pizza diameter (inches) -> price (dollars).
X_train = [[6], [8], [10], [14], [18]]
y_train = [[7], [9], [13], [17.5], [18]]
X_test = [[6], [8], [11], [16]]
y_test = [[8], [12], [15], [18]]

# Simple linear fit, plotted over a dense grid of diameters.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
xx = np.linspace(0, 26, 100)
yy = regressor.predict(xx.reshape(-1, 1))
plt.plot(xx, yy)

# Degree-2 polynomial fit on the same data, plotted as a dashed red curve.
quadratic_featurizer = PolynomialFeatures(degree=2)
X_train_quadratic = quadratic_featurizer.fit_transform(X_train)
X_test_quadratic = quadratic_featurizer.transform(X_test)
regressor_quadratic = LinearRegression()
regressor_quadratic.fit(X_train_quadratic, y_train)
xx_quadratic = quadratic_featurizer.transform(xx.reshape(-1, 1))
plt.plot(xx, regressor_quadratic.predict(xx_quadratic), c='r', linestyle='--')

plt.title('Pizza price regressed on diameter')
plt.xlabel('Diameter in inches')
plt.ylabel('Price in dollars')
plt.axis([0, 25, 0, 25])
plt.grid(True)
plt.scatter(X_train, y_train)
plt.show()

# BUG FIX: the Python 2 `print` statements were converted to print() calls so
# the cell also runs under Python 3.
print(X_train)
print(X_train_quadratic)
print(X_test)
print(X_test_quadratic)
print('Simple linear regression r-squared %s' % regressor.score(X_test, y_test))
print('Quadratic regression r-squared %s' % regressor_quadratic.score(X_test_quadratic, y_test))
In [98]:
# Sanity check: five training samples in the pizza example above.
len([[6], [8], [10], [14], [18]])
Out[98]:
In [100]:
# Under Python 2 (this notebook uses print statements) this displays the
# list [0, 1, 2, 3, 4]; under Python 3 it would be a range object.
range(5)
Out[100]:
In [148]:
# Train/test split of two torso sensor channels.
# BUG FIX: sklearn.cross_validation was deprecated in 0.18 and removed in
# 0.20 — train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

aT1_xacc = a['T1_xacc']
aT1_zacc = a['T1_zacc']
# BUG FIX: the original passed the undefined name `aT1_yacc` (NameError); the
# cell only defines aT1_zacc, so split x against z.
# NOTE(review): TODO confirm whether a['T2_yacc'] was the intended channel.
X_train, X_test, y_train, y_test = train_test_split(aT1_xacc, aT1_zacc,
                                                    test_size=0.25,
                                                    random_state=33)
In [149]:
# (rows, columns) of the loaded session frame.
print(a.shape)
In [165]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Score the first 8 sensor channels against column 8 as the target.
array = a.values
X = array[:,0:8]
Y = array[:,8]
# feature extraction
# NOTE(review): chi2 requires NON-NEGATIVE features, but accelerometer/gyro
# readings go negative, so fit() will raise ValueError on this data — and Y
# here is a continuous sensor reading, not class labels. Consider f_classif
# (or f_regression) instead. TODO confirm intent.
test = SelectKBest(score_func=chi2, k=4)
fit = test.fit(X, Y)
# summarize scores
# CONSISTENCY FIX: use the `np` alias the rest of the notebook uses rather
# than the bare `numpy` name injected by %pylab.
np.set_printoptions(precision=3)
print(fit.scores_)
features = fit.transform(X)
# summarize selected features
print(features[0:5,:])
In [171]:
# BUG FIX: Series.as_matrix() was deprecated in pandas 0.23 and removed in
# 1.0 — .to_numpy() is the supported replacement with identical output.
numpyMatrix = a['T1_zacc'].to_numpy()
In [184]:
# Peek at the first row of the session frame.
print a.iloc[0:1]
# NOTE(review): this rebinds `columns` (previously the full 45-name header
# list) to a single-entry list — shadowing the earlier name is fragile under
# out-of-order execution; pick a distinct name.
columns=['T1_xacc']#, 'T2_yacc', 'T3_zacc']
columns1=['T4_xgyro']#, 'T5_ygyro', 'T6_zgyro']
In [191]:
import numpy as np
from numpy import convolve
import matplotlib.pyplot as plt
def movingaverage(values, window):
    """Simple moving average of `values` over a sliding `window`.

    Uses 'valid' convolution, so the result has len(values) - window + 1
    points (no partial windows at the edges).
    """
    kernel = np.full(window, 1.0 / window)
    return np.convolve(values, kernel, 'valid')
#s = columns.Series()
# BUG FIX: `columns`/`columns1` are plain lists of column NAMES and have no
# .values attribute (AttributeError). Index into the DataFrame `a` to get the
# data; [0] unwraps the single column into a flat list so np.convolve (1-D)
# and the tail-slice below both work.
x = a[columns].values.T.tolist()[0]
y = a[columns1].values.T.tolist()[0]
yMA = movingaverage(y, 3)
#print yMA
# Align x with the shorter 'valid'-mode moving average before plotting.
plt.plot(x[len(x)-len(yMA):], yMA)
plt.show()
In [192]:
import numpy
from pandas import read_csv
from sklearn.decomposition import PCA
# load data
# NOTE(review): downloads from a live URL on every run — cache a local copy
# for reproducibility.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
# feature extraction
pca = PCA(n_components=3)
fit = pca.fit(X)
# summarize components
# BUG FIX: `print("...") % value` only works as a Python 2 print *statement*
# (print applied to the whole % expression); on Python 3 it is a TypeError
# (None % ...). Keep the % formatting inside the call.
print("Explained Variance: %s" % fit.explained_variance_ratio_)
print(fit.components_)
In [201]:
import numpy
from pandas import read_csv
from sklearn.decomposition import PCA
# load data
array = a.values
# NOTE(review): PCA does not require non-negative input, so taking abs() of
# the raw sensor readings discards sign information — confirm this is
# intentional.
X = abs(array[:,])
Y = array[:,8]  # NOTE(review): Y is never used in this cell
# feature extraction
pca = PCA(n_components=3)
fit = pca.fit(X)
# summarize components
# BUG FIX: keep the % formatting inside the print() call (Python 3 safe);
# `print("...") % x` is a TypeError on Python 3.
print("Explained Variance: %s" % fit.explained_variance_ratio_)
print(fit.components_)
In [202]:
# Degree-3 least-squares polynomial fit (the numpy.polyfit doc example data).
x = np.arange(6, dtype=float)  # 0.0, 1.0, ..., 5.0
y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0])
z = np.polyfit(x, y, 3)
# Coefficients, highest degree first.
z
In [ ]: