In [1]:
# plot decision tree
from numpy import loadtxt
from xgboost import XGBClassifier
from xgboost import plot_tree
import matplotlib.pyplot as plt
# load data
dataset = loadtxt('./data/pima-indians-diabetes.csv', delimiter=",")
# split data into X and y
X = dataset[:,0:8]
y = dataset[:,8]
# fit model on training data
model = XGBClassifier()
model.fit(X, y)
# plot single tree
plot_tree(model)
plt.show()


<matplotlib.figure.Figure at 0x7f24340b2c18>
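
The rendered tree is usually too small to read at the default figure size. A minimal sketch of one way to make it legible (plot_tree's num_trees and rankdir parameters select a single boosted tree and a left-to-right layout; the graphviz package must be installed for plot_tree to work, and the figure size and dpi below are arbitrary choices):

In [ ]:
# plot only the first boosted tree, left to right, and save it at a readable size
plot_tree(model, num_trees=0, rankdir='LR')
fig = plt.gcf()
fig.set_size_inches(30, 15)
fig.savefig('tree.png', dpi=150)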

In [2]:
# %matplotlib inline selects the notebook backend; calling matplotlib.use('nbagg')
# here would have no effect because pyplot was already imported in the first cell
%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
import pandas as pd



In [3]:
import numpy as np
import matplotlib.pyplot as plt

# vector field of z = x * exp(-x^2 - y^2) and its gradient
y, x = np.mgrid[-2:2.1:0.2, -2:2.1:0.2]
z = x * np.exp(-x ** 2 - y ** 2)
dy, dx = np.gradient(z)

plt.quiver(x, y, dx, dy, z)
plt.contour(x, y, z, 10)  # hold() is gone from modern matplotlib; both calls draw on the same axes
plt.show()



In [4]:
%matplotlib inline
from scipy.stats import rayleigh
import pandas as pd

# three Rayleigh samples, shifted by +1, 0 and -1, plotted as a stacked histogram
a = rayleigh.rvs(loc=5, scale=2, size=1000) + 1
b = rayleigh.rvs(loc=5, scale=2, size=1000)
c = rayleigh.rvs(loc=5, scale=2, size=1000) - 1
data = pd.DataFrame({"a": a, "b": b, "c": c}, columns=["a", "b", "c"])
data.plot(kind="hist", stacked=True, bins=30, figsize=(8, 4))


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f2402fb39e8>
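
Since the three columns are drawn from rayleigh.rvs, a natural check is to recover the parameters with rayleigh.fit and overlay the fitted density on one of the samples. A minimal sketch, assuming a recent matplotlib (density= rather than the older normed= argument):

In [ ]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import rayleigh

# fit loc and scale back to sample b and overlay the fitted PDF on a normalized histogram
loc, scale = rayleigh.fit(b)
xs = np.linspace(b.min(), b.max(), 200)
plt.hist(b, bins=30, density=True, alpha=0.5)
plt.plot(xs, rayleigh.pdf(xs, loc=loc, scale=scale), 'r-')
plt.show()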

In [5]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np

# use arrays so that bottom=A + B adds element-wise instead of concatenating lists
A = np.array([5, 30, 45, 22])
B = np.array([5, 25, 50, 20])
C = np.array([5, 25, 50, 20])

X = range(4)
plt.bar(X, A, color='b')
plt.bar(X, B, color='r', bottom=A)
plt.bar(X, C, color='y', bottom=A + B)
plt.show()



In [6]:
import numpy as np
import random as rn
rn.random()


Out[6]:
0.3780767146573495

In [7]:
import re

In [8]:
y = np.array([1, 2, 3, 4, 5])
print('mean is', y.mean())
y.dot(y)  # dot product of y with itself, i.e. the sum of squares


mean is 3.0
Out[8]:
55

In [14]:
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
X,y = make_classification(n_samples=100, n_features=2,n_informative=2,
                          n_redundant=0,n_clusters_per_class=1,class_sep=1.0,
                          random_state=1001,n_classes=3)
plt.scatter(X[:,0],X[:,1],marker='o',c=y,linewidth=0,edgecolor=None)
plt.show()



In [15]:
print(X[:4,:])


[[ 1.33098682  1.07041584]
 [ 0.14916297 -0.47797309]
 [ 1.93996224 -1.88279125]
 [-2.0750232   1.63613895]]

In [16]:
import numpy as np
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(X,y)

h = 0.02  # step size of the decision-boundary mesh
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5

xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])  # predicted class for every grid point

Z = Z.reshape(xx.shape)
print(Z[0,0])
plt.pcolormesh(xx,yy,Z,cmap=plt.cm.autumn)
plt.scatter(X[:,0],X[:,1],marker='o',c=y,linewidth=0,edgecolor=None)
plt.show()


1
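
The boundary above is fit and drawn on the same 100 points, so it says nothing about generalization. A quick sanity check is to hold part of the data out; a minimal sketch using scikit-learn's train_test_split (the 0.3 split, the random_state, and the clf_holdout name are arbitrary choices for illustration):

In [ ]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# refit on 70% of the points and report mean accuracy on the held-out 30%
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1001)
clf_holdout = LogisticRegression().fit(X_train, y_train)
print(clf_holdout.score(X_test, y_test))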

In [17]:
%timeit Z = clf.predict_proba(np.c_[xx.ravel(),yy.ravel()])


100 loops, best of 3: 14.1 ms per loop

In [18]:
print(Z)
print(Z.shape)
print(Z[0])


[[1 1 1 ..., 0 0 0]
 [1 1 1 ..., 0 0 0]
 [1 1 1 ..., 0 0 0]
 ..., 
 [1 1 1 ..., 2 2 2]
 [1 1 1 ..., 2 2 2]
 [1 1 1 ..., 2 2 2]]
(325, 320)
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
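
For comparison with the hard labels in Z, the predict_proba call timed above returns one probability per class for every grid point, and predict() is just the argmax over those columns. A short sketch of its shape:

In [ ]:
P = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
print(P.shape)     # (number of grid points, 3): one probability column per class
print(P[0])        # class probabilities for the first grid point
print(P[0].sum())  # each row sums to 1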
