In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(color_codes=True)
%matplotlib inline
In [2]:
df = pd.read_csv('iris.data')
In [3]:
df.head()
Out[3]:
In [4]:
df = pd.read_csv('iris.data', header=None)  # the file has no header row, so keep the first line as data
df.head()
Out[4]:
In [5]:
col_name = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class']
In [6]:
df.columns = col_name
In [7]:
df.head()
Out[7]:
In [8]:
iris = sns.load_dataset('iris')
iris.head()
Out[8]:
In [9]:
df.describe()
Out[9]:
In [10]:
iris.describe()
Out[10]:
In [11]:
print(iris.info())
In [12]:
print(iris.groupby('species').size())
In [13]:
sns.pairplot(iris, hue='species', height=3, aspect=1);
In [14]:
iris.hist(edgecolor='black', linewidth=1.2, figsize=(12,8));
plt.show();
In [15]:
plt.figure(figsize=(12,8));
plt.subplot(2,2,1)
sns.violinplot(x='species', y='sepal_length', data=iris)
plt.subplot(2,2,2)
sns.violinplot(x='species', y='sepal_width', data=iris)
plt.subplot(2,2,3)
sns.violinplot(x='species', y='petal_length', data=iris)
plt.subplot(2,2,4)
sns.violinplot(x='species', y='petal_width', data=iris);
In [16]:
iris.boxplot(by='species', figsize=(12,8));
In [17]:
pd.plotting.scatter_matrix(iris, figsize=(12,10))
plt.show()
In [18]:
sns.pairplot(iris, hue="species",diag_kind="kde");
In [19]:
%%HTML
<iframe width="100%" height="500" src="https://scikit-learn.org/stable/"></iframe>
In [20]:
iris.head()
Out[20]:
Consistency. All objects (basic or composite) share a consistent interface composed of a limited set of methods. This interface is documented in a consistent manner for all objects.
Inspection. Constructor parameters and parameter values determined by learning algorithms are stored and exposed as public attributes.
Non-proliferation of classes. Learning algorithms are the only objects to be represented using custom classes. Datasets are represented as NumPy arrays or SciPy sparse matrices. Hyper-parameter names and values are represented as standard Python strings or numbers whenever possible. This keeps scikit-learn easy to use and easy to combine with other libraries.
Composition. Many machine learning tasks are expressible as sequences or combinations of transformations to data. Some learning algorithms are also naturally viewed as meta-algorithms parametrized on other algorithms. Whenever feasible, such algorithms are implemented and composed from existing building blocks.
Sensible defaults. Whenever an operation requires a user-defined parameter, an appropriate default value is defined by the library. The default value should cause the operation to be performed in a sensible way (giving a baseline solution for the task at hand).
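The principles above can be seen directly in code. The following is a minimal sketch, not part of the original notebook, that uses StandardScaler, LogisticRegression, and Pipeline purely as illustrative choices and assumes the seaborn iris frame loaded earlier:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Consistency and sensible defaults: every estimator is constructed the same
# way and can be built without any required arguments.
clf = LogisticRegression()

# Inspection: constructor parameters are public and can be retrieved.
print(clf.get_params())

# Non-proliferation of classes: the data stay plain NumPy arrays.
X_demo = iris[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].values
y_demo = iris['species'].values

# Composition: estimators chain into a single meta-estimator that exposes
# the same fit/predict interface as its parts.
pipe = Pipeline([('scale', StandardScaler()), ('model', clf)])
pipe.fit(X_demo, y_demo)
print(pipe.score(X_demo, y_demo))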
In [21]:
generate_random = np.random.RandomState(0)  # seeded generator for reproducible results
x = 10 * generate_random.rand(100)          # 100 points uniformly distributed in [0, 10)
In [22]:
y = 3 * x + generate_random.randn(100)  # slope 3 plus unit-variance noise from the seeded RandomState
In [23]:
plt.figure(figsize = (10, 8))
plt.scatter(x, y);
In [24]:
from sklearn.linear_model import LinearRegression
In [25]:
model = LinearRegression(fit_intercept=True)
In [26]:
model
Out[26]:
In [27]:
X = x.reshape(-1, 1)  # scikit-learn expects a 2-D feature matrix of shape (n_samples, n_features)
X.shape
Out[27]:
In [28]:
model.fit(X, y)
Out[28]:
In [29]:
model.coef_
Out[29]:
In [30]:
model.intercept_
Out[30]:
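Since the data were generated as y = 3*x plus unit-variance noise, the fitted slope should come out close to 3 and the intercept close to 0. A quick sanity check, not in the original notebook:

print('slope: %.3f, intercept: %.3f' % (model.coef_[0], model.intercept_))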
In [31]:
x_fit = np.linspace(-1, 11)  # 50 evenly spaced points, extending slightly beyond the data range
In [32]:
X_fit = x_fit.reshape(-1,1)
In [33]:
y_fit = model.predict(X_fit)
In [34]:
plt.figure(figsize = (10, 8))
plt.scatter(x, y)
plt.plot(x_fit, y_fit);