In [1]:
print('Always important in machine learning, too:' + '\n' + 'get to know the task and the data thoroughly')
In [3]:
%matplotlib inline
In [4]:
import numpy as np
x = np.array([[1, 2, 3], [4, 5, 6]])
print("x:\n{}".format(x))
In [5]:
from scipy import sparse
# create a 2d NumPy array with a diagonal of ones, and zeros everywhere else
eye = np.eye(4)
print("NumPy array:\n{}".format(eye))
In [6]:
# convert the NumPy array to a SciPy sparse matrix in CSR format
# only the non-zero entries are stored
sparse_matrix = sparse.csr_matrix(eye)
print("\nSciPy sparse CSR matrix:\n{}".format(sparse_matrix))
In [7]:
data = np.ones(4)
row_indices = np.arange(4)
col_indices = np.arange(4)
eye_coo = sparse.coo_matrix((data, (row_indices, col_indices)))
print("COO representation:\n{}".format(eye_coo))
In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
# Generate a sequence of numbers from -10 to 10 with 100 steps
x = np.linspace(-10, 10, 100)
# Create a second NumPy array using the sin() function
y = np.sin(x)
# The plot function makes a line chart of one array against another
plt.plot(x, y, marker="o", color='brown')
plt.title('Sine curve')
plt.xlabel('x')
plt.ylabel('y')
# plt.legend(['sin x'], loc='upper left')
Out[8]:
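The legend call above is commented out; a minimal sketch of how a legend could be attached to this plot, with the label text chosen here purely for illustration:
# label the curve when plotting, then let legend() pick the label up
plt.plot(x, y, marker="o", color='brown', label='sin(x)')
plt.legend(loc='upper left')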
In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
# Generate a sequence of numbers from -20 to 20 with 100 steps
x = np.linspace(-20, 20, 100)
# Create a second NumPy array using the exp() function
y = np.exp(x)
# The plot function makes a line chart of one array against another
plt.plot(x, y, marker="o", color='green')
plt.title('Exponential curve')
plt.xlabel('x')
plt.ylabel('y')
Out[9]:
In [10]:
%matplotlib inline
import matplotlib.pyplot as plt
from numpy.random import randn
z = randn(100)
red_dot, = plt.plot(z, "ro", markersize=15)
# Draw a white cross on top of the first 50 data points.
white_cross, = plt.plot(z[:50], "w+", markeredgewidth=3, markersize=15)
plt.legend([red_dot, (red_dot, white_cross)], ["Attr A", "Attr A+B"])
Out[10]:
In [11]:
import pandas as pd
from IPython.display import display
# create a simple dataset of people
data = {'Name': ["John", "Anna", "Peter", "Linda"],
        'Location': ["New York", "Paris", "Berlin", "London"],
        'Age': [24, 13, 53, 33]
        }
data_pandas = pd.DataFrame(data)
# IPython.display allows "pretty printing" of dataframes
# in the Jupyter notebook
display(data_pandas)
In [14]:
# One of many possible ways to query the table:
# selecting all rows whose Age value is greater than 30
display(data_pandas[data_pandas.Age > 30])
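The same selection can also be written with DataFrame.query; a small sketch that should give the same result as the boolean mask above:
# query() evaluates the expression string against the column names
display(data_pandas.query("Age > 30"))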
In [12]:
import sys
print("Python version: {}".format(sys.version))
import pandas as pd
print("pandas version: {}".format(pd.__version__))
import matplotlib
print("matplotlib version: {}".format(matplotlib.__version__))
import numpy as np
print("NumPy version: {}".format(np.__version__))
import scipy as sp
print("SciPy version: {}".format(sp.__version__))
import IPython
print("IPython version: {}".format(IPython.__version__))
import sklearn
print("scikit-learn version: {}".format(sklearn.__version__))
In [13]:
from sklearn.datasets import load_iris
iris_dataset = load_iris()
In [14]:
print("Keys of iris_dataset: {}".format(iris_dataset.keys()))
In [15]:
print(iris_dataset['DESCR'][:193] + "\n...")
In [16]:
print("Target names: {}".format(iris_dataset['target_names']))
In [17]:
print("Feature names: {}".format(iris_dataset['feature_names']))
In [18]:
print("Type of data: {}".format(type(iris_dataset['data'])))
In [19]:
print("Shape of data: {}".format(iris_dataset['data'].shape))
In [20]:
print("First five rows of data:\n{}".format(iris_dataset['data'][:5]))
In [21]:
print("Type of target: {}".format(type(iris_dataset['target'])))
In [22]:
print("Shape of target: {}".format(iris_dataset['target'].shape))
In [23]:
print("Shape of target: ")
print(iris_dataset['target'].shape)
In [24]:
print("Target:\n{}".format(iris_dataset['target']))
In [25]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
iris_dataset['data'], iris_dataset['target'], random_state=1)
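train_test_split defaults to a 75/25 split; as a hedged variant, the test_size and stratify parameters can make the split explicit and preserve class proportions (variable names below are illustrative only, so the split used in the following cells is not overwritten):
# stratify keeps the class proportions of the target in both splits
X_tr, X_te, y_tr, y_te = train_test_split(
    iris_dataset['data'], iris_dataset['target'],
    test_size=0.25, stratify=iris_dataset['target'], random_state=1)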
In [26]:
print("X_train shape: {}".format(X_train.shape))
print("y_train shape: {}".format(y_train.shape))
In [27]:
print("X_test shape: {}".format(X_test.shape))
print("y_test shape: {}".format(y_test.shape))
In [29]:
import mglearn
# create dataframe from data in X_train
# label the columns using the strings in iris_dataset.feature_names
iris_dataframe = pd.DataFrame(X_train, columns=iris_dataset.feature_names)
# create a scatter matrix from the dataframe, color by y_train
# scatter_matrix lives under pd.plotting in current pandas versions
pd.plotting.scatter_matrix(iris_dataframe, c=y_train, figsize=(15, 15), marker='o',
                           hist_kwds={'bins': 20}, s=60, alpha=.8, cmap=mglearn.cm3)
Out[29]:
In [30]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
In [31]:
knn.fit(X_train, y_train)
Out[31]:
In [32]:
X_new = np.array([[5, 2.9, 1, 0.2]])
print("X_new.shape: {}".format(X_new.shape))
In [33]:
prediction = knn.predict(X_new)
print("Prediction: {}".format(prediction))
print("Predicted target name: {}".format(
iris_dataset['target_names'][prediction]))
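KNeighborsClassifier also exposes predict_proba; an optional check of the class membership fractions among the k=3 neighbors for the new sample:
# probabilities are reported in the order of knn.classes_
print("Predicted probabilities: {}".format(knn.predict_proba(X_new)))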
In [34]:
y_pred = knn.predict(X_test)
print("Test set predictions:\n {}".format(y_pred))
In [35]:
print("Test set score: {:.2f}".format(np.mean(y_pred == y_test)))
In [36]:
print("Test set score: {:.2f}".format(knn.score(X_test, y_test)))
In [37]:
X_train, X_test, y_train, y_test = train_test_split(
iris_dataset['data'], iris_dataset['target'], random_state=0)
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
print("Test set score: {:.2f}".format(knn.score(X_test, y_test)))
In [ ]: