Arbeiten mit Pandas

Zum Erzeugen synthetischer Klassifikationsdaten verwendet man die Funktion `make_classification` aus scikit-learn:
sklearn.datasets.make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)
https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html


In [ ]:
sample_size = 100
feature_size = 20

In [ ]:
from sklearn.datasets import make_classification
X1,y1=make_classification(n_samples=sample_size, n_features=feature_size, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=1)

In [ ]:
type(X1)

In [ ]:
import pandas as pd
X2 = pd.DataFrame(X1)
type(X2)

In [ ]:
X2.head()

In [ ]:
y2=pd.DataFrame(y1)
type(y2)

In [ ]:
y2.info()

In [ ]:
y2.tail()

In [ ]:
from sklearn.datasets import make_classification
X3,y3=make_classification(n_samples=100000, n_features=20, n_informative=6, n_redundant=2, n_repeated=0, n_classes=4, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=10.0, shuffle=True, random_state=1)

In [ ]:
X4=pd.DataFrame(X3)
y4=pd.DataFrame(y3)

In [ ]:
y4.info()

In [ ]:
y4.head(20)

In [ ]:
X4.head(10)

In [ ]:
X4.describe()

In [ ]:
X4.info()

In [ ]:
X4.to_csv('gesamtdaten.csv', encoding='utf-8',index=False)
y4.to_csv('ergebnisdaten.csv', encoding='utf-8',index=False)

In [ ]:
X5=pd.read_csv('gesamtdaten.csv', sep=',')

In [ ]:
X5.head()

In [ ]:
X5['4'].plot()
plt.show()

In [ ]:
X5['4'].describe()

In [ ]:
y5=pd.read_csv('ergebnisdaten.csv', sep=',')

In [ ]:
y5.head()

In [ ]:
y5['0'].plot(kind='hist')
plt.show()

In [ ]:
y5.info()

Erzeugen einer Klassifikationsaufgabe mit 300.000 Datensätzen, 10 Features und 5 Klassen (simulierte Sensordaten)


In [ ]:
sample_size = 300000
feature_size = 10
class_size = 5

In [ ]:
#Erzeugen von 300.000 Sensordaten mit 10 features und 5 KLassen
#class_sep=2.0
#scale=20
X6,y6=make_classification(n_samples=sample_size, n_features=feature_size, n_informative=6, n_redundant=2, n_repeated=0, n_classes=class_size, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=2.0, hypercube=True, shift=0.0, scale=20.0, shuffle=True, random_state=1)

In [ ]:
X7=pd.DataFrame(X6)

In [ ]:
X7.info()

In [ ]:
X7.describe()

In [ ]:
y7=pd.DataFrame(y6)

In [ ]:
# Histogram of the sensor class labels.
# The original second line was `plt.show` without call parentheses, which
# never calls the function, and `plt` is not imported anywhere in the visible
# notebook, so the cell raised NameError on a fresh kernel. The inline backend
# renders the figure at the end of the cell; the `;` hides the Axes repr.
y7.plot(kind='hist');

In [ ]:
print(X7)

In [ ]:
X7.columns

In [ ]:
X7[1].describe()

In [ ]:
# Box plot of the sensor column labelled 1.
# `plt` is not imported anywhere in the visible notebook, so the former
# `plt.show()` raised NameError on a fresh kernel. The inline backend renders
# the figure at the end of the cell; the trailing `;` hides the Axes repr.
X7[1].plot(kind='box');

In [ ]:
X7.to_csv('sensor-daten.csv', encoding='utf-8',index=False)
y7.to_csv('sensor-ergebnis-klassen.csv', encoding='utf-8',index=False)

In [ ]:
X8=pd.read_csv('sensor-daten.csv', sep=',')
y8=pd.read_csv('sensor-ergebnis-klassen.csv', sep=',')

In [ ]:
X8.columns

In [ ]:
X8.describe()

In [ ]:
X8['1'].mean()

In [ ]:
import numpy as np

In [ ]:
print('klassenbezeichnungen:', np.unique(y8))

In [ ]:
from sklearn.model_selection import train_test_split

In [ ]:
X_train, X_test, y_train, y_test = train_test_split(X8, y8, test_size=0.3, random_state=1)

In [ ]:
X_train.head()

In [ ]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()

In [ ]:
sc.fit(X_train)

In [ ]:
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

In [ ]:
print(X_train_std)

In [ ]: