Hierzu verwendet man:
sklearn.datasets.make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)
http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
In [ ]:
# Dimensions for the first synthetic classification dataset.
sample_size = 100   # number of samples to generate
feature_size = 20   # number of feature columns
In [ ]:
# Generate a small binary-classification toy dataset.
# random_state is fixed so the result is reproducible.
from sklearn.datasets import make_classification

X1, y1 = make_classification(
    n_samples=sample_size,
    n_features=feature_size,
    n_informative=2,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=2,
    weights=None,
    flip_y=0.01,
    class_sep=1.0,
    hypercube=True,
    shift=0.0,
    scale=1.0,
    shuffle=True,
    random_state=1,
)
In [ ]:
# Inspect the type of the generated feature matrix.
type(X1)
In [ ]:
# Wrap the raw feature array in a pandas DataFrame for easier inspection.
import pandas as pd
X2 = pd.DataFrame(X1)
type(X2)
In [ ]:
# Show the first 5 rows of the features.
X2.head()
In [ ]:
# Wrap the label vector in a DataFrame as well.
y2=pd.DataFrame(y1)
type(y2)
In [ ]:
# Summary info for the labels (row count, dtype, memory usage).
y2.info()
In [ ]:
# Show the last 5 rows of the labels.
y2.tail()
In [ ]:
# Second, larger dataset: 100k samples, 20 features, 4 classes,
# wider value range (scale=10.0). Fixed seed for reproducibility.
from sklearn.datasets import make_classification

X3, y3 = make_classification(
    n_samples=100000,
    n_features=20,
    n_informative=6,
    n_redundant=2,
    n_repeated=0,
    n_classes=4,
    n_clusters_per_class=2,
    weights=None,
    flip_y=0.01,
    class_sep=1.0,
    hypercube=True,
    shift=0.0,
    scale=10.0,
    shuffle=True,
    random_state=1,
)
In [ ]:
# Convert features and labels of the second dataset to DataFrames.
X4=pd.DataFrame(X3)
y4=pd.DataFrame(y3)
In [ ]:
# Overview of the label DataFrame.
y4.info()
In [ ]:
# Show the first 20 labels.
y4.head(20)
In [ ]:
# Show the first 10 feature rows.
X4.head(10)
In [ ]:
# Descriptive statistics (count, mean, std, quartiles) per feature column.
X4.describe()
In [ ]:
# Overview of the feature DataFrame.
X4.info()
In [ ]:
# Persist features and labels to CSV (German file names: "gesamtdaten" =
# full data, "ergebnisdaten" = result/label data). index=False drops the row index.
X4.to_csv('gesamtdaten.csv', encoding='utf-8',index=False)
y4.to_csv('ergebnisdaten.csv', encoding='utf-8',index=False)
In [ ]:
# Reload the features from CSV; column names are now strings ('0', '1', ...).
X5=pd.read_csv('gesamtdaten.csv', sep=',')
In [ ]:
# Quick look at the reloaded features.
X5.head()
In [ ]:
# Line plot of feature column '4' (string key: the column names became
# strings after the CSV round-trip).
# Fix: matplotlib.pyplot is used here but was never imported in this script.
import matplotlib.pyplot as plt

X5['4'].plot()
plt.show()
In [ ]:
# Descriptive statistics for feature column '4'.
X5['4'].describe()
In [ ]:
# Reload the labels from CSV.
y5=pd.read_csv('ergebnisdaten.csv', sep=',')
In [ ]:
# Quick look at the reloaded labels.
y5.head()
In [ ]:
# Histogram of the class labels (column key '0' after the CSV round-trip).
# Fix: matplotlib.pyplot is used here but was never imported in this script.
import matplotlib.pyplot as plt

y5['0'].plot(kind='hist')
plt.show()
In [ ]:
# Overview of the reloaded labels.
y5.info()
In [ ]:
# Dimensions for the third, larger "sensor data" dataset.
sample_size = 300000   # number of samples
feature_size = 10      # number of feature columns
class_size = 5         # number of target classes
In [ ]:
# Generate 300,000 synthetic sensor samples with 10 features and 5 classes.
# class_sep=2.0 makes the classes easier to separate; scale=20.0 widens the
# value range compared to the earlier datasets.
X6,y6=make_classification(n_samples=sample_size, n_features=feature_size, n_informative=6, n_redundant=2, n_repeated=0, n_classes=class_size, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=2.0, hypercube=True, shift=0.0, scale=20.0, shuffle=True, random_state=1)
In [ ]:
# Features of the sensor dataset as a DataFrame.
X7=pd.DataFrame(X6)
In [ ]:
# Overview of the sensor features.
X7.info()
In [ ]:
# Descriptive statistics per sensor feature.
X7.describe()
In [ ]:
# Labels of the sensor dataset as a DataFrame.
y7=pd.DataFrame(y6)
In [ ]:
# Histogram of the class distribution of the sensor labels.
# Fixes: (1) matplotlib.pyplot was never imported in this script;
# (2) `plt.show` was missing the call parentheses — it only referenced
# the function instead of rendering the figure.
import matplotlib.pyplot as plt

y7.plot(kind='hist')
plt.show()
In [ ]:
# Print the feature DataFrame (pandas truncates the output for large frames).
print(X7)
In [ ]:
# Column labels (integer column names, since X7 was built from an array).
X7.columns
In [ ]:
# Statistics for feature column 1 (integer key: no CSV round-trip yet).
X7[1].describe()
In [ ]:
# Box plot of feature column 1 to inspect spread and outliers.
# Fix: matplotlib.pyplot is used here but was never imported in this script.
import matplotlib.pyplot as plt

X7[1].plot(kind='box')
plt.show()
In [ ]:
# Persist the sensor dataset to CSV (German file names: "sensor-daten" =
# sensor data, "sensor-ergebnis-klassen" = sensor result classes).
X7.to_csv('sensor-daten.csv', encoding='utf-8',index=False)
y7.to_csv('sensor-ergebnis-klassen.csv', encoding='utf-8',index=False)
In [ ]:
# Reload features and labels; column names are strings after the round-trip.
X8=pd.read_csv('sensor-daten.csv', sep=',')
y8=pd.read_csv('sensor-ergebnis-klassen.csv', sep=',')
In [ ]:
# Column labels of the reloaded features (now strings: '0', '1', ...).
X8.columns
In [ ]:
# Descriptive statistics of the reloaded features.
X8.describe()
In [ ]:
# Mean of feature column '1' (string key after the CSV round-trip).
X8['1'].mean()
In [ ]:
import numpy as np
In [ ]:
# Print the distinct class labels present in y8
# ("klassenbezeichnungen" is German for "class labels").
print('klassenbezeichnungen:', np.unique(y8))
In [ ]:
from sklearn.model_selection import train_test_split
In [ ]:
# Split into 70% training and 30% test data (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X8, y8, test_size=0.3, random_state=1)
In [ ]:
# Peek at the training features.
X_train.head()
In [ ]:
# Standardize the features to zero mean and unit variance.
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
In [ ]:
# Fit the scaler on the training data only (avoids test-set leakage).
sc.fit(X_train)
In [ ]:
# Apply the training-set statistics to both splits.
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
In [ ]:
# Inspect the standardized training matrix (a numpy array, not a DataFrame).
print(X_train_std)
In [ ]: