Hierzu verwendet man:
sklearn.datasets.make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)
http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
In [ ]:
# Dimensions for the first synthetic classification dataset.
sample_size = 100   # number of samples to generate
feature_size = 20   # number of feature columns
In [ ]:
# Generate a small binary-classification toy dataset.
# random_state is fixed so the result is reproducible.
from sklearn.datasets import make_classification

X1, y1 = make_classification(
    n_samples=sample_size,
    n_features=feature_size,
    n_informative=2,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=2,
    weights=None,
    flip_y=0.01,
    class_sep=1.0,
    hypercube=True,
    shift=0.0,
    scale=1.0,
    shuffle=True,
    random_state=1,
)
In [ ]:
# Inspect the type of the generated feature matrix.
type(X1)
In [ ]:
# Wrap the raw feature array in a pandas DataFrame for easier inspection.
import pandas as pd
X2 = pd.DataFrame(X1)
type(X2)
In [ ]:
# Show the first 5 rows of the features.
X2.head()
In [ ]:
# Wrap the label vector in a DataFrame as well.
y2=pd.DataFrame(y1)
type(y2)
In [ ]:
# Summary info for the labels (row count, dtype, memory usage).
y2.info()
In [ ]:
# Show the last 5 rows of the labels.
y2.tail()
In [ ]:
# Second, larger dataset: 100k samples, 20 features, 4 classes,
# wider value range (scale=10.0). Fixed seed for reproducibility.
from sklearn.datasets import make_classification

X3, y3 = make_classification(
    n_samples=100000,
    n_features=20,
    n_informative=6,
    n_redundant=2,
    n_repeated=0,
    n_classes=4,
    n_clusters_per_class=2,
    weights=None,
    flip_y=0.01,
    class_sep=1.0,
    hypercube=True,
    shift=0.0,
    scale=10.0,
    shuffle=True,
    random_state=1,
)
In [ ]:
# Convert features and labels of the second dataset to DataFrames.
X4=pd.DataFrame(X3)
y4=pd.DataFrame(y3)
In [ ]:
# Overview of the label DataFrame.
y4.info()
In [ ]:
# Show the first 20 labels.
y4.head(20)
In [ ]:
# Show the first 10 feature rows.
X4.head(10)
In [ ]:
# Descriptive statistics (count, mean, std, quartiles) per feature column.
X4.describe()
In [ ]:
# Overview of the feature DataFrame.
X4.info()
In [ ]:
# Persist features and labels to CSV (German file names: "gesamtdaten" =
# full data, "ergebnisdaten" = result/label data). index=False drops the row index.
X4.to_csv('gesamtdaten.csv', encoding='utf-8',index=False)
y4.to_csv('ergebnisdaten.csv', encoding='utf-8',index=False)
In [ ]:
# Reload the features from CSV; column names are now strings ('0', '1', ...).
X5=pd.read_csv('gesamtdaten.csv', sep=',')
In [ ]:
# Quick look at the reloaded features.
X5.head()
In [ ]:
# Line plot of feature column '4' (string key: the column names became
# strings after the CSV round-trip).
# Fix: matplotlib.pyplot is used here but was never imported in this script.
import matplotlib.pyplot as plt

X5['4'].plot()
plt.show()
In [ ]:
# Descriptive statistics for feature column '4'.
X5['4'].describe()
In [ ]:
# Reload the labels from CSV.
y5=pd.read_csv('ergebnisdaten.csv', sep=',')
In [ ]:
# Quick look at the reloaded labels.
y5.head()
In [ ]:
# Histogram of the class labels (column key '0' after the CSV round-trip).
# Fix: matplotlib.pyplot is used here but was never imported in this script.
import matplotlib.pyplot as plt

y5['0'].plot(kind='hist')
plt.show()
In [ ]:
# Overview of the reloaded labels.
y5.info()
In [ ]:
# Dimensions for the third, larger "sensor data" dataset.
sample_size = 300000   # number of samples
feature_size = 10      # number of feature columns
class_size = 5         # number of target classes
In [ ]:
# Generate 300,000 synthetic sensor samples with 10 features and 5 classes.
# class_sep=2.0 makes the classes easier to separate; scale=20.0 widens the
# value range compared to the earlier datasets.
X6,y6=make_classification(n_samples=sample_size, n_features=feature_size, n_informative=6, n_redundant=2, n_repeated=0, n_classes=class_size, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=2.0, hypercube=True, shift=0.0, scale=20.0, shuffle=True, random_state=1)
In [ ]:
# Features of the sensor dataset as a DataFrame.
X7=pd.DataFrame(X6)
In [ ]:
# Overview of the sensor features.
X7.info()
In [ ]:
# Descriptive statistics per sensor feature.
X7.describe()
In [ ]:
# Labels of the sensor dataset as a DataFrame.
y7=pd.DataFrame(y6)
In [ ]:
# Histogram of the class distribution of the sensor labels.
# Fixes: (1) matplotlib.pyplot was never imported in this script;
# (2) `plt.show` was missing the call parentheses — it only referenced
# the function instead of rendering the figure.
import matplotlib.pyplot as plt

y7.plot(kind='hist')
plt.show()
In [ ]:
# Print the feature DataFrame (pandas truncates the output for large frames).
print(X7)
In [ ]:
# Column labels (integer column names, since X7 was built from an array).
X7.columns
In [ ]:
# Statistics for feature column 1 (integer key: no CSV round-trip yet).
X7[1].describe()
In [ ]:
# Box plot of feature column 1 to inspect spread and outliers.
# Fix: matplotlib.pyplot is used here but was never imported in this script.
import matplotlib.pyplot as plt

X7[1].plot(kind='box')
plt.show()
In [ ]:
# Persist the sensor dataset to CSV (German file names: "sensor-daten" =
# sensor data, "sensor-ergebnis-klassen" = sensor result classes).
X7.to_csv('sensor-daten.csv', encoding='utf-8',index=False)
y7.to_csv('sensor-ergebnis-klassen.csv', encoding='utf-8',index=False)
In [ ]:
# Reload features and labels; column names are strings after the round-trip.
X8=pd.read_csv('sensor-daten.csv', sep=',')
y8=pd.read_csv('sensor-ergebnis-klassen.csv', sep=',')
In [ ]:
# Column labels of the reloaded features (now strings: '0', '1', ...).
X8.columns
In [ ]:
# Descriptive statistics of the reloaded features.
X8.describe()
In [ ]:
# Mean of feature column '1' (string key after the CSV round-trip).
X8['1'].mean()
In [ ]:
import numpy as np
In [ ]:
# Print the distinct class labels present in y8
# ("klassenbezeichnungen" is German for "class labels").
print('klassenbezeichnungen:', np.unique(y8))
In [ ]:
from sklearn.model_selection import train_test_split
In [ ]:
# Split into 70% training and 30% test data (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X8, y8, test_size=0.3, random_state=1)
In [ ]:
# Peek at the training features.
X_train.head()
In [ ]:
# Standardize the features to zero mean and unit variance.
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
In [ ]:
# Fit the scaler on the training data only (avoids test-set leakage).
sc.fit(X_train)
In [ ]:
# Apply the training-set statistics to both splits.
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
In [ ]:
# Inspect the standardized training matrix (a numpy array, not a DataFrame).
print(X_train_std)
In [ ]: