In [14]:
'''generating a fake meaningless dataset to test the function while the real data is pulled'''
import numpy as np
import pandas as pd
In [15]:
# Sample parameters: two inlier clusters of n_inliers rows each, plus
# n_outliers uniformly distributed rows.
n_inliers = 100
n_outliers = 20
# NOTE(review): outliers_fraction is not used in this cell, and the rows built
# below contain n_outliers / (2 * n_inliers + n_outliers) outliers, which is
# not 0.2 -- confirm the intended value against whatever consumes it.
outliers_fraction = 0.2
# Total number of generated rows (both clusters plus the outliers).
n_total = 2 * n_inliers + n_outliers

# Feature matrix: two 3-column Gaussian clusters, one scaled by 2 and shifted
# by -6, the other scaled by 3 and shifted by +6, stacked row-wise.
X1 = 2 * np.random.randn(n_inliers, 3) - 6
X2 = 3 * np.random.randn(n_inliers, 3) + 6
X = np.vstack([X1, X2])

# Append the outlier rows, drawn uniformly from [-6, 6).
X = np.vstack([X, np.random.uniform(low=-6, high=6, size=(n_outliers, 3))])

# One longitude, latitude and time value per row, each as an (n_total, 1) column.
longitude = 2 * np.random.randn(n_total, 1) + 20
latitude = 2 * np.random.randn(n_total, 1) + 50
time = np.random.randn(n_total, 1) + 1000

# Final layout, column-wise: longitude, latitude, time, then the 3 features.
X_all = np.hstack([longitude, latitude, time, X])
In [24]:
# Save the fake data to a CSV file.
fnamesave = 'fakedata.csv'
# The labels must follow the column order in which X_all is assembled:
# longitude, latitude, time, then the three feature columns. Listing
# 'latitude' first would write each coordinate under the other's header.
df = pd.DataFrame(
    X_all,
    columns=['longitude', 'latitude', 'datetime', 'X1', 'X2', 'X3'],
)
# index=False keeps the row index out of the file, so it holds only the six data columns.
df.to_csv(fnamesave, index=False)