In [46]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets
In [6]:
from faker import Factory,Faker
In [12]:
# Shared Faker generator used for the fake customer fields in this notebook.
fake = Faker()
# Seed this generator so the fake values are repeatable between runs.
# Current Faker releases reject `.seed()` called on an instance; `seed_instance`
# seeds the instance's own random source instead.
fake.seed_instance(101)
In [13]:
# Sample call: display one generated company name.
fake.company()
Out[13]:
In [15]:
# Sample call: display one generated address.
fake.address()
Out[15]:
In [41]:
# Sample call: the credit-card text is split on newlines and printed as a list of its lines.
print(fake.credit_card_full().split('\n'))
In [155]:
# Synthetic customer data: 500 rows, 4 features (3 of them informative) and a
# regression target. `random_state` pins the draw so that re-running the
# notebook regenerates the same numbers.
X, y = datasets.make_regression(
    n_samples=500,
    n_features=4,
    n_informative=3,
    bias=1,
    noise=10,
    random_state=101,
)
X = pd.DataFrame(X)
y = pd.DataFrame(y)
# Features and target side by side; the target ends up as the fifth column.
df = pd.concat([X, y], axis=1)
df.columns = ['Avg. Session Length','Time on App','Time on Website','Length of Membership','Yearly Amount Spent']
df.head()
Out[155]:
In [156]:
# Summary statistics of df.
df.describe()
Out[156]:
In [157]:
# Shift every column by a fixed offset: plus[i] is added to the i-th column of df.
plus = [33,12,37,3.6,500]
for i, col in enumerate(df.columns):
    # Vectorized addition; gives the same values as the element-wise apply
    # without one Python call per row.
    df[col] = df[col] + plus[i]
In [158]:
# Summary statistics of df, placed after the offset cell.
df.describe()
Out[158]:
In [ ]:
In [ ]:
In [ ]:
In [159]:
# Summary statistics of df (repeat of the earlier check).
df.describe()
Out[159]:
In [160]:
# Attach fake contact fields, generating one value per existing row.
# Columns are filled one after another (all emails, then all addresses,
# then all colour names) so the generator is consumed in the same order.
n_rows = len(df)
df['Email'] = [fake.email() for _ in range(n_rows)]
df['Address'] = [fake.address() for _ in range(n_rows)]
# 'Avatar' holds a generated colour name.
df['Avatar'] = [fake.color_name() for _ in range(n_rows)]
In [161]:
# Column order for the exported table: contact fields first, numeric columns after.
export_columns = [
    'Email',
    'Address',
    'Avatar',
    'Avg. Session Length',
    'Time on App',
    'Time on Website',
    'Length of Membership',
    'Yearly Amount Spent',
]
customers = df[export_columns]
In [162]:
# Write the customers table as CSV, without the index column.
# NOTE(review): the output name has no file extension — confirm that is intended.
customers.to_csv('Ecommerce Customers',index=False)
In [114]:
In [133]:
In [132]:
# Imported here because the notebook's `from random import randint` cell sits
# below this one, so a top-to-bottom run would otherwise hit a NameError.
from random import randint

# Add random jitter to each session length: an integer from 0 to 100 (inclusive) times 0.01.
df['Avg. Session Length'] = df['Avg. Session Length'].apply(lambda x: x + randint(0, 100) * 0.01)
In [121]:
from random import randint
In [131]:
# Scratch check: a random integer from 0 to 100 (inclusive) scaled by 0.1.
randint(0,100)*0.1
Out[131]:
In [ ]:
In [ ]:
In [ ]:
In [163]:
# FAKE HOUSING DATA
# 5000 rows, 5 features (all informative) and a regression target.
# `random_state` pins the draw so that re-running the notebook regenerates the same numbers.
X, y = datasets.make_regression(
    n_samples=5000,
    n_features=5,
    n_informative=5,
    bias=3,
    noise=15,
    random_state=101,
)
X = pd.DataFrame(X)
y = pd.DataFrame(y)
# Features and target side by side; the target is the last column.
df = pd.concat([X, y], axis=1)
In [169]:
# Names for the six housing columns, in positional order.
housing_columns = [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
    'Price',
]
df.columns = housing_columns
In [254]:
# One generated address per row. Uses the `fake` generator created near the top of
# the notebook; `f` is only defined in a later cell, so it would be undefined on
# a top-to-bottom run.
df['Address'] = df['Area Population'].apply(lambda x: fake.address())
In [255]:
# Preview the first rows of df.
df.head()
Out[255]:
In [257]:
# Write df as CSV, without the index column.
# NOTE(review): in file order this export comes before the cell that rescales 'Price',
# but the execution counts (257 here vs 244 there) suggest the rescale ran first — confirm the intended order.
df.to_csv('CA_Housing.csv',index=False)
In [244]:
# Rescale the 'Price' column by a constant factor of 2500.
df['Price'] = df['Price'].mul(2500)
In [233]:
def fix(x):
    """Map a numeric value onto a small integer.

    Returns 1 when ``x <= 3``; a random integer from 2 to 4 (inclusive) when
    ``3 < x < 7``; and a random integer from 3 to 6 (inclusive) otherwise.
    A NaN input fails both comparisons and therefore takes the last branch.
    """
    if x <= 3:
        return 1
    if x < 7:
        # x > 3 is already guaranteed once the first check has failed.
        return randint(2, 4)
    return randint(3, 6)
In [ ]:
# Draft cell with no execution count: adds plus[i] to the i-th column of df.
# FIXME: the list literal has an empty slot (`5,,37`), which is a SyntaxError; the
# missing offset must be filled in, and the loop body indented, before this can run.
# NOTE(review): df.columns may include the non-numeric 'Address' column by this point,
# and the list would then be shorter than the column count — confirm before running.
plus = [5,,37,3.6,500]
for i,col in enumerate(df.columns):
df[col] = df[col].apply(lambda x: x+plus[i])
In [231]:
# Scratch check: a random integer from 2 to 4 (inclusive).
randint(2,4)
Out[231]:
In [249]:
# Scratch arithmetic: 50 scaled by 0.01.
50*.01
Out[249]:
In [253]:
# A second Faker generator, bound to `f`; no seed is set on it in this cell.
from faker import Faker
f = Faker()
In [ ]:
# Call the method to display a sample address; a bare `f.address` only shows the bound method object.
f.address()