In [1]:
import time
import random

import pandas as pd
import numpy as np


from faker import Factory

In [2]:
# Shared Faker generator; the generate_* helpers below call it for names,
# addresses and transaction timestamps.
# NOTE(review): no seed is set for Faker, `random` or `np.random` in the visible
# cells, so every run presumably produces different data - confirm if reproducibility matters.
fake = Factory.create()

In [3]:
# Load the company catalogue. Per the output below it has two columns:
# `label` (business category) and `name` (company name).
# generate_business() and generate_normal_transactions() read this module-level frame.
companies = pd.read_csv('label.csv')
companies


Out[3]:
label name
0 Beauty Bobbi Brown Inc.
1 Beauty Dior Beauty
2 Beauty Derma Skincare Centre
3 Beauty KIKO Asia Ltd.
4 Beauty Vichy Laboratories
5 Beauty Colourmix Cosmetics Company Limited
6 Beauty Sa Sa Cosmetic Co. Ltd.
7 Beauty Clinique Laboratories, llc.
8 Beauty Shiseido Company, Limited
9 Beauty Sephora USA, Inc.
10 Beauty MAKE-UP ART COSMETICS.
11 Beauty Clarins.
12 Beauty L'Oreal SA
13 Beauty Laura Mercier Cosmetics.
14 Beauty FANCL International, Inc.
15 Education Memrise Inc.
16 Education Coursera Inc.
17 Education Udacity, Inc.
18 Education Code School LLC.
19 Education edX Inc.
20 Education Open Learning Global Pty Ltd.
21 Education New Oriental Education & Tech Grp
22 Education Chinese TAL Education Group
23 Education Tarena International, Inc.
24 Education Chegg Inc
25 Education 2U, Inc.
26 Education McGraw-Hill Education
27 Education Pearson plc
28 Education China Distance Education Hldgs Ltd
29 Education Udemy, Inc.
... ... ...
120 Entertainment Broadway Cinematheque
121 Entertainment UA Cinema Circuit Limited
122 Entertainment Neway Karaoke Box Limited
123 Entertainment Red MR Limited
124 Entertainment Walt Disney Company
125 Entertainment AMC Entertainment Inc
126 Entertainment Nintendo
127 Entertainment Xbox
128 Entertainment LEGO
129 Entertainment Ocean Park Hong Kong
130 Entertainment Ryze Ultimate Trampoline Park Hong Kong
131 Entertainment Ball Room
132 Entertainment Ikari Area
133 Entertainment VR Arena
134 Entertainment HA cube
135 Clothing American Eagle Outfitters Inc
136 Clothing Holister Co.
137 Clothing Zara
138 Clothing H&M Hennes & Mauritz AB
139 Clothing GAP
140 Clothing Levi Strauss & Co.
141 Clothing G2000
142 Clothing Louis Vuitton
143 Clothing Longchamp
144 Clothing J. Crew
145 Clothing Nike
146 Clothing Adidas
147 Clothing Converse
148 Clothing Superdry
149 Clothing Balenciaga

150 rows × 2 columns


In [9]:
#companies = pd.Series(companies.name.values)

In [8]:
# random.shuffle(companies)

In [8]:
def generate_people(n):
    """Generate ``n`` synthetic people and return them as a DataFrame.

    Columns of the returned frame:
        - id: consecutive integers ``0 .. n-1``
        - name: full name from Faker, unique across the frame
        - gender: 'male' or 'female', drawn at random (independent of the name)
        - age: random integer in ``[18, 50]``
        - address: Faker address flattened onto a single line
        - compromised: placeholder, always 0
        - compromised_time: placeholder, always 0

    The two ``compromised*`` columns are only initialised here; this function
    never sets them to anything else.

    The frame is built column-by-column so that numeric columns keep a numeric
    dtype instead of all being coerced to ``object``.
    """
    # A set guarantees uniqueness; keep drawing until n distinct names exist.
    unique_names = set()
    while len(unique_names) < n:
        unique_names.add(fake.name())

    column_order = ['id', 'name', 'gender', 'age', 'address',
                    'compromised', 'compromised_time']
    return pd.DataFrame(
        {
            'id': np.arange(n),
            'name': list(unique_names),
            'gender': np.random.choice(['male', 'female'], n),
            'age': [random.randint(18, 50) for _ in range(n)],
            # Faker addresses are multi-line; join them so each fits in one cell.
            'address': [fake.address().replace('\n', ', ') for _ in range(n)],
            'compromised': np.zeros(n),
            'compromised_time': np.zeros(n),
        },
        columns=column_order,
    )

In [12]:
def generate_business(n, fraudster_likelihood=0.01):
    """Generate ``n`` synthetic businesses and return them as a DataFrame.

    Each business is drawn (with replacement) from the module-level
    ``companies`` frame, so the same company can appear several times with
    different ids and addresses.

    Columns of the returned frame:
        - id: consecutive integers ``0 .. n-1``
        - name: company name taken from ``companies['name']``
        - label: category taken from ``companies['label']``
        - address: Faker address flattened onto a single line

    ``fraudster_likelihood`` is accepted for backward compatibility but has no
    effect: no fraud indicator column is part of the returned frame.
    """
    sampled = companies.sample(n, replace=True)

    return pd.DataFrame(
        {
            'id': list(range(n)),
            # .tolist() drops the sampled index so rows are positional.
            'name': sampled['name'].tolist(),
            'label': sampled['label'].tolist(),
            'address': [fake.address().replace('\n', ', ') for _ in range(n)],
        },
        columns=['id', 'name', 'label', 'address'],
    )

In [24]:
def generate_normal_transactions(people, businesses, max_transactions=40, fraud_likelihood=1, earliest="-30d", latest="now", amin=9, amax=300):
    """Generate ordinary (non-fraudulent) transactions and return a DataFrame.

    Every person in ``people`` receives exactly ``max_transactions``
    transactions. Each transaction gets a random timestamp between
    ``earliest`` and ``latest`` (Faker date strings), a random business drawn
    with replacement from ``businesses``, and a random amount.

    Parameters:
        people: DataFrame with at least ``name``, ``gender`` and ``age`` columns.
        businesses: DataFrame with at least ``name`` and ``label`` columns.
        max_transactions: number of transactions generated per person.
        fraud_likelihood: accepted for backward compatibility; not used.
        earliest, latest: bounds passed to ``fake.date_time_between``.
        amin, amax: inclusive bounds of the integer part of the amount; a
            random fraction is added, so an amount can exceed ``amax`` by up to 1.

    Returns:
        DataFrame with columns ``time``, ``name``, ``gender``, ``age``,
        ``company name``, ``company label`` and ``amount``; one row per
        transaction, grouped by person in the order of ``people``.
    """
    timestamps = []
    users = []
    genders = []
    ages = []
    amounts = []

    for user, gender, age in zip(people['name'], people['gender'], people['age']):
        for _ in range(max_transactions):
            timestamps.append(
                fake.date_time_between(start_date=earliest, end_date=latest, tzinfo=None)
            )
            # Round the sum (not just the fraction) so no float noise leaks
            # past two decimal places.
            amounts.append(round(random.randint(amin, amax) + random.random(), 2))
            users.append(user)
            genders.append(gender)
            ages.append(age)

    # Pick all businesses in one vectorised draw from the frame the caller
    # passed in, rather than sampling row-by-row inside the loop.
    total = len(timestamps)
    if total:
        picked = businesses.sample(total, replace=True)
        company_names = picked['name'].tolist()
        company_labels = picked['label'].tolist()
    else:
        company_names, company_labels = [], []

    return pd.DataFrame(
        {
            'time': timestamps,
            'name': users,
            'gender': genders,
            'age': ages,
            'company name': company_names,
            'company label': company_labels,
            'amount': amounts,
        },
        columns=['time', 'name', 'gender', 'age', 'company name', 'company label', 'amount'],
    )
# generate_normal_transactions(p,b)

In [9]:
# def generate_fradulent_transactions(people, businesses, max_transactions=40, user_fraud_detection_likelihood=0.10, latest="+10d", amin=1, amax=3000):
#     """ Genereates fradulent transactions. Returns a list."""
#     transactions = []
#     compromised = people[people.compromised == 1]
    
#     for i in range(len(compromised)):
        
#         person = compromised.iloc[i]
#         earliest = person.compromised_time
#         time = fake.date_time_between(start_date=earliest, end_date=latest, tzinfo=None)
# #         number_of_transactions = random.randint(5, max_transactions)
#         number_of_transactions = max_transactions
#         for j in range(number_of_transactions):
    
#             def fraud():        

#                 business = businesses.sample(1)
#                 amount = random.randint(amin, amax) + round(random.random(), 2)

#                 transaction = '{time} uuid={uuid} user="{user}" business="{business}" amount={amount} disputed=true'.format(
#                     time = time,
#                     uuid = person['uuid'],
#                     user = person['name'],
#                     business = business.name.values[0],
#                     amount = amount)

#                 transactions.append(transaction)

#             if j == 1:
#                 fraud()

#             # Each fraudulent transaction has +1% chance of being cause from the user
#             # Imagining they are maybe monitoring their transaction history
#             # Or happen to be checking one thing or another
#             if random.random() < j/100:
#                 fraud()
            
#     return transactions

In [10]:
generate_people(3)


Out[10]:
id name age address compromised compromised_time
0 0 Erin Burton 51 00076 Warren Inlet, North Justinchester, IN 97410 0 0
1 1 Robert Bush 56 528 Duran Port, Ericland, TX 14494-3816 0 0
2 2 Scott Hansen 63 49906 Rivera Rest Suite 699, East Daniel, AS 5... 0 0

In [12]:
generate_business(3)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-12-44df4dda289a> in <module>()
----> 1 generate_business(3)

<ipython-input-11-1768d55b910a> in generate_business(n, fraudster_likelihood)
     16     fraudsters = [1 if random.random() < fraudster_likelihood else 0 for n in range(n) ]
     17 
---> 18     df = pd.DataFrame([ids, names.name, addresses]).T
     19     df.columns = ['id', 'name', 'address']
     20     return df

/usr/local/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
   3079             if name in self._info_axis:
   3080                 return self[name]
-> 3081             return object.__getattribute__(self, name)
   3082 
   3083     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'name'

In [17]:
p = generate_people(10000)
b = generate_business(3000)
b


Out[17]:
id name label address
0 0 New Oriental Education & Tech Grp Education 1075 Santiago Gateway Apt. 840, South John, NM...
1 1 Din Tai Fung Dining 6004 Rachel Points, West Nicolemouth, OK 36734...
2 2 Nike Clothing 25100 Martinez Forks, Port Christine, IN 13287...
3 3 The 'Star' Ferry Company, Limited Transport 545 Daniels Parkway Suite 779, Lake Tanyaberg,...
4 4 2U, Inc. Education PSC 1593, Box 8631, APO AA 40984-1758
5 5 The 'Star' Ferry Company, Limited Transport 3224 Anderson Turnpike, Lake Luis, WV 45334-6452
6 6 Vudu, Inc. Media 814 Diana Springs Apt. 930, North Jacobhaven, ...
7 7 China Distance Education Hldgs Ltd Education 7983 Aaron Cliff Suite 831, Nelsonport, OK 32924
8 8 AMC Entertainment Inc Entertainment Unit 9786 Box 6665, DPO AA 70194
9 9 City Super Limited Grocery 81893 Cheryl Circle, Kathrynport, NY 54107
10 10 Faber-Castell Stationery 153 Palmer Extensions, Robertville, ND 12250-8124
11 11 MAKE-UP ART COSMETICS. Beauty 6262 Brown Green, South Ronnie, FL 11742-5478
12 12 KKBOX Media 808 Contreras Fork Suite 191, Timothymouth, MA...
13 13 iQIYI.COM Media 049 Arnold Fords Suite 086, Armstrongville, ND...
14 14 H&M Hennes & Mauritz AB Clothing 9600 Pacheco Courts, Karenborough, AL 03728
15 15 Dior Beauty Beauty 41625 Martin Oval, Thomasview, NJ 23305
16 16 Shiseido Company, Limited Beauty 316 Dunn Loop, East Nathanielside, WV 85195
17 17 Air China Airlines Transport 6686 Acosta Shore Suite 664, Port Amber, KY 28026
18 18 Marks and Spencer Group Plc Grocery 495 Williams Causeway, West Jesusbury, TN 89427
19 19 Pearson plc Education 2128 Angela Mountains Apt. 133, South Gloriast...
20 20 Hong Kong Dragon Airlines Ltd Transport 8830 Thompson Ferry Apt. 300, Brettchester, KS...
21 21 New Oriental Education & Tech Grp Education 307 White Plaza Suite 145, New Gina, IL 80672
22 22 Hong Kong Airlines Transport 56482 Lindsay Ports Suite 659, East Robert, SD...
23 23 PARKnSHOP Supermarket HK. Grocery 9860 Waters Ridge, West Richard, AK 21321-0509
24 24 AEON CO. LTD Grocery Unit 2820 Box 0889, DPO AP 29718
25 25 Pearson plc Education 21007 Romero View Suite 763, Collinsport, MP 6...
26 26 Faber-Castell Stationery 61407 Ramsey Loop, New Stephanie, CO 25761-2533
27 27 L'Oreal SA Beauty 4938 James Landing, Hugheschester, CT 61216
28 28 Sephora USA, Inc. Beauty 149 Scott Court Apt. 135, Margaretshire, AR 66230
29 29 Coursera Inc. Education Unit 7230 Box 4529, DPO AP 92260-4967
... ... ... ... ...
2970 2970 PILOT CORPORATION Stationery 7671 Christina Gardens Apt. 044, Wilsonton, PR...
2971 2971 ZAIS Financial Corp. Investment 4155 Daniel Track, Amberchester, SD 99809
2972 2972 Hok Yee Stationery Company Stationery 439 Miller Station Apt. 641, Brianton, NY 4025...
2973 2973 Shiseido Company, Limited Beauty 069 Anthony Ridges Apt. 705, Jeannestad, IL 41341
2974 2974 Maxim' s Catering Dining 7403 Kristy Street Suite 821, Amandafort, SD 1...
2975 2975 Balenciaga Clothing 92238 Stephanie Isle Suite 583, South Sarah, W...
2976 2976 Burger King Dining 6607 Kevin View, North Anita, FL 73344-6837
2977 2977 Neway Karaoke Box Limited Entertainment 04761 Chen Hollow, Grahammouth, WA 81364
2978 2978 Home Box Office Inc. Media Unit 2605 Box 7645, DPO AA 07830
2979 2979 TCF Financial Corporation Investment 79029 Steven Fork Apt. 143, Millershire, DC 66335
2980 2980 Maxim' s Catering Dining 71030 Campbell Mall, Brownville, ID 93830
2981 2981 Dior Beauty Beauty 38179 Sawyer Manor, Lake Matthewburgh, AR 60240
2982 2982 Laura Mercier Cosmetics. Beauty USNV Smith, FPO AP 25147
2983 2983 Costco Wholesale Corporation Grocery 80767 Quinn Groves Apt. 955, South Kellyland, ...
2984 2984 Levi Strauss & Co. Clothing 825 Sandra Well, New Kathychester, UT 03334-5040
2985 2985 Best Buy Co Inc Grocery PSC 7604, Box 0140, APO AE 98784
2986 2986 Pacific Coffee Company Dining 4034 Reynolds Mount Apt. 876, Deleonview, WI 1...
2987 2987 Burger King Dining 6332 Frederick Loaf Apt. 805, West Stephanie, ...
2988 2988 Yoshinoya Dining 860 Lopez Shore Apt. 334, South Lindsayport, C...
2989 2989 PARKnSHOP Supermarket HK. Grocery PSC 8555, Box 7142, APO AA 71145-6300
2990 2990 Shenzhen Comix Stationery Co., Ltd. Stationery 57255 Eric Plaza, Thomasview, MN 79199-9539
2991 2991 The Ohio & Michigan Paper Company Stationery 68185 Blankenship Prairie Suite 799, Port Bria...
2992 2992 Home Box Office Inc. Media 167 Ronnie Trafficway, Mistybury, TN 81936-7302
2993 2993 Neway Karaoke Box Limited Entertainment PSC 4189, Box 2267, APO AA 34848
2994 2994 Memrise Inc. Education 536 Haney Plaza Apt. 243, Reedmouth, WA 42881
2995 2995 Dior Beauty Beauty 00329 Madden Mountains Apt. 133, New Brandipor...
2996 2996 Maped SAS Stationery Unit 6614 Box 3561, DPO AE 56521-5739
2997 2997 Udemy, Inc. Education 2556 Wilson Alley, Port Debbiefurt, NC 98967-1377
2998 2998 Redbox Automated Retail, LLC. Media 384 Ramos Stream Suite 417, Lake Judith, GU 56217
2999 2999 Clarins. Beauty 89547 Zachary Trace, East Michelefort, FL 61664

3000 rows × 4 columns


In [25]:
transactionDf = generate_normal_transactions(p,b)
transactionDf


Out[25]:
time name gender age company name company label amount
0 2017-07-02 22:27:48 Rachel Trujillo male 36 Coursera Inc. Education 111.22
1 2017-06-10 12:37:05 Rachel Trujillo male 36 Laura Mercier Cosmetics. Beauty 108.46
2 2017-06-07 14:24:02 Rachel Trujillo male 36 Synovus Financial Corp. Investment 208.21
3 2017-06-25 15:36:22 Rachel Trujillo male 36 Peak Tramways Company Limited Transport 222.79
4 2017-06-30 03:34:48 Rachel Trujillo male 36 McDonald' s Dining 86.28
5 2017-06-29 18:00:00 Rachel Trujillo male 36 Pearson plc Education 47.68
6 2017-06-30 21:34:26 Rachel Trujillo male 36 Wal-Mart Stores Inc Grocery 93.69
7 2017-07-04 11:57:56 Rachel Trujillo male 36 Principal Financial Group Inc Investment 115.46
8 2017-06-13 06:54:58 Rachel Trujillo male 36 Voya Financial, Inc. Investment 142.11
9 2017-07-02 18:23:56 Rachel Trujillo male 36 The Dairy Farm Company, Limited Grocery 61.87
10 2017-06-17 03:32:26 Rachel Trujillo male 36 2U, Inc. Education 280.35
11 2017-07-03 13:44:19 Rachel Trujillo male 36 City Super Limited Grocery 281.82
12 2017-06-16 16:16:17 Rachel Trujillo male 36 GAP Clothing 9.13
13 2017-06-17 01:17:02 Rachel Trujillo male 36 Subway Dining 104.77
14 2017-06-21 11:14:37 Rachel Trujillo male 36 KOKUYO Co.,Ltd. Stationery 300.88
15 2017-06-11 21:15:08 Rachel Trujillo male 36 Raymond James Financial, Inc. Investment 33.22
16 2017-06-14 11:45:03 Rachel Trujillo male 36 KOKUYO Co.,Ltd. Stationery 297.04
17 2017-07-03 21:29:17 Rachel Trujillo male 36 Liberty Stationery Corp. Stationery 133.08
18 2017-06-20 09:04:47 Rachel Trujillo male 36 Ikari Area Entertainment 9.43
19 2017-06-10 18:31:22 Rachel Trujillo male 36 The 'Star' Ferry Company, Limited Transport 201.96
20 2017-07-05 20:58:23 Rachel Trujillo male 36 Tim Ho Wan Dining 292.67
21 2017-06-23 20:15:43 Rachel Trujillo male 36 BB Liquidating Inc Media 163.39
22 2017-06-24 15:32:14 Rachel Trujillo male 36 Parker Pen Company Stationery 260.15
23 2017-06-24 04:00:43 Rachel Trujillo male 36 Le.com Media 149.06
24 2017-07-04 03:13:19 Rachel Trujillo male 36 J. Crew Clothing 273.85
25 2017-06-12 00:30:44 Rachel Trujillo male 36 Voya Financial, Inc. Investment 93.04
26 2017-06-20 09:50:33 Rachel Trujillo male 36 Neway Karaoke Box Limited Entertainment 239.05
27 2017-06-22 22:08:41 Rachel Trujillo male 36 TCF Financial Corporation Investment 244.24
28 2017-06-11 13:26:35 Rachel Trujillo male 36 Principal Financial Group Inc Investment 117.48
29 2017-07-04 09:40:59 Rachel Trujillo male 36 Laura Mercier Cosmetics. Beauty 134.45
... ... ... ... ... ... ... ...
399970 2017-06-12 23:41:20 Kelsey Jones male 50 Sumitomo Mitsui Financial Group Inc Investment 16.89
399971 2017-06-29 22:55:52 Kelsey Jones male 50 Udacity, Inc. Education 197.18
399972 2017-06-08 21:36:12 Kelsey Jones male 50 Staedtler Mars GmbH & Co. KG Stationery 81.63
399973 2017-06-07 07:32:09 Kelsey Jones male 50 Waddell & Reed Financial, Inc. Investment 258.03
399974 2017-06-30 10:49:34 Kelsey Jones male 50 Shenzhen Comix Stationery Co., Ltd. Stationery 200.22
399975 2017-06-29 10:25:22 Kelsey Jones male 50 Hong Kong Dragon Airlines Ltd Transport 16.32
399976 2017-07-02 23:05:33 Kelsey Jones male 50 Maxim' s Catering Dining 265.12
399977 2017-06-14 09:14:25 Kelsey Jones male 50 Shinhan Financial Group Co Ltd Investment 113.96
399978 2017-06-16 23:01:34 Kelsey Jones male 50 The Kowloon Motor Bus Company Limited Transport 51.15
399979 2017-06-30 20:05:53 Kelsey Jones male 50 Xbox Entertainment 233.69
399980 2017-06-14 11:43:47 Kelsey Jones male 50 J. Crew Clothing 141.99
399981 2017-06-07 09:19:46 Kelsey Jones male 50 New World First Bus Services Limited Transport 197.35
399982 2017-06-28 11:42:38 Kelsey Jones male 50 edX Inc. Education 294.94
399983 2017-06-13 14:28:37 Kelsey Jones male 50 Yadkin Financial Corporation Investment 52.28
399984 2017-06-17 13:29:12 Kelsey Jones male 50 Netflix, Inc. Media 31.73
399985 2017-06-15 06:16:08 Kelsey Jones male 50 Ikari Area Entertainment 201.77
399986 2017-06-23 18:05:50 Kelsey Jones male 50 Levi Strauss & Co. Clothing 272.93
399987 2017-06-16 12:47:34 Kelsey Jones male 50 Spotify AB Media 114.97
399988 2017-06-22 20:57:56 Kelsey Jones male 50 Cathay Pacific Airways Transport 71.12
399989 2017-06-26 04:56:07 Kelsey Jones male 50 Coursera Inc. Education 275.3
399990 2017-07-02 17:11:21 Kelsey Jones male 50 Code School LLC. Education 92.36
399991 2017-06-16 02:28:35 Kelsey Jones male 50 Ryze Ultimate Trampoline Park Hong Kong Entertainment 283.62
399992 2017-07-04 20:10:37 Kelsey Jones male 50 Redbox Automated Retail, LLC. Media 169.4
399993 2017-06-17 02:49:32 Kelsey Jones male 50 McDonald' s Dining 55.72
399994 2017-06-28 08:49:46 Kelsey Jones male 50 Long Win Bus Company Limited Transport 156.04
399995 2017-07-02 21:33:10 Kelsey Jones male 50 New Oriental Education & Tech Grp Education 39.57
399996 2017-06-26 10:09:42 Kelsey Jones male 50 Xbox Entertainment 222.35
399997 2017-06-13 04:40:07 Kelsey Jones male 50 Tim Ho Wan Dining 163.53
399998 2017-06-13 16:58:51 Kelsey Jones male 50 G2000 Clothing 16.58
399999 2017-07-05 07:28:44 Kelsey Jones male 50 FANCL International, Inc. Beauty 211.63

400000 rows × 7 columns


In [26]:
transactionDf.loc[transactionDf['name']=='Kelsey Jones', 'name'].shape


Out[26]:
(40,)

In [27]:
# NOTE(review): this mean is computed but neither stored nor displayed, because
# it is not the last expression of the cell.
np.mean(list(transactionDf.age))
# transactionDf = transactionDf[['time','name','','age','company name', 'company label', 'amount']]
# Persist the generated transactions to CSV without the index column.
transactionDf.to_csv('transaction.csv', index=False)

In [28]:
df = pd.read_csv('transaction.csv')
df


Out[28]:
time name gender age company name company label amount
0 2017-07-02 22:27:48 Rachel Trujillo male 36 Coursera Inc. Education 111.22
1 2017-06-10 12:37:05 Rachel Trujillo male 36 Laura Mercier Cosmetics. Beauty 108.46
2 2017-06-07 14:24:02 Rachel Trujillo male 36 Synovus Financial Corp. Investment 208.21
3 2017-06-25 15:36:22 Rachel Trujillo male 36 Peak Tramways Company Limited Transport 222.79
4 2017-06-30 03:34:48 Rachel Trujillo male 36 McDonald' s Dining 86.28
5 2017-06-29 18:00:00 Rachel Trujillo male 36 Pearson plc Education 47.68
6 2017-06-30 21:34:26 Rachel Trujillo male 36 Wal-Mart Stores Inc Grocery 93.69
7 2017-07-04 11:57:56 Rachel Trujillo male 36 Principal Financial Group Inc Investment 115.46
8 2017-06-13 06:54:58 Rachel Trujillo male 36 Voya Financial, Inc. Investment 142.11
9 2017-07-02 18:23:56 Rachel Trujillo male 36 The Dairy Farm Company, Limited Grocery 61.87
10 2017-06-17 03:32:26 Rachel Trujillo male 36 2U, Inc. Education 280.35
11 2017-07-03 13:44:19 Rachel Trujillo male 36 City Super Limited Grocery 281.82
12 2017-06-16 16:16:17 Rachel Trujillo male 36 GAP Clothing 9.13
13 2017-06-17 01:17:02 Rachel Trujillo male 36 Subway Dining 104.77
14 2017-06-21 11:14:37 Rachel Trujillo male 36 KOKUYO Co.,Ltd. Stationery 300.88
15 2017-06-11 21:15:08 Rachel Trujillo male 36 Raymond James Financial, Inc. Investment 33.22
16 2017-06-14 11:45:03 Rachel Trujillo male 36 KOKUYO Co.,Ltd. Stationery 297.04
17 2017-07-03 21:29:17 Rachel Trujillo male 36 Liberty Stationery Corp. Stationery 133.08
18 2017-06-20 09:04:47 Rachel Trujillo male 36 Ikari Area Entertainment 9.43
19 2017-06-10 18:31:22 Rachel Trujillo male 36 The 'Star' Ferry Company, Limited Transport 201.96
20 2017-07-05 20:58:23 Rachel Trujillo male 36 Tim Ho Wan Dining 292.67
21 2017-06-23 20:15:43 Rachel Trujillo male 36 BB Liquidating Inc Media 163.39
22 2017-06-24 15:32:14 Rachel Trujillo male 36 Parker Pen Company Stationery 260.15
23 2017-06-24 04:00:43 Rachel Trujillo male 36 Le.com Media 149.06
24 2017-07-04 03:13:19 Rachel Trujillo male 36 J. Crew Clothing 273.85
25 2017-06-12 00:30:44 Rachel Trujillo male 36 Voya Financial, Inc. Investment 93.04
26 2017-06-20 09:50:33 Rachel Trujillo male 36 Neway Karaoke Box Limited Entertainment 239.05
27 2017-06-22 22:08:41 Rachel Trujillo male 36 TCF Financial Corporation Investment 244.24
28 2017-06-11 13:26:35 Rachel Trujillo male 36 Principal Financial Group Inc Investment 117.48
29 2017-07-04 09:40:59 Rachel Trujillo male 36 Laura Mercier Cosmetics. Beauty 134.45
... ... ... ... ... ... ... ...
399970 2017-06-12 23:41:20 Kelsey Jones male 50 Sumitomo Mitsui Financial Group Inc Investment 16.89
399971 2017-06-29 22:55:52 Kelsey Jones male 50 Udacity, Inc. Education 197.18
399972 2017-06-08 21:36:12 Kelsey Jones male 50 Staedtler Mars GmbH & Co. KG Stationery 81.63
399973 2017-06-07 07:32:09 Kelsey Jones male 50 Waddell & Reed Financial, Inc. Investment 258.03
399974 2017-06-30 10:49:34 Kelsey Jones male 50 Shenzhen Comix Stationery Co., Ltd. Stationery 200.22
399975 2017-06-29 10:25:22 Kelsey Jones male 50 Hong Kong Dragon Airlines Ltd Transport 16.32
399976 2017-07-02 23:05:33 Kelsey Jones male 50 Maxim' s Catering Dining 265.12
399977 2017-06-14 09:14:25 Kelsey Jones male 50 Shinhan Financial Group Co Ltd Investment 113.96
399978 2017-06-16 23:01:34 Kelsey Jones male 50 The Kowloon Motor Bus Company Limited Transport 51.15
399979 2017-06-30 20:05:53 Kelsey Jones male 50 Xbox Entertainment 233.69
399980 2017-06-14 11:43:47 Kelsey Jones male 50 J. Crew Clothing 141.99
399981 2017-06-07 09:19:46 Kelsey Jones male 50 New World First Bus Services Limited Transport 197.35
399982 2017-06-28 11:42:38 Kelsey Jones male 50 edX Inc. Education 294.94
399983 2017-06-13 14:28:37 Kelsey Jones male 50 Yadkin Financial Corporation Investment 52.28
399984 2017-06-17 13:29:12 Kelsey Jones male 50 Netflix, Inc. Media 31.73
399985 2017-06-15 06:16:08 Kelsey Jones male 50 Ikari Area Entertainment 201.77
399986 2017-06-23 18:05:50 Kelsey Jones male 50 Levi Strauss & Co. Clothing 272.93
399987 2017-06-16 12:47:34 Kelsey Jones male 50 Spotify AB Media 114.97
399988 2017-06-22 20:57:56 Kelsey Jones male 50 Cathay Pacific Airways Transport 71.12
399989 2017-06-26 04:56:07 Kelsey Jones male 50 Coursera Inc. Education 275.30
399990 2017-07-02 17:11:21 Kelsey Jones male 50 Code School LLC. Education 92.36
399991 2017-06-16 02:28:35 Kelsey Jones male 50 Ryze Ultimate Trampoline Park Hong Kong Entertainment 283.62
399992 2017-07-04 20:10:37 Kelsey Jones male 50 Redbox Automated Retail, LLC. Media 169.40
399993 2017-06-17 02:49:32 Kelsey Jones male 50 McDonald' s Dining 55.72
399994 2017-06-28 08:49:46 Kelsey Jones male 50 Long Win Bus Company Limited Transport 156.04
399995 2017-07-02 21:33:10 Kelsey Jones male 50 New Oriental Education & Tech Grp Education 39.57
399996 2017-06-26 10:09:42 Kelsey Jones male 50 Xbox Entertainment 222.35
399997 2017-06-13 04:40:07 Kelsey Jones male 50 Tim Ho Wan Dining 163.53
399998 2017-06-13 16:58:51 Kelsey Jones male 50 G2000 Clothing 16.58
399999 2017-07-05 07:28:44 Kelsey Jones male 50 FANCL International, Inc. Beauty 211.63

400000 rows × 7 columns


In [29]:
df['time']


Out[29]:
0         2017-07-02 22:27:48
1         2017-06-10 12:37:05
2         2017-06-07 14:24:02
3         2017-06-25 15:36:22
4         2017-06-30 03:34:48
5         2017-06-29 18:00:00
6         2017-06-30 21:34:26
7         2017-07-04 11:57:56
8         2017-06-13 06:54:58
9         2017-07-02 18:23:56
10        2017-06-17 03:32:26
11        2017-07-03 13:44:19
12        2017-06-16 16:16:17
13        2017-06-17 01:17:02
14        2017-06-21 11:14:37
15        2017-06-11 21:15:08
16        2017-06-14 11:45:03
17        2017-07-03 21:29:17
18        2017-06-20 09:04:47
19        2017-06-10 18:31:22
20        2017-07-05 20:58:23
21        2017-06-23 20:15:43
22        2017-06-24 15:32:14
23        2017-06-24 04:00:43
24        2017-07-04 03:13:19
25        2017-06-12 00:30:44
26        2017-06-20 09:50:33
27        2017-06-22 22:08:41
28        2017-06-11 13:26:35
29        2017-07-04 09:40:59
                 ...         
399970    2017-06-12 23:41:20
399971    2017-06-29 22:55:52
399972    2017-06-08 21:36:12
399973    2017-06-07 07:32:09
399974    2017-06-30 10:49:34
399975    2017-06-29 10:25:22
399976    2017-07-02 23:05:33
399977    2017-06-14 09:14:25
399978    2017-06-16 23:01:34
399979    2017-06-30 20:05:53
399980    2017-06-14 11:43:47
399981    2017-06-07 09:19:46
399982    2017-06-28 11:42:38
399983    2017-06-13 14:28:37
399984    2017-06-17 13:29:12
399985    2017-06-15 06:16:08
399986    2017-06-23 18:05:50
399987    2017-06-16 12:47:34
399988    2017-06-22 20:57:56
399989    2017-06-26 04:56:07
399990    2017-07-02 17:11:21
399991    2017-06-16 02:28:35
399992    2017-07-04 20:10:37
399993    2017-06-17 02:49:32
399994    2017-06-28 08:49:46
399995    2017-07-02 21:33:10
399996    2017-06-26 10:09:42
399997    2017-06-13 04:40:07
399998    2017-06-13 16:58:51
399999    2017-07-05 07:28:44
Name: time, Length: 400000, dtype: object

In [19]:
# NOTE(review): `t` and `f` are not defined in any visible cell; this looks like
# leftover state from an earlier version of the notebook and will presumably
# raise NameError on a fresh kernel - confirm or remove.
t.extend(f)
len(t)


Out[19]:
1927

In [20]:
# Write every entry of `t` to output.log, one entry per line.
# NOTE(review): `t` is not defined in any visible cell, and this cell rebinds
# `f` (used by the `t.extend(f)` cell) to the file handle.
with open('output.log', 'w') as f:
    for line in t:
        f.write(line)
        f.write('\n')

In [21]:
! ls


LICENSE                                credit-card-fraud-data-generator.ipynb
README.md                              output.log
companies.csv                          requirements.txt

In [22]:
! head output.log


2017-07-02 12:17:52 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="Emergent Biosolutions, Inc." amount=267.69 disputed=false
2017-07-02 12:18:03 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="Blackrock Core Bond Trust" amount=245.13 disputed=false
2017-07-02 12:18:03 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="Phillips 66 Partners LP" amount=160.64 disputed=false
2017-07-02 12:17:40 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="Kimberly-Clark Corporation" amount=148.37 disputed=false
2017-07-02 12:17:37 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="Seaspan Corporation" amount=233.69 disputed=false
2017-07-02 12:17:56 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="Time Warner Inc." amount=164.46 disputed=false
2017-07-02 12:18:13 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="Flaherty & Crumrine Preferred Income Opportunity Fund Inc" amount=232.47 disputed=false
2017-07-02 12:17:32 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="Stone Energy Corporation" amount=165.0 disputed=false
2017-07-02 12:17:36 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="TELUS Corporation" amount=123.96 disputed=false
2017-07-02 12:17:28 uuid=6a8d3fa6-e47e-4c59-a469-c2a4a4ac4247 user="Lisa Henderson" business="SunCoke Energy, Inc." amount=111.38 disputed=false