Exploratory Analysis


In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

Load transaction data into pandas dataframes

The transaction data and block data are in separate json files


In [8]:
transactions1 = pd.read_json('./../data/transactions.json', lines=True)

In [9]:
transactions2 = pd.read_json('./../data/new_transactions.json', lines = True)

In [10]:
transactions1.head()


Out[10]:
_id accountNonce amount blockHash block_id gasLimit gasUsed hash isContractTx newContract parentHash price recipient sender time txIndex type
0 {u'$oid': u'59c2ce9a66a1615156cf51e7'} 3324654 4.990932e+17 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 50000 21000 0xfc505bf23efe13154192262c3f7b99e2517b133fe5f6... NaN 0 0xfc505bf23efe13154192262c3f7b99e2517b133fe5f6... 2.100000e+10 0x656dd5d0020dc9047e7d9ec4eac344a7397200e8 0xea674fdde714fd979de3edf0f56aa9716b898ec8 2017-09-20T20:24:15.000Z NaN tx
1 {u'$oid': u'59c2ce9a66a1615156cf51e8'} 3324658 1.492221e+17 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 20967 9416 0xfa0124a64e4cedc7d75dff3ee0fca13d32b2a368e0cb... NaN 0 0xad177c193597850a4c7c95cb990233a1190bd6118d33... 2.100000e+10 0x209c4784ab1e8183cf58ca33cb740efbf3fc18ef 0x0a8de3650a8f331e85673fe71677d53a749e6de7 2017-09-20T20:24:15.000Z 1.0 call
2 {u'$oid': u'59c2ce9a66a1615156cf51e9'} 3324660 9.960846e+16 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 50000 21000 0xe2a23b916903133728ab3dbfac55a56651508a971e89... NaN 0 0xe2a23b916903133728ab3dbfac55a56651508a971e89... 2.100000e+10 0x951bec2b87e89a80ed785de7c9df1555cb8d4e05 0xea674fdde714fd979de3edf0f56aa9716b898ec8 2017-09-20T20:24:15.000Z NaN tx
3 {u'$oid': u'59c2ce9a66a1615156cf51ea'} 3324657 1.990048e+17 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 50000 21000 0xc431fcd82ad55dc1aee932fa9965b55f8515981147a0... NaN 0 0xc431fcd82ad55dc1aee932fa9965b55f8515981147a0... 2.100000e+10 0xfd094f940ab0d01592c21afb20b500406ed509f6 0xea674fdde714fd979de3edf0f56aa9716b898ec8 2017-09-20T20:24:15.000Z NaN tx
4 {u'$oid': u'59c2ce9a66a1615156cf51eb'} 3324662 1.000289e+18 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 50000 21000 0xb759c34cb5b5af6f94f7002ef812b8482880d1f73d10... NaN 0 0xb759c34cb5b5af6f94f7002ef812b8482880d1f73d10... 2.100000e+10 0x08dd9744e0dedef75f74357ca948485ea1d2d1f4 0xea674fdde714fd979de3edf0f56aa9716b898ec8 2017-09-20T20:24:15.000Z NaN tx

In [11]:
transactions2.head()


Out[11]:
_id accountNonce amount blockHash block_id gasLimit gasUsed hash isContractTx newContract parentHash price recipient sender time txIndex type
0 {u'$oid': u'59c75a7666a161e4251b3c90'} 0 5.000000e+17 0x21162a45e066f7179e2e0ad3ced6f9a2f7d1e5a29838... 4306830 2300 0 0xfd5dc38f4565a75a7d0f94408a6115e588c4ef46b45a... NaN 0 0xd23f66df21168df1ed5cb1fa6ccc54ba9982ab7f75ff... 2.100000e+10 0x32be343b94f860124dc4fee278fdcbd38c102d88 0x209c4784ab1e8183cf58ca33cb740efbf3fc18ef 2017-09-24T07:08:15.000Z 2.0 call
1 {u'$oid': u'59c75a7666a161e4251b3c91'} 16 3.000000e+16 0x21162a45e066f7179e2e0ad3ced6f9a2f7d1e5a29838... 4306830 21000 21000 0xee6806c6cbd7cfc4ddcdb6be21fa3406dfa99ccbca11... NaN 0 0xee6806c6cbd7cfc4ddcdb6be21fa3406dfa99ccbca11... 2.700000e+10 0x5550e18823a7f6bb9278137d83f7dd7107045854 0x18f0cde3425b1047cb7dd253d1c08c0ed27b783f 2017-09-24T07:08:15.000Z NaN tx
2 {u'$oid': u'59c75a7666a161e4251b3c92'} 0 5.000000e+17 0x21162a45e066f7179e2e0ad3ced6f9a2f7d1e5a29838... 4306830 39512 39512 0xd23f66df21168df1ed5cb1fa6ccc54ba9982ab7f75ff... NaN 0 0xd23f66df21168df1ed5cb1fa6ccc54ba9982ab7f75ff... 2.100000e+10 0x0494448fc02a85355585eb5ce355c33cb7087905 0xf971282c0d9e679cb6cba4c80e49d2507a3d27c9 2017-09-24T07:08:15.000Z NaN tx
3 {u'$oid': u'59c75a7666a161e4251b3c93'} 517 1.197012e+17 0x21162a45e066f7179e2e0ad3ced6f9a2f7d1e5a29838... 4306830 35000 30981 0xaada72da8b31eb005f14fa968c56d6778d3d0815140e... NaN 0 0xaada72da8b31eb005f14fa968c56d6778d3d0815140e... 2.500000e+10 0xfa52274dd61e1643d2205169732f29114bc240b3 0x5f3a0f7ec5ee06179ee89ea188c8f53ecc253257 2017-09-24T07:08:15.000Z NaN tx
4 {u'$oid': u'59c75a7666a161e4251b3c94'} 0 1.383644e+17 0x21162a45e066f7179e2e0ad3ced6f9a2f7d1e5a29838... 4306830 21000 21000 0xa88b481c0e481e54507aeee484517d750075ec6a700d... NaN 0 0xa88b481c0e481e54507aeee484517d750075ec6a700d... 2.000000e+10 0x70faa28a6b8d6829a4b1e649d26ec9a2a39ba413 0xaf7f2ca0ef0e984b6c13443002291b7d86e479b7 2017-09-24T07:08:15.000Z NaN tx

In [12]:
transactions1.shape, transactions2.shape


Out[12]:
((1000527, 17), (922031, 17))

Append the transaction data into a single dataframe


In [13]:
transactions = transactions1.append(transactions2)

In [14]:
transactions.shape


Out[14]:
(1922558, 17)

In [15]:
transactions['hash'].values.shape


Out[15]:
(1922558,)

In [16]:
np.unique(transactions['hash'].values).shape


Out[16]:
(1922558,)

In [17]:
transactions.head()


Out[17]:
_id accountNonce amount blockHash block_id gasLimit gasUsed hash isContractTx newContract parentHash price recipient sender time txIndex type
0 {u'$oid': u'59c2ce9a66a1615156cf51e7'} 3324654 4.990932e+17 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 50000 21000 0xfc505bf23efe13154192262c3f7b99e2517b133fe5f6... NaN 0 0xfc505bf23efe13154192262c3f7b99e2517b133fe5f6... 2.100000e+10 0x656dd5d0020dc9047e7d9ec4eac344a7397200e8 0xea674fdde714fd979de3edf0f56aa9716b898ec8 2017-09-20T20:24:15.000Z NaN tx
1 {u'$oid': u'59c2ce9a66a1615156cf51e8'} 3324658 1.492221e+17 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 20967 9416 0xfa0124a64e4cedc7d75dff3ee0fca13d32b2a368e0cb... NaN 0 0xad177c193597850a4c7c95cb990233a1190bd6118d33... 2.100000e+10 0x209c4784ab1e8183cf58ca33cb740efbf3fc18ef 0x0a8de3650a8f331e85673fe71677d53a749e6de7 2017-09-20T20:24:15.000Z 1.0 call
2 {u'$oid': u'59c2ce9a66a1615156cf51e9'} 3324660 9.960846e+16 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 50000 21000 0xe2a23b916903133728ab3dbfac55a56651508a971e89... NaN 0 0xe2a23b916903133728ab3dbfac55a56651508a971e89... 2.100000e+10 0x951bec2b87e89a80ed785de7c9df1555cb8d4e05 0xea674fdde714fd979de3edf0f56aa9716b898ec8 2017-09-20T20:24:15.000Z NaN tx
3 {u'$oid': u'59c2ce9a66a1615156cf51ea'} 3324657 1.990048e+17 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 50000 21000 0xc431fcd82ad55dc1aee932fa9965b55f8515981147a0... NaN 0 0xc431fcd82ad55dc1aee932fa9965b55f8515981147a0... 2.100000e+10 0xfd094f940ab0d01592c21afb20b500406ed509f6 0xea674fdde714fd979de3edf0f56aa9716b898ec8 2017-09-20T20:24:15.000Z NaN tx
4 {u'$oid': u'59c2ce9a66a1615156cf51eb'} 3324662 1.000289e+18 0xce9a4be92a53142c8e1962405342ded3472d5516bbc9... 4295887 50000 21000 0xb759c34cb5b5af6f94f7002ef812b8482880d1f73d10... NaN 0 0xb759c34cb5b5af6f94f7002ef812b8482880d1f73d10... 2.100000e+10 0x08dd9744e0dedef75f74357ca948485ea1d2d1f4 0xea674fdde714fd979de3edf0f56aa9716b898ec8 2017-09-20T20:24:15.000Z NaN tx

Apparently there are no duplicate transaction blocks

Load block data into pandas dataframes


In [18]:
blocks1 = pd.read_json('./../data/blocks.json', lines=True)

In [19]:
blocks2 = pd.read_json('./../data/blocks_more.json', lines=True)

In [20]:
blocks3 = pd.read_json('./../data/blocks_2.json', lines=True)

In [21]:
blocks1.shape, blocks2.shape, blocks3.shape


Out[21]:
((9623, 21), (26, 21), (751223, 21))

Append the block data into single dataframe


In [22]:
blocks = blocks1.append([blocks2, blocks3])

In [23]:
blocks.shape


Out[23]:
(760872, 21)

In [24]:
blocks['number'].head()


Out[24]:
0    4292608.0
1    4292609.0
2    4292610.0
3    4292611.0
4    4292612.0
Name: number, dtype: float64

In [25]:
blocks.head()


Out[25]:
_id blockTime coinbase difficulty extra gasLimit gasUsed hash mixDigest nonce ... parentHash reward root size time totalFee txHash tx_count uncleHash uncle_count
0 {u'$oid': u'59c5ce4166a161b6940c900f'} 22 0x829bd824b016326a401d083b33d092293333a830 {u'$numberLong': u'2415063514546603'} 0xe4b883e5bda9e7a59ee4bb99e9b1bc 6722221.0 1638699.0 0xed5a6d0ab1d8ebece462c60c5764013f8649fd2e340d... NaN 0x88c3313c5808ea0b55 ... 0x03ca1d1d9bc8976c871fe255ff8855b11408900c578b... {u'$numberLong': u'5048829031919903000'} 0x11466e02b478876a595f6032b3e434c80e3cf89dcae9... 6331 2017-09-19T22:30:42.000Z {u'$numberLong': u'48829031919903220'} 0x1a2f7f901498f820edd20d27a34ae46b4b766ff66ab1... 43 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0
1 {u'$oid': u'59c5ce4666a161b6940c9010'} 13 0xea674fdde714fd979de3edf0f56aa9716b898ec8 {u'$numberLong': u'2416163026174379'} 0x65746865726d696e652d657535 6715685.0 147000.0 0xe28f0bdb771db682f50eb672f6250427358a551aff35... NaN 0x8844b0481556efb1ba ... 0xed5a6d0ab1d8ebece462c60c5764013f8649fd2e340d... {u'$numberLong': u'5003150000000000000'} 0x3d612737f50e54b4f81d0bf0df3ddc6c921590c83ef7... 1306 2017-09-19T22:30:55.000Z {u'$numberLong': u'3150000000000000'} 0x45fa09c65004ef8381993a3b9aa2fd4c9b69c55cc0e9... 7 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0
2 {u'$oid': u'59c5ce4c66a161b6940c9011'} 2 0xea674fdde714fd979de3edf0f56aa9716b898ec8 {u'$numberLong': u'2418442304904779'} 0x65746865726d696e652d6173696137 6709136.0 6467763.0 0x3677a004073c59fd13342b10ee9e4f78838e116d6b8e... NaN 0x882bda7cc01380352b ... 0xe28f0bdb771db682f50eb672f6250427358a551aff35... {u'$numberLong': u'5067934995026004000'} 0x3491fdd5fb13ff6445b4919dd771354a6e6b4b97cb2c... 21486 2017-09-19T22:30:57.000Z {u'$numberLong': u'67934995026003860'} 0x33cf3315cf8d7fdfffaf8749b8f4365ab1d30f586901... 47 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0
3 {u'$oid': u'59c5ce5166a161b6940c9012'} 24 0xb2930b35844a230f00e51431acae96fe543a0347 {u'$numberLong': u'2418360936500864'} 0x7439 6702587.0 6643541.0 0xaf013391d383533db3fd2ff9bfa58f17db9d64c1ff6f... NaN 0x884334f56011dd99ff ... 0x3677a004073c59fd13342b10ee9e4f78838e116d6b8e... {u'$numberLong': u'5062887994431406000'} 0xc4c2bd2ea2e93af93fe168cf1fa30e59f1c5394c39e3... 15432 2017-09-19T22:31:21.000Z {u'$numberLong': u'62887994431406510'} 0xbc846d47e24ec42f482644489c90c4b516d626896dac... 86 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0
4 {u'$oid': u'59c5ce5766a161b6940c9013'} 27 0xea674fdde714fd979de3edf0f56aa9716b898ec8 {u'$numberLong': u'2418279607827615'} 0x65746865726d696e652d657532 6709099.0 229962.0 0x7fd784d3a2ec316efa7e1dde6d53c5b4572fdeddbc83... NaN 0x88a7da800018156043 ... 0xaf013391d383533db3fd2ff9bfa58f17db9d64c1ff6f... {u'$numberLong': u'5004893252000000000'} 0x4784e3313ad18136db83ef9f715858022c6e13ecbbed... 1649 2017-09-19T22:31:48.000Z {u'$numberLong': u'4893252000000000'} 0xeda7239a129ba5ae52688bd7cbee9089b84d45fff09e... 10 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0

5 rows × 21 columns


In [26]:
blocks.columns


Out[26]:
Index([u'_id', u'blockTime', u'coinbase', u'difficulty', u'extra', u'gasLimit',
       u'gasUsed', u'hash', u'mixDigest', u'nonce', u'number', u'parentHash',
       u'reward', u'root', u'size', u'time', u'totalFee', u'txHash',
       u'tx_count', u'uncleHash', u'uncle_count'],
      dtype='object')

Create new block_id column converting floats into integers


In [27]:
blocks['block_id'] = blocks['number'].apply(lambda x: int(round(x)))

In [28]:
blocks['number'].values.shape


Out[28]:
(760872,)

In [29]:
np.unique(blocks['number'].values).shape


Out[29]:
(756010,)

In [30]:
blocks['block_id'].unique().shape


Out[30]:
(756009,)

In [31]:
blocks[['number', 'block_id']].head()


Out[31]:
number block_id
0 4292608.0 4292608
1 4292609.0 4292609
2 4292610.0 4292610
3 4292611.0 4292611
4 4292612.0 4292612

Drop duplicate blocks


In [32]:
blocks.drop_duplicates(subset='block_id', inplace=True)

In [33]:
blocks.shape


Out[33]:
(756009, 22)

In [34]:
blocks['number'].values.shape


Out[34]:
(756009,)

In [35]:
np.unique(blocks['number'].values).shape


Out[35]:
(756009,)

In [36]:
blocks.head()


Out[36]:
_id blockTime coinbase difficulty extra gasLimit gasUsed hash mixDigest nonce ... reward root size time totalFee txHash tx_count uncleHash uncle_count block_id
0 {u'$oid': u'59c5ce4166a161b6940c900f'} 22 0x829bd824b016326a401d083b33d092293333a830 {u'$numberLong': u'2415063514546603'} 0xe4b883e5bda9e7a59ee4bb99e9b1bc 6722221.0 1638699.0 0xed5a6d0ab1d8ebece462c60c5764013f8649fd2e340d... NaN 0x88c3313c5808ea0b55 ... {u'$numberLong': u'5048829031919903000'} 0x11466e02b478876a595f6032b3e434c80e3cf89dcae9... 6331 2017-09-19T22:30:42.000Z {u'$numberLong': u'48829031919903220'} 0x1a2f7f901498f820edd20d27a34ae46b4b766ff66ab1... 43 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0 4292608
1 {u'$oid': u'59c5ce4666a161b6940c9010'} 13 0xea674fdde714fd979de3edf0f56aa9716b898ec8 {u'$numberLong': u'2416163026174379'} 0x65746865726d696e652d657535 6715685.0 147000.0 0xe28f0bdb771db682f50eb672f6250427358a551aff35... NaN 0x8844b0481556efb1ba ... {u'$numberLong': u'5003150000000000000'} 0x3d612737f50e54b4f81d0bf0df3ddc6c921590c83ef7... 1306 2017-09-19T22:30:55.000Z {u'$numberLong': u'3150000000000000'} 0x45fa09c65004ef8381993a3b9aa2fd4c9b69c55cc0e9... 7 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0 4292609
2 {u'$oid': u'59c5ce4c66a161b6940c9011'} 2 0xea674fdde714fd979de3edf0f56aa9716b898ec8 {u'$numberLong': u'2418442304904779'} 0x65746865726d696e652d6173696137 6709136.0 6467763.0 0x3677a004073c59fd13342b10ee9e4f78838e116d6b8e... NaN 0x882bda7cc01380352b ... {u'$numberLong': u'5067934995026004000'} 0x3491fdd5fb13ff6445b4919dd771354a6e6b4b97cb2c... 21486 2017-09-19T22:30:57.000Z {u'$numberLong': u'67934995026003860'} 0x33cf3315cf8d7fdfffaf8749b8f4365ab1d30f586901... 47 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0 4292610
3 {u'$oid': u'59c5ce5166a161b6940c9012'} 24 0xb2930b35844a230f00e51431acae96fe543a0347 {u'$numberLong': u'2418360936500864'} 0x7439 6702587.0 6643541.0 0xaf013391d383533db3fd2ff9bfa58f17db9d64c1ff6f... NaN 0x884334f56011dd99ff ... {u'$numberLong': u'5062887994431406000'} 0xc4c2bd2ea2e93af93fe168cf1fa30e59f1c5394c39e3... 15432 2017-09-19T22:31:21.000Z {u'$numberLong': u'62887994431406510'} 0xbc846d47e24ec42f482644489c90c4b516d626896dac... 86 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0 4292611
4 {u'$oid': u'59c5ce5766a161b6940c9013'} 27 0xea674fdde714fd979de3edf0f56aa9716b898ec8 {u'$numberLong': u'2418279607827615'} 0x65746865726d696e652d657532 6709099.0 229962.0 0x7fd784d3a2ec316efa7e1dde6d53c5b4572fdeddbc83... NaN 0x88a7da800018156043 ... {u'$numberLong': u'5004893252000000000'} 0x4784e3313ad18136db83ef9f715858022c6e13ecbbed... 1649 2017-09-19T22:31:48.000Z {u'$numberLong': u'4893252000000000'} 0xeda7239a129ba5ae52688bd7cbee9089b84d45fff09e... 10 0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a... 0 4292612

5 rows × 22 columns

Now we have a unique block set

Merge transaction and block data


In [37]:
transactions.columns


Out[37]:
Index([u'_id', u'accountNonce', u'amount', u'blockHash', u'block_id',
       u'gasLimit', u'gasUsed', u'hash', u'isContractTx', u'newContract',
       u'parentHash', u'price', u'recipient', u'sender', u'time', u'txIndex',
       u'type'],
      dtype='object')

In [38]:
blocks.columns


Out[38]:
Index([        u'_id',   u'blockTime',    u'coinbase',  u'difficulty',
             u'extra',    u'gasLimit',     u'gasUsed',        u'hash',
         u'mixDigest',       u'nonce',      u'number',  u'parentHash',
            u'reward',        u'root',        u'size',        u'time',
          u'totalFee',      u'txHash',    u'tx_count',   u'uncleHash',
       u'uncle_count',    u'block_id'],
      dtype='object')

Merge the dataframes on block_id


In [39]:
merged_df = transactions.merge(blocks, left_on='block_id', right_on='block_id', 
                               suffixes=('_t', '_b'))

In [40]:
transactions.shape


Out[40]:
(1922558, 17)

In [41]:
blocks.shape


Out[41]:
(756009, 22)

In [42]:
merged_df.shape


Out[42]:
(1494279, 38)

In [43]:
merged_df['hash_t'].unique().shape


Out[43]:
(1494279,)

In [44]:
merged_df.columns


Out[44]:
Index([u'_id_t', u'accountNonce', u'amount', u'blockHash', u'block_id',
       u'gasLimit_t', u'gasUsed_t', u'hash_t', u'isContractTx', u'newContract',
       u'parentHash_t', u'price', u'recipient', u'sender', u'time_t',
       u'txIndex', u'type', u'_id_b', u'blockTime', u'coinbase', u'difficulty',
       u'extra', u'gasLimit_b', u'gasUsed_b', u'hash_b', u'mixDigest',
       u'nonce', u'number', u'parentHash_b', u'reward', u'root', u'size',
       u'time_b', u'totalFee', u'txHash', u'tx_count', u'uncleHash',
       u'uncle_count'],
      dtype='object')

Data cleaning


In [45]:
merged_df.columns


Out[45]:
Index([u'_id_t', u'accountNonce', u'amount', u'blockHash', u'block_id',
       u'gasLimit_t', u'gasUsed_t', u'hash_t', u'isContractTx', u'newContract',
       u'parentHash_t', u'price', u'recipient', u'sender', u'time_t',
       u'txIndex', u'type', u'_id_b', u'blockTime', u'coinbase', u'difficulty',
       u'extra', u'gasLimit_b', u'gasUsed_b', u'hash_b', u'mixDigest',
       u'nonce', u'number', u'parentHash_b', u'reward', u'root', u'size',
       u'time_b', u'totalFee', u'txHash', u'tx_count', u'uncleHash',
       u'uncle_count'],
      dtype='object')

Select subset of columns to work with


In [46]:
sel_cols = ['hash_t',
            'accountNonce', 
            'amount', 
            'block_id', 
            'gasLimit_t', 
            'gasUsed_t',
            'isContractTx',
            'newContract',
            'price',
            'time_t',
            'txIndex', 
            'type', 
            'blockTime',
            'difficulty',
            'gasLimit_b',
            'gasUsed_b',
            'reward',
            'size',
            'time_b',
            'totalFee',
            'tx_count',
            'uncle_count']

In [47]:
df = merged_df[sel_cols]

In [48]:
print('no. transactions: {}, no. blocks: {}'.format(np.unique(df['hash_t'].values).shape[0],
                                                    np.unique(df['block_id'].values).shape[0]))


no. transactions: 1494279, no. blocks: 14583

Convert dates to datetime


In [49]:
df.loc[:,'time_t'] = pd.to_datetime(df.time_t, yearfirst=True)


//anaconda/lib/python2.7/site-packages/pandas/core/indexing.py:517: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s

In [50]:
df.loc[:,'time_b'] = pd.to_datetime(df.time_b, yearfirst=True)

In [51]:
dates = df['time_t'].values.astype('datetime64[D]')

In [52]:
np.unique(dates)


Out[52]:
array(['2017-09-18', '2017-09-19', '2017-09-20', '2017-09-21',
       '2017-09-22', '2017-09-23', '2017-09-24'], dtype='datetime64[D]')

The dates of blockchain data span from 9/18/17 to 9/24/17


In [58]:
df['time_t'].head()


Out[58]:
0   2017-09-20 20:24:15
1   2017-09-20 20:24:15
2   2017-09-20 20:24:15
3   2017-09-20 20:24:15
4   2017-09-20 20:24:15
Name: time_t, dtype: datetime64[ns]

In [53]:
np.unique(df['hash_t'].values).shape


Out[53]:
(1494279,)

In [54]:
df.shape


Out[54]:
(1494279, 22)

Check for duplicate rows


In [55]:
df.duplicated(subset='hash_t').value_counts()


Out[55]:
False    1494279
dtype: int64

No duplicates found

Creat CSV file of final dataset


In [57]:
df.to_csv('./../data/data.csv')

In [ ]: