Mémoire DSSP: Community and anomaly detection in the Ethereum blockchain


Objectives :

  • Find communities in Ethereum blockchain transactions
  • Find anomalies in transactions
  • Build a predictor that gives each new transaction an anomaly score
  • Build a predictor to classify new transactions


In [9]:
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import timeit
import networkx as nx

In [4]:
#%%timeit
#Read csv file
transactions = pd.read_csv("../data/transactions-1-2184224.csv")
# 'Unnamed: 0' is presumably the index column written by an earlier to_csv export
# (TODO confirm); it carries no transaction data, so it is discarded.
# The axis is passed by keyword because recent pandas releases reject a positional axis.
transactions = transactions.drop('Unnamed: 0', axis=1)

In [5]:
# Distinct sender addresses and distinct receiver addresses
senders = transactions['from']
receivers = transactions['to']
nodes_from = senders.unique()
nodes_to = receivers.unique()
# Sorted, de-duplicated union of both address sets (np array)
nodes = np.unique(np.concatenate((nodes_from, nodes_to)))

In [6]:
# Report how many distinct addresses appear as sender, as receiver, and overall.
# The parenthesised single-argument form prints the same text under Python 2
# and is also valid Python 3.
print("Nodes from : %s" % len(nodes_from))
print("Nodes to : %s" % len(nodes_to))
print("Total nodes : %s" % len(nodes))


Nodes from : 362111
Nodes to : 432844
Total nodes : 453155

In [7]:
# Preview the last 10 rows of the raw load; numeric fields are still hex strings here.
transactions.tail(10)


Out[7]:
blockHash blockNumber from gas gasPrice hash input nonce timestamp to transactionIndex value
9440482 0x954ea41c1e4c8e6f597da75a4f3bd5b64059f7ae40af... 0x21541d 0x96b838e080882b615e66eeb979d207e0866e06ea 0x15f90 0x4a817c800 0x52e33325f9e6cf4933c1595f44a18d0349327a749ac9... 0x0000000000000000000000000000000000000000 0xa 0x57c9343d 0xb36cbe7f95a39984384e6aa4068b02c1697ef80e 0x12 0x165b517ae5e6000
9440483 0x954ea41c1e4c8e6f597da75a4f3bd5b64059f7ae40af... 0x21541d 0xdec45b952dfdc23e79f36df7d47b99bba2f8eb38 0xc350 0x4a817c800 0xf56820f2698996c55366f2398ef69d7a7d3d4da21413... 0x 0xf 0x57c9343d 0x7727e5113d1d161373623e5f49fd568b4f543a9e 0x13 0x3003c71d327f5a0
9440484 0x954ea41c1e4c8e6f597da75a4f3bd5b64059f7ae40af... 0x21541d 0xf8ec54bce4b903bd4bb0c016c0e9899e66af3a23 0x2062a 0x4a817c800 0xd7ee0c08035b3384c4fc5b3b154f30e46b650f378010... 0x0f2c9329000000000000000000000000fbb1b73c4f0b... 0xc0 0x57c9343d 0xe94b04a0fed112f3664e45adb2b8915693dd5ff3 0x14 0x7ea1b84ccb78000
9440485 0x954ea41c1e4c8e6f597da75a4f3bd5b64059f7ae40af... 0x21541d 0x40ce7569d555dbf939e58867be78fd76142df821 0xe57e0 0x4a817c800 0x5c0f2faecec066ef928de323379369cd681be469edb1... 0x 0x50df 0x57c9343d 0x8dc549a1495e2baddfac568624ea402e98a09acf 0x15 0x5e4fd0978a0b400
9440486 0x954ea41c1e4c8e6f597da75a4f3bd5b64059f7ae40af... 0x21541d 0xd8da6bf26964af9d7eed9e03e53415d37aa96045 0xe57e0 0x4a817c800 0x3791d0c2fe59e22f1ff21d179bd1cf49059e6d8a1fdf... 0x 0x255 0x57c9343d 0xf5446e77728810d4fded1708b888ab6a4eb8075e 0x16 0x3782dace9d900000
9440487 0x954ea41c1e4c8e6f597da75a4f3bd5b64059f7ae40af... 0x21541d 0x3d5f3243ed50c3614efbe6853bab45d1a55a5892 0x2062a 0x4a817c800 0xbcb37f6e7a10aa70bff2a9ff2045e7bdb3d3e7797f7e... 0x0f2c9329000000000000000000000000fbb1b73c4f0b... 0x254 0x57c9343d 0xe94b04a0fed112f3664e45adb2b8915693dd5ff3 0x17 0x4b347ceae897c00
9440488 0x954ea41c1e4c8e6f597da75a4f3bd5b64059f7ae40af... 0x21541d 0x6c6bae62f33bf53d1197ed9755003129d7d65a9a 0x5208 0x4a817c800 0x9aec374be0d1fd8e805d3660e3110b7f9582f039014d... 0x 0xb6 0x57c9343d 0x91337a300e0361bddb2e377dd4e88ccb7796663d 0x18 0x19fc3efcaaf2c00
9440489 0x81709e393ce93cb1dec7542fe99bf80a5e4b73c1592f... 0x21541f 0xfd4ef6b7b52ec618881636c851dd9f28683d4d47 0x19a28 0x5220c8ced 0x183c18b671fbfbc35c8baa230638fe0297b8aa30dc56... 0xa9059cbb00000000000000000000000018e11b45aabe... 0x25d9 0x57c9344e 0x37a9679c41e99db270bda88de8ff50c0cd23f326 0x0 0x0
9440490 0x81709e393ce93cb1dec7542fe99bf80a5e4b73c1592f... 0x21541f 0xfd4ef6b7b52ec618881636c851dd9f28683d4d47 0x19a28 0x5220c8ced 0x9a3b4b8c86e4a2e0524f50a1f7d3a23c130d099b1185... 0xa9059cbb00000000000000000000000018e11b45aabe... 0x25da 0x57c9344e 0x37a9679c41e99db270bda88de8ff50c0cd23f326 0x1 0x0
9440491 0x81709e393ce93cb1dec7542fe99bf80a5e4b73c1592f... 0x21541f 0x4ce3ea9c4813bf867d20f6d61b30839fe2346eb0 0x2062a 0x4a817c800 0xff265ca7b9da72d9d0111142b668b92f5b33404873bc... 0x0f2c9329000000000000000000000000fbb1b73c4f0b... 0x73 0x57c9344e 0xe94b04a0fed112f3664e45adb2b8915693dd5ff3 0x2 0x72e6197619fbc00

In [8]:
#Convert hex value to dec
# int(x, 16) is used instead of a fixed-width numeric parser because 'value'
# can exceed the signed 64-bit range, so the column holds Python integers.
# NOTE: the hex strings are overwritten in place, so this cell must be run only
# once per load of the CSV (a second run would call int(x, 16) on integers and fail).
hex_columns = ['blockNumber', 'value', 'gas', 'gasPrice', 'transactionIndex']
for hex_column in hex_columns:
    transactions[hex_column] = transactions[hex_column].map(lambda x: int(x, 16))
#Convert hex unix timestamp to datetime
# The epoch seconds are interpreted as UTC, so the result does not depend on the
# timezone of the machine running the notebook.
transactions['timestamp'] = pd.to_datetime(
    transactions['timestamp'].map(lambda x: int(x, 16)), unit='s')

In [14]:
# Check the first 20 rows after conversion: the converted columns are now decimal
# integers and 'timestamp' is a datetime; 'nonce' and 'input' are left as hex strings.
transactions.head(20)


Out[14]:
blockHash blockNumber from gas gasPrice hash input nonce timestamp to transactionIndex value
0 0x4e3a3754410177e6937ef1f84bba68ea139e8d1a2258... 46147 0xa1e4380a3b1f749673e270229993ee55f35663b4 21000 50000000000000 0x5c504ed432cb51138bcf09aa5e8a410dd4a1e204ef84... 0x 0x0 2015-08-07 03:30:33 0x5df9b87991262f6ba471f09758cde1c0fc1de734 0 31337
1 0x5793f91c9fa8f824d8ed77fc1687dddcf334da81c68b... 46169 0xbd08e0cddec097db7901ea819a3d1fd9de8951a2 21000 909808707606 0x19f1df2c7ee6b464720ad28e903aeda1a5ad8780afc2... 0x 0x0 2015-08-07 03:36:53 0x5c12a8e43faf884521c2454f39560e6c265a68c8 0 19900000000000000000
2 0xf4a537e8e2233149929a9b6964c9aced6ee95f42131a... 46170 0x63ac545c991243fa18aec41d4f6f598e555015dc 21000 500000000000 0x9e6e19637bb625a8ff3d052b7c2fe57dc78c55a15d25... 0x 0x0 2015-08-07 03:37:10 0xc93f2250589a6563f5359051c1ea25746549f0d8 0 599989500000000000000
3 0x47ec6a0c3467850cf88112c212c262819de6f1d084d3... 46194 0x037dd056e7fdbd641db5b6bea2a8780a83fae180 21000 1000000000000 0xcb9378977089c773c074045b20ede2cdcc3a6ff562f4... 0x 0x0 2015-08-07 03:43:03 0x7e7ec15a5944e978257ddae0008c2f2ece0a6090 0 100000000000000000000
4 0xe6fb31b12d06b5a70f420a28b4e034bcd152abc2d603... 46205 0x3f2f381491797cc5c0d48296c14fd0cd00cdfa2d 21000 500000000000 0x570ce19176bd0002b04a9179309129bbdaf0c4252ffe... 0x 0x0 2015-08-07 03:46:15 0x4bd5f0ee173c81d42765154865ee69361b6ad189 0 803989500000000000000
5 0x6235be352481368721cd978cee3c1e05ce31419b0e35... 46214 0xa1e4380a3b1f749673e270229993ee55f35663b4 21750 50000000000000 0xe17d4d0c4596ea7d5166ad5da600a6fdc49e26e06801... 0x74796d3474406469676978 0x1 2015-08-07 03:49:54 0xc9d4035f4a9226d50f79b73aafb5d874a1b6537e 0 31337
6 0xeab8fe9da0b41b2c003db620bb9adbedd5fcc7222cc5... 46217 0xc8ebccc5f5689fa8659d83713341e5ad19349448 21000 65334370444 0x2ec382949ba0b22443aa4cb38267b1fb5e68e188109a... 0x 0x0 2015-08-07 03:50:51 0xc8ebccc5f5689fa8659d83713341e5ad19349448 0 0
7 0xf5674f4a8c62bb27480155adfbaf272b8bc969509f5a... 46219 0xa1e4380a3b1f749673e270229993ee55f35663b4 21800 50000000000000 0xe891897177614c91284b6929dc2ada5a87705c4729bd... 0x74796d3474406469676978 0x2 2015-08-07 03:51:01 0x5df9b87991262f6ba471f09758cde1c0fc1de734 0 31337
8 0x1e6f0d21ce2260371c95266b5cf4f698841c8c57b4a1... 46220 0xf0cf0af5bd7d8a3a1cad12a30b097265d49f255d 21000 64178193561 0x35d4f3dae18d72a0d4caf02359ca1844687ff879a265... 0x 0x0 2015-08-07 03:51:31 0xb608771949021d2f2f1c9c5afb980ad8bcda3985 0 100000000000000000000
9 0xc3389d535ebbaae818c492901ca40995a2c9b644af71... 46230 0x1c68a66138783a63c98cc675a9ec77af4598d35e 21000 71288549894 0x41738785c4330ce9531aed26b21b9cfba6f27b9183d1... 0x 0x0 2015-08-07 03:52:51 0xc8ebccc5f5689fa8659d83713341e5ad19349448 0 50000000000000000000
10 0x7ea3beb17acb66f15fa33ad71f41d1e2a5dd878fc489... 46235 0xfd2605a2bf58fdbb90db1da55df61628b47f9e8c 21000 70563255618 0x80f31704782a53514ab0693499f78922169885a16fd4... 0x 0x0 2015-08-07 03:54:15 0x073f70b5bfade6409e4951ef72bc8f4157677729 0 10000000000000000
11 0xd6a6763acc2c642b5ba16904a168efc571a48a8e6d20... 46237 0xbbed46565f5aa9af9539f543067821fa4b565438 21000 70543826520 0x3a1be2710cc4c46adaf85bdfd42fa80b0f2044b3d976... 0x 0x0 2015-08-07 03:54:56 0xbf8d8b4ec992203984f7379b001e87e6943dd5e3 0 4406636741121
12 0xbee4330cdd56d2bcc47fa42e52e3c089c649b209acc0... 46239 0x8ce4949d8a16542d423c17984e6739fa72ceb177 21000 1000000000000 0xc0c1c720bc5b3583ad3a4075730b44c0c120a0fe660e... 0x 0x0 2015-08-07 03:55:56 0x15e34a8324164ef0890471f6f527451f7a22cf12 0 100000000000000000000
13 0x23e1d63506f4fd6405f064abaef682c5b4f3b4912030... 46240 0x136d4b662bbd1080cfe4445b0fa213864435b7f1 21000 81400949601 0x04ff148add6a0b71ba11ff0813e376c0bf12a23bc9b9... 0x 0x0 2015-08-07 03:56:49 0xc8ebccc5f5689fa8659d83713341e5ad19349448 0 1000000000000000000000
14 0x095403d60d26507b52ff3559c6c0ddf31a88d68936a2... 46242 0x4d9279962029a8bd45639737e98b511eff074c21 21000 74717879258 0x8e2ba7d83962ec633eab45afa77f37d72ecdfb40d968... 0x 0x0 2015-08-07 03:56:55 0x99c236141daec837ece04fdaee1d90cf8bbdc104 0 1000000000000000000
15 0x318c7465281e9095d2a5effc0cae14bcdee86311d7d5... 46245 0x8ce4949d8a16542d423c17984e6739fa72ceb177 21000 96136464245 0xe063d3191ade43913d74ede2b6e056bf0aeaa216f79d... 0x 0x1 2015-08-07 03:58:21 0x15e34a8324164ef0890471f6f527451f7a22cf12 0 100000000000000000000
16 0x3f2212a89f18f77cb2e7ddb7714ff110da52237410e5... 46246 0x136d4b662bbd1080cfe4445b0fa213864435b7f1 21000 93409693704 0x8514ce8f269e2745264a5d3529b7dea51538304a7e70... 0x 0x1 2015-08-07 03:58:33 0xc8ebccc5f5689fa8659d83713341e5ad19349448 0 2000000000000000000000
17 0x8f21242e15b52b499a0a74b4611c849089b4a8eb15c5... 46247 0x03b35d960c861fe987c50380d76db9d6ae1fa510 21000 91092907897 0x53b783a7552bae97fe01be2ec11d3d9f4344309b5969... 0x 0x0 2015-08-07 03:59:09 0xb83b76f35cf6b2149b34ee092b3fc93b7bb54a8d 0 500000000000000000
18 0x3c93a7ff6fb6f39e63456e9ea6fb069c7783f3c00b10... 46250 0x4d9279962029a8bd45639737e98b511eff074c21 21000 110931381403 0x3811e0631ee580a60359958bf47fc9c61a8558c7ce7b... 0x 0x1 2015-08-07 03:59:43 0xd65f9b9aa7becfd1481bdd7e22527ca0900b7bc8 0 100000000000000000000
19 0xeecb25ce31fb7a212500443a31d463b663a2c5f65e78... 46251 0xfd2605a2bf58fdbb90db1da55df61628b47f9e8c 21000 88893746891 0x304346de678ddca5b1311970e7e8d3f26a42d2924aaa... 0x 0x1 2015-08-07 03:59:50 0x073f70b5bfade6409e4951ef72bc8f4157677729 0 10000000000000000

In [15]:
#Creation Dataframe Nodes :
nodes_df = pd.DataFrame(nodes,columns=['users'])
nodes_df.to_csv("../data/nodes.csv")

In [16]:
#Creation Dataframe Edges :
edges_df = transactions.drop(['blockHash','blockNumber','input','nonce','transactionIndex'],1)
edges_df.to_csv("../data/edges.csv")

In [18]:
#Creation Graph
G = nx.DiGraph()
G=nx.from_pandas_dataframe(edges_df, 'from', 'to', ['value', 'timestamp','gas','hash'],nx.DiGraph())


---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-18-4c23c0a5d52b> in <module>()
      1 #Creation Graph
      2 G = nx.DiGraph()
----> 3 G=nx.from_pandas_dataframe(edges_df, 'from', 'to', ['value', 'timestamp','gas','hash'],nx.DiGraph())

/usr/local/lib/python2.7/dist-packages/networkx/convert_matrix.pyc in from_pandas_dataframe(df, source, target, edge_attr, create_using)
    208 
    209         # Iteration on values returns the rows as Numpy arrays
--> 210         for row in df.values:
    211             g.add_edge(row[src_i], row[tar_i], {i:row[j] for i, j in edge_i})
    212 

/usr/local/lib/python2.7/dist-packages/pandas/core/generic.pyc in values(self)
   2850         int32.
   2851         """
-> 2852         return self.as_matrix()
   2853 
   2854     @property

/usr/local/lib/python2.7/dist-packages/pandas/core/generic.pyc in as_matrix(self, columns)
   2832         self._consolidate_inplace()
   2833         if self._AXIS_REVERSED:
-> 2834             return self._data.as_matrix(columns).T
   2835         return self._data.as_matrix(columns)
   2836 

/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in as_matrix(self, items)
   3148             return mgr.blocks[0].get_values()
   3149         else:
-> 3150             return mgr._interleave()
   3151 
   3152     def _interleave(self):

/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in _interleave(self)
   3175         for blk in self.blocks:
   3176             rl = blk.mgr_locs
-> 3177             result[rl.indexer] = blk.get_values(dtype)
   3178             itemmask[rl.indexer] = 1
   3179 

/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in get_values(self, dtype)
    139         """
    140         if com.is_object_dtype(dtype):
--> 141             return self.values.astype(object)
    142         return self.values
    143 

MemoryError: 

In [ ]: