In [373]:
import requests
import numpy as np
import pandas as pd
from time import sleep

In [374]:
def decode_block(block):
    """Reduce a raw block dictionary to a summary and a list of transactions.

    Parameters
    ----------
    block : dict
        Must provide the keys 'height', 'hash', 'time', 'fee', 'n_tx',
        'size' and 'tx'. Every entry of 'tx' must provide 'time', 'inputs'
        and 'out'.

    Returns
    -------
    (dict, list of dict)
        The block summary (the six scalar keys listed above) and one dict
        per transaction with the keys 'block' (the block height), 'index'
        (position of the transaction inside the block), 'time', 'inputs'
        and 'outputs'. Inputs and outputs are lists of
        ``(address, value)`` tuples, with the value converted to float.
    """
    def get(tx):
        # .get() so that an entry without an 'addr' key yields None as its
        # address instead of raising KeyError and aborting the whole block.
        # NOTE(review): presumably such entries are scripts that do not
        # decode to an address -- confirm against the API.
        return (tx.get('addr'), float(tx['value']))

    def not_reward(x):
        # Inputs with no previous output are skipped -- presumably the
        # block-reward (coinbase) input. A missing key and an explicit
        # None are treated the same way.
        return x.get('prev_out') is not None

    summary_keys = ('height', 'hash', 'time', 'fee', 'n_tx', 'size')
    b = {key: block[key] for key in summary_keys}
    block_height = b['height']

    txs = []
    for index, tx in enumerate(block['tx']):
        inputs = [get(_tx['prev_out']) for _tx in tx['inputs'] if not_reward(_tx)]
        # Zero-valued outputs are dropped.
        outputs = [get(_tx) for _tx in tx['out'] if float(_tx['value']) > 0]
        txs.append({
            'block': block_height,
            'index': index,
            'time': tx['time'],
            'inputs': inputs,
            'outputs': outputs
        })

    return b, txs

def get_block_at(height=0, request_timeout=30):
    """Download the main-chain block at ``height`` and decode it.

    Parameters
    ----------
    height : int
        Block height, interpolated into the blockchain.info
        ``block-height`` URL.
    request_timeout : float
        Seconds to wait for the HTTP request before giving up. Without a
        timeout a stalled connection would block the notebook forever.

    Returns
    -------
    Whatever ``decode_block`` returns for the selected block.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If none of the returned blocks is flagged as main chain.
    """
    url = 'https://blockchain.info/block-height/' + str(height) + '?format=json'
    response = requests.get(url, timeout=request_timeout)
    # Fail on 4xx/5xx here rather than on a confusing JSON decoding error.
    response.raise_for_status()
    candidates = response.json()['blocks']

    # The response holds a list of blocks; keep the first one marked as
    # belonging to the main chain.
    main_block = next((b for b in candidates if b['main_chain']), None)
    if main_block is None:
        raise IndexError('no main-chain block returned for height ' + str(height))
    return decode_block(main_block)

In [375]:
def get_blocks_by_interval(interval=[], timeout=.3):
    """Fetch the blocks at every height in ``interval``, best effort.

    Parameters
    ----------
    interval : iterable of int
        Block heights to fetch, in order. It is only iterated, never
        mutated, so the shared list default is harmless.
    timeout : float
        Seconds to sleep after each fetched block (a pause between
        requests, not a network timeout).

    Returns
    -------
    (list of dict, list of dict)
        Block summaries and the concatenated transactions of every block
        fetched so far. If fetching a height fails, the error is printed,
        the remaining heights are skipped and the partial result is
        returned.
    """
    blocks = []
    txs = []
    height = None  # stays None if iterating `interval` itself fails

    try:
        for height in interval:
            b, ts = get_block_at(height)

            blocks.append(b)
            txs.extend(ts)

            sleep(timeout)
    except Exception as err:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still stop the notebook; report what failed and where.
        print('error ... height {}: {!r}'.format(height, err))

    return blocks, txs

In [387]:
## Pull blocks by height (proof-of-concept example)

In [377]:
# Smoke test: fetch the blocks at heights 0, 1 and 2.
blocks, txs = get_blocks_by_interval([0, 1, 2])

In [378]:
# One row per block, keyed by block height.
df_blocks = pd.DataFrame(data=blocks)
df_blocks = df_blocks.set_index(keys=['height'])

# One row per transaction, keyed by (block height, position in the block).
df_txs = pd.DataFrame(data=txs)
df_txs = df_txs.set_index(keys=['block', 'index'])

In [379]:
# Preview the first rows of the per-block summary table.
df_blocks.head()


Out[379]:
fee hash n_tx size time
height
0 0 000000000019d6689c085ae165831e934ff763ae46a2a6... 1 285 1231006505
1 0 00000000839a8e6886ab5951d76f411475428afc90947e... 1 215 1231469665
2 0 000000006a625f06636b8bb6ac7b960a8d03705d1ace08... 1 215 1231469744

In [380]:
# Last rows of the per-block table; with only three blocks fetched this
# shows the same rows as head().
df_blocks.tail()


Out[380]:
fee hash n_tx size time
height
0 0 000000000019d6689c085ae165831e934ff763ae46a2a6... 1 285 1231006505
1 0 00000000839a8e6886ab5951d76f411475428afc90947e... 1 215 1231469665
2 0 000000006a625f06636b8bb6ac7b960a8d03705d1ace08... 1 215 1231469744

In [381]:
# Preview the first rows of the transaction table, indexed by (block, index).
df_txs.head()


Out[381]:
inputs outputs time
block index
0 0 [] [(1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa, 50000000... 1231006505
1 0 [] [(12c6DSiU4Rq3P4ZxziKxzrL5LmMBrzjrJX, 50000000... 1231469665
2 0 [] [(1HLoD9E4SDFFPDiYfNYnkBLQ85Y51J3Zb1, 50000000... 1231469744

In [382]:
# Last rows of the transaction table; with one transaction per fetched
# block this shows the same rows as head().
df_txs.tail()


Out[382]:
inputs outputs time
block index
0 0 [] [(1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa, 50000000... 1231006505
1 0 [] [(12c6DSiU4Rq3P4ZxziKxzrL5LmMBrzjrJX, 50000000... 1231469665
2 0 [] [(1HLoD9E4SDFFPDiYfNYnkBLQ85Y51J3Zb1, 50000000... 1231469744

In [386]:
## Pull blocks in batches of consecutive heights and save them to CSV (proof-of-concept example)

In [383]:
# Output CSV paths, relative to the current working directory.
# NOTE(review): the doubled slashes look unintentional; './data/blocks.csv'
# should name the same file -- confirm before changing.
file_blocks = './/data//blocks.csv'
file_transactions = './/data//tx.csv'

def write_to_file(df_blocks, df_txs):
    """Write both frames, with a header row, to their CSV files.

    ``df_blocks`` goes to ``file_blocks`` and ``df_txs`` to
    ``file_transactions``; the blocks file is written first.
    """
    targets = (
        (df_blocks, file_blocks),
        (df_txs, file_transactions),
    )
    for frame, path in targets:
        frame.to_csv(path, header=True)

def append_to_file(df_blocks, df_txs):
    """Append both frames, without a header row, to their CSV files.

    ``df_blocks`` is appended to ``file_blocks`` and ``df_txs`` to
    ``file_transactions``.
    """
    # Hand pandas the path and let it open the file in append mode. A
    # text-mode handle opened without newline='' makes to_csv emit broken
    # line endings on platforms that translate '\n'.
    df_blocks.to_csv(file_blocks, mode='a', header=False)
    df_txs.to_csv(file_transactions, mode='a', header=False)

# options:
b_ = 0  # first batch number
e_ = 2  # batch number to stop before (exclusive)
i_ = 1  # step between batch numbers

period = 5  # number of consecutive block heights per batch

write_first = True

for start in np.arange(b_, e_, i_):
    # Batch bounds get their own names so they never overwrite the `e_`
    # option above.
    first_height = start * period
    stop_height = first_height + period

    interval = np.arange(first_height, stop_height, 1)
    blocks, txs = get_blocks_by_interval(interval)

    # get_blocks_by_interval returns whatever it fetched before an error,
    # possibly nothing. An empty batch has no 'height' column to index on,
    # so stop with a clear message instead of an opaque KeyError.
    if not blocks:
        raise RuntimeError(
            'no blocks fetched for heights {}..{}'.format(first_height, stop_height - 1)
        )

    df_blocks = pd.DataFrame(blocks).set_index(['height'])
    df_txs = pd.DataFrame(txs).set_index(['block', 'index'])

    # The first batch (re)creates the files with a header row; later
    # batches are appended without one.
    if write_first:
        write_to_file(df_blocks, df_txs)
        write_first = False
    else:
        append_to_file(df_blocks, df_txs)

In [384]:
# Read the saved block summaries back from CSV to check the export.
pd.read_csv(file_blocks, index_col=['height'])


Out[384]:
fee hash n_tx size time
height
0 0 000000000019d6689c085ae165831e934ff763ae46a2a6... 1 285 1231006505
1 0 00000000839a8e6886ab5951d76f411475428afc90947e... 1 215 1231469665
2 0 000000006a625f06636b8bb6ac7b960a8d03705d1ace08... 1 215 1231469744
3 0 0000000082b5015589a3fdf2d4baff403e6f0be035a5d9... 1 215 1231470173
4 0 000000004ebadb55ee9096c9a2f8880e09da59c0d68b1c... 1 215 1231470988
5 0 000000009b7262315dbf071787ad3656097b892abffd1f... 1 215 1231471428
6 0 000000003031a0e73735690c5a1ff2a4be82553b2a12b7... 1 215 1231471789
7 0 0000000071966c2b1d065fd446b1e485b2c9d9594acd20... 1 215 1231472369
8 0 00000000408c48f847aa786c2268fc3e6ec2af68e8468a... 1 215 1231472743
9 0 000000008d9dc510f23c2657fc4f67bea30078cc05a90e... 1 215 1231473279

In [385]:
# Read the saved transactions back from CSV to check the export.
pd.read_csv(file_transactions, index_col=['block','index'])


Out[385]:
inputs outputs time
block index
0 0 [] [('1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa', 500000... 1231006505
1 0 [] [('12c6DSiU4Rq3P4ZxziKxzrL5LmMBrzjrJX', 500000... 1231469665
2 0 [] [('1HLoD9E4SDFFPDiYfNYnkBLQ85Y51J3Zb1', 500000... 1231469744
3 0 [] [('1FvzCLoTPGANNjWoUo6jUGuAG3wg1w4YjR', 500000... 1231470173
4 0 [] [('15ubicBBWFnvoZLT7GiU2qxjRaKJPdkDMG', 500000... 1231470988
5 0 [] [('1JfbZRwdDHKZmuiZgYArJZhcuuzuw2HuMu', 500000... 1231471428
6 0 [] [('1GkQmKAmHtNfnD3LHhTkewJxKHVSta4m2a', 500000... 1231471789
7 0 [] [('16LoW7y83wtawMg5XmT4M3Q7EdjjUmenjM', 500000... 1231472369
8 0 [] [('1J6PYEzr4CUoGbnXrELyHszoTSz3wCsCaj', 500000... 1231472743
9 0 [] [('12cbQLTFMXRnSzktFkuoG3eHoMeFtpTu3S', 500000... 1231473279