Collect block data from etherchain API


In [4]:
# import modules
from pymongo import MongoClient
import pandas as pd
import requests
import time

In [5]:
# connect to the hosted MongoDB instance
client = MongoClient()
db = client['etherchain']
collection = db['blocks']
# the transactions with be uniquely identfied by their unique blockid, preventing duplicates
collection.create_index([('number', 1)], unique=True)


Out[5]:
u'number_1'

In [6]:
def collect_blocks(count, offset):
    """
    DESCRIPTION:
        Collects block data from etherchain.org API
        https://etherchain.org/documentation/api/
    
    INPUT:
        - offset: the number of block ids to skip
        - count: the number of blocks to return (max 100 blocks per request)
        - sleeptime: number of seconds to sleep between api requests
    OUTPUT:
        - stores each blockchain block as a document in a MongoDB collection
    """
    
    BASE_URL = 'https://etherchain.org/api/blocks/{}/{}'.format(offset, count)
    r = requests.get(BASE_URL)

    if r.status_code != 200:
        print('status code = {}'.format(r.status_code))

    else:
    # store each block in our mongo database
        r = convert_price(r.json())

        for row in range(len(r['data'])):
            try:

                collection.insert_one(r['data'][row])

            except Exception as e:
                print e
                #print(r['data'][row])
                #pass
        
def convert_price(json_dict):
    
    for tx in json_dict['data']:
        tx['size'] = float(tx['size'])
        tx['tx_count'] = float(tx['tx_count'])
        tx['uncle_count'] = float(tx['uncle_count'])
        tx['reward'] = float(tx['reward'])
        tx['totalFee'] = float(tx['totalFee'])
        tx['difficulty'] = float(tx['difficulty'])
    
        return json_dict

def call_api_blocks():
    
    offset = 1253000
    while True:
        collect_blocks(100, offset)
        offset += 200
        if offset % 1000 == 0:
            print('offset: {}'.format(offset))
        time.sleep(20)

In [13]:
call_api_blocks()


E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059637 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059636 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059635 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059634 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059633 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059632 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059631 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059630 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059629 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059628 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059627 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059626 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059625 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059624 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059623 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059622 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059621 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059620 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059619 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059618 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059617 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059616 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059615 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059614 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059613 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059612 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059611 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059610 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059609 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059608 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059607 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059606 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059605 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059604 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059603 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059602 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059601 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059600 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059599 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059598 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059597 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059596 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059595 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059594 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059593 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059592 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059591 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059590 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059589 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059588 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059587 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059586 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059585 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059584 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059583 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059582 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059581 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059580 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059579 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059578 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059577 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059576 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059575 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059574 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059573 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059572 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059571 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059570 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059569 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059437 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059436 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059435 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059434 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059433 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059432 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059431 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059430 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059429 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059428 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059427 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059426 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059425 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059424 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059423 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059422 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059421 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059420 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059419 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059418 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059417 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059416 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059415 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059414 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059413 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059412 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059411 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059410 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059409 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059408 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059407 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059406 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059405 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059404 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059403 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059402 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059401 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059400 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059399 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059398 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059397 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059396 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059395 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059394 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059393 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059392 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059391 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059390 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059389 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059388 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059387 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059386 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059385 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059384 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059383 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059382 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059381 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059380 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059379 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059378 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059377 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059376 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059375 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059374 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059373 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059372 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059371 }
E11000 duplicate key error collection: etherchain.blocks_2 index: number_1 dup key: { : 3059370 }
offset: 1254000
offset: 1255000
offset: 1256000
offset: 1257000
offset: 1258000
offset: 1259000
offset: 1260000
offset: 1261000
offset: 1262000
offset: 1263000
offset: 1264000
offset: 1265000
offset: 1266000
offset: 1267000
offset: 1268000
offset: 1269000
offset: 1270000
offset: 1271000
offset: 1272000
offset: 1273000
offset: 1274000
offset: 1275000
offset: 1276000
offset: 1277000
offset: 1278000
offset: 1279000
offset: 1280000
offset: 1281000
offset: 1282000
offset: 1283000
offset: 1284000
offset: 1285000
offset: 1286000
offset: 1287000
offset: 1288000
offset: 1289000
offset: 1290000
offset: 1291000
offset: 1292000
offset: 1293000
offset: 1294000
offset: 1295000
offset: 1296000
offset: 1297000
offset: 1298000
offset: 1299000
offset: 1300000
offset: 1301000
offset: 1302000
offset: 1303000
offset: 1304000
offset: 1305000
offset: 1306000
offset: 1307000
offset: 1308000
offset: 1309000
offset: 1310000
offset: 1311000
offset: 1312000
offset: 1313000
offset: 1314000
offset: 1315000
offset: 1316000
offset: 1317000
offset: 1318000
offset: 1319000
offset: 1320000
offset: 1321000
offset: 1322000
offset: 1323000
offset: 1324000
offset: 1325000
offset: 1326000
offset: 1327000
offset: 1328000
offset: 1329000
offset: 1330000
offset: 1331000
offset: 1332000
offset: 1333000
offset: 1334000
offset: 1335000
offset: 1336000
offset: 1337000
offset: 1338000
offset: 1339000
offset: 1340000
offset: 1341000
offset: 1342000
offset: 1343000
offset: 1344000
offset: 1345000
offset: 1346000
offset: 1347000
offset: 1348000
offset: 1349000
offset: 1350000
offset: 1351000
offset: 1352000
offset: 1353000
offset: 1354000
offset: 1355000
offset: 1356000
offset: 1357000
offset: 1358000
offset: 1359000
offset: 1360000
offset: 1361000
offset: 1362000
offset: 1363000
offset: 1364000
offset: 1365000
offset: 1366000
offset: 1367000
offset: 1368000
offset: 1369000
offset: 1370000
offset: 1371000
offset: 1372000
offset: 1373000
offset: 1374000
offset: 1375000
offset: 1376000
offset: 1377000
offset: 1378000
offset: 1379000
offset: 1380000
offset: 1381000
offset: 1382000
offset: 1383000
offset: 1384000
offset: 1385000
offset: 1386000
offset: 1387000
offset: 1388000
offset: 1389000
offset: 1390000
offset: 1391000
offset: 1392000
offset: 1393000
offset: 1394000
status code = 502
offset: 1395000
offset: 1396000
offset: 1397000
offset: 1398000
offset: 1399000
offset: 1400000
offset: 1401000
offset: 1402000
offset: 1403000
offset: 1404000
offset: 1405000
offset: 1406000
offset: 1407000
offset: 1408000
offset: 1409000
offset: 1410000
offset: 1411000
offset: 1412000
offset: 1413000
offset: 1414000
offset: 1415000
offset: 1416000
offset: 1417000
offset: 1418000
offset: 1419000
offset: 1420000
offset: 1421000
offset: 1422000
offset: 1423000
offset: 1424000
offset: 1425000
offset: 1426000
offset: 1427000
offset: 1428000
offset: 1429000
offset: 1430000
offset: 1431000
offset: 1432000
offset: 1433000
offset: 1434000
offset: 1435000
offset: 1436000
offset: 1437000
offset: 1438000
offset: 1439000
offset: 1440000
offset: 1441000
offset: 1442000
offset: 1443000
offset: 1444000
offset: 1445000
offset: 1446000
offset: 1447000
offset: 1448000
offset: 1449000
offset: 1450000
offset: 1451000
offset: 1452000
offset: 1453000
offset: 1454000
offset: 1455000
offset: 1456000
offset: 1457000
offset: 1458000
offset: 1459000
offset: 1460000
offset: 1461000
offset: 1462000
offset: 1463000
offset: 1464000
offset: 1465000
offset: 1466000
offset: 1467000
offset: 1468000
offset: 1469000
offset: 1470000
offset: 1471000
offset: 1472000
offset: 1473000
offset: 1474000
offset: 1475000
offset: 1476000
offset: 1477000
offset: 1478000
offset: 1479000
offset: 1480000
offset: 1481000
offset: 1482000
offset: 1483000
offset: 1484000
offset: 1485000
offset: 1486000
offset: 1487000
offset: 1488000
offset: 1489000
offset: 1490000
offset: 1491000
status code = 502
status code = 502
offset: 1492000
offset: 1493000
offset: 1494000
offset: 1495000
offset: 1496000
offset: 1497000
offset: 1498000
offset: 1499000
offset: 1500000
offset: 1501000
offset: 1502000
offset: 1503000
---------------------------------------------------------------------------
ConnectionError                           Traceback (most recent call last)
<ipython-input-13-1e1d4cfb1e18> in <module>()
----> 1 call_api_blocks()

<ipython-input-12-94b37534c06c> in call_api_blocks()
     53     offset = 1253000
     54     while True:
---> 55         collect_blocks(100, offset)
     56         offset += 200
     57         if offset % 1000 == 0:

<ipython-input-12-94b37534c06c> in collect_blocks(count, offset)
     14 
     15     BASE_URL = 'https://etherchain.org/api/blocks/{}/{}'.format(offset, count)
---> 16     r = requests.get(BASE_URL)
     17 
     18     if r.status_code != 200:

//anaconda/lib/python2.7/site-packages/requests/api.pyc in get(url, params, **kwargs)
     70 
     71     kwargs.setdefault('allow_redirects', True)
---> 72     return request('get', url, params=params, **kwargs)
     73 
     74 

//anaconda/lib/python2.7/site-packages/requests/api.pyc in request(method, url, **kwargs)
     56     # cases, and look like a memory leak in others.
     57     with sessions.Session() as session:
---> 58         return session.request(method=method, url=url, **kwargs)
     59 
     60 

//anaconda/lib/python2.7/site-packages/requests/sessions.pyc in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    506         }
    507         send_kwargs.update(settings)
--> 508         resp = self.send(prep, **send_kwargs)
    509 
    510         return resp

//anaconda/lib/python2.7/site-packages/requests/sessions.pyc in send(self, request, **kwargs)
    616 
    617         # Send the request
--> 618         r = adapter.send(request, **kwargs)
    619 
    620         # Total elapsed time of the request (approximately)

//anaconda/lib/python2.7/site-packages/requests/adapters.pyc in send(self, request, stream, timeout, verify, cert, proxies)
    506                 raise SSLError(e, request=request)
    507 
--> 508             raise ConnectionError(e, request=request)
    509 
    510         except ClosedPoolError as e:

ConnectionError: HTTPSConnectionPool(host='etherchain.org', port=443): Max retries exceeded with url: /api/blocks/1503000/100 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x10e7f5190>: Failed to establish a new connection: [Errno 65] No route to host',))

In [ ]: