In [62]:
import pandas as pd
import os
import json
from pprint import pprint
from pandas.io.json import json_normalize

In [64]:
# Input directory of graph JSON exports and output directory for the CSVs.
# Both end with a separator because later cells build paths by concatenation.
read_address = "../viz-data/"
write_address = "../csv-viz-data/"
# os.listdir returns entries in arbitrary order; sort so that every run
# processes (and concatenates) the files in the same order. Hidden entries
# such as .DS_Store are skipped because they are not data files.
file_names = sorted(
    name for name in os.listdir(read_address) if not name.startswith('.')
)

In [65]:
# Helper written for one specific task: tabulate each node's name and x/y position.
def get_single_data(data):
    """Build a table of node names and positions from one parsed graph file.

    Parameters
    ----------
    data : dict
        Parsed JSON with a 'nodes' list; each node must provide
        node['data']['properties']['name'], node['x'] and node['y'].

    Returns
    -------
    pandas.DataFrame
        One row per node. Columns are the default positional labels
        0, 1, 2 and hold the name, x and y values respectively.

    Raises
    ------
    KeyError
        If 'nodes' or any of the per-node keys listed above is missing.
    """
    # Edges are deliberately not read here: only node fields are used, so
    # inputs that carry no 'edges' key are accepted as well.
    rows = [
        [node['data']['properties']['name'], node['x'], node['y']]
        for node in data['nodes']
    ]
    return pd.DataFrame(rows)
# Flattens the nodes and edges of one graph and writes them out as CSV files.
def get_normalized_data(data, addr):
    """Flatten a graph's nodes and edges and write each table to a CSV file.

    Parameters
    ----------
    data : dict
        Parsed JSON holding 'nodes' and 'edges' lists of (nested) records.
    addr : str
        Output path prefix. Two files are written: addr + '-nodes.csv' and
        addr + '-edges.csv', both UTF-8 encoded.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The flattened nodes table and edges table, in that order.

    Raises
    ------
    KeyError
        If 'nodes' or 'edges' is missing from ``data``.
    """
    # Look up both keys before writing anything, so a missing key fails
    # without leaving a partial output behind.
    raw_nodes = data['nodes']
    raw_edges = data['edges']
    # NOTE(review): json_normalize is imported from pandas.io.json at the top
    # of the notebook; newer pandas exposes it as pandas.json_normalize.
    nodes = json_normalize(raw_nodes)
    edges = json_normalize(raw_edges)
    nodes.to_csv(addr + '-nodes.csv', encoding='utf-8')
    edges.to_csv(addr + '-edges.csv', encoding='utf-8')
    return nodes, edges

In [67]:
# Walk every input file once: collect its node-position table and, as a side
# effect of get_normalized_data, write its per-file node/edge CSVs.
single_frames = []
for file_name in file_names:
    file_addr = read_address + file_name
    with open(file_addr) as data_file:
        # `data` is left bound to the last file's JSON; later cells inspect it.
        data = json.load(data_file)
    single_frames.append(get_single_data(data))
    # The output prefix is the file name up to its first '.'.
    temp_file = file_name.split('.')[0]
    get_normalized_data(data, write_address + temp_file)
# Concatenate once instead of growing a DataFrame inside the loop, which
# re-copies every accumulated row on each iteration. pd.concat raises on an
# empty list, so fall back to an empty frame when there are no input files.
final_data = pd.concat(single_frames) if single_frames else pd.DataFrame()

In [68]:
# Show the parsed JSON left in `data` by the loop above (the last file it read).
data


Out[68]:
{u'edges': [{u'active': False,
   u'color': u'#8a6960',
   u'data': {u'properties': {}, u'type': u'registered'},
   u'hover': False,
   u'id': 617702,
   u'label': u'registered',
   u'read_cam0:size': 2,
   u'renderer1:size': 12.619146889603865,
   u'size': 1,
   u'source': 588376,
   u'target': 215525},
  {u'cc': {u'y': None},
   u'color': u'#d48a60',
   u'data': {u'properties': {u'date_end': u'',
     u'date_start': u'14/12/1999',
     u'name': u'Director'},
    u'type': u'is officer of'},
   u'hover': False,
   u'id': 69458,
   u'label': u'Director',
   u'read_cam0:size': 2,
   u'renderer1:size': 12.619146889603865,
   u'size': 1,
   u'source': 667735,
   u'target': 215525,
   u'type': u'curvedArrow'},
  {u'cc': {u'y': 2},
   u'color': u'#d48a60',
   u'data': {u'properties': {u'date_end': u'',
     u'date_start': u'14/12/1999',
     u'name': u'Secretary'},
    u'type': u'is officer of'},
   u'hover': False,
   u'id': 69459,
   u'label': u'Secretary',
   u'read_cam0:size': 2,
   u'renderer1:size': 12.619146889603865,
   u'size': 1,
   u'source': 667735,
   u'target': 215525,
   u'type': u'curvedArrow'}],
 u'nodes': [{u'active': True,
   u'color': u'#420e00',
   u'colors': {u'0': u'#420e00'},
   u'data': {u'categories': {u'0': u'Company'},
    u'properties': {u'file_number': u'18898',
     u'inactivationDate': u'04-APR-2003',
     u'jurisdiction': u'PMA',
     u'name': u'TREI INVESTMENTS CORP.',
     u'registrationDate': u'10-DEC-1999',
     u'status': u'REN',
     u'struck_off_date': u'10-MAR-2001'},
    u'statistics': {u'digest': {u'0': {u'edgeType': u'is officer of',
       u'edges': 2,
       u'nodeCategories': {u'0': u'Officer'},
       u'nodes': 1},
      u'1': {u'edgeType': u'registered',
       u'edges': 1,
       u'nodeCategories': {u'0': u'Client'},
       u'nodes': 1}},
     u'edgeCount': 3,
     u'hiddenEdgeCount': 0,
     u'visibleEdgeCount': 3}},
   u'fa2_x': -0.29408618807792664,
   u'fa2_y': 1.4750128984451294,
   u'fixed': True,
   u'geo': {u'latitudeDiff': 0, u'longitudeDiff': 0},
   u'glyphs': {u'0': {u'content': u'0',
     u'draw': False,
     u'position': u'top-right'},
    u'1': {u'content': u'\uf08d',
     u'draw': True,
     u'font': u'FontAwesome',
     u'position': u'bottom-right',
     u'textColor': u'#000'}},
   u'icon': {u'color': u'#fff',
    u'content': u'\uf0f7',
    u'font': u'FontAwesome',
    u'scale': 1},
   u'id': 215525,
   u'label': u'TREI INVESTMENTS CORP.',
   u'nodelink': {u'x': -7.799, u'y': -6.149},
   u'read_cam0:size': 5,
   u'read_cam0:x': 7.052376556396484,
   u'read_cam0:y': -15.9698992729187,
   u'renderer1:size': 31.547867224009664,
   u'renderer1:x': 1356.5,
   u'renderer1:y': 207,
   u'selected': True,
   u'size': 1,
   u'x': 7.052376556396484,
   u'y': -15.9698992729187},
  {u'active': False,
   u'color': u'#851d00',
   u'colors': {u'0': u'#851d00'},
   u'data': {u'categories': {u'0': u'Client'},
    u'properties': {u'active_since': u'05-SEP-1995',
     u'activity': u'STOCK EXCHANGE',
     u'city': u'LIMA',
     u'classification': u'Special rates',
     u'client_name': u'MOSSACK FONSECA & CO. (PERU) CORP.',
     u'client_number': u'7879',
     u'compliance_classification': u'INTERMEDIARY',
     u'country': u'PERU',
     u'cross_reference': u'',
     u'former_name': u'ARGENTA INTERNATIONAL LIMITED',
     u'name': u'MOSSACK FONSECA & CO. (PERU) CORP.',
     u'prospect_date': u'',
     u'region': u'REPRESENTATIVE CLIENTS',
     u'status': u'ACTIVE',
     u'subclassification': u'Representative'},
    u'statistics': {u'digest': {u'0': {u'edgeType': u'registered',
       u'edges': 2055,
       u'nodeCategories': {u'0': u'Company'},
       u'nodes': 2055}},
     u'edgeCount': 2055,
     u'hiddenEdgeCount': 2054,
     u'visibleEdgeCount': 1}},
   u'fa2_x': -11.027679443359375,
   u'fa2_y': -8.318906784057617,
   u'fixed': True,
   u'geo': {u'latitudeDiff': 0, u'longitudeDiff': 0},
   u'glyphs': {u'0': {u'content': u'2.054k',
     u'draw': True,
     u'position': u'top-right'},
    u'1': {u'content': u'\uf08d',
     u'draw': True,
     u'font': u'FontAwesome',
     u'position': u'bottom-right',
     u'textColor': u'#000'}},
   u'icon': {u'color': u'#fff',
    u'content': u'\uf19c',
    u'font': u'FontAwesome',
    u'scale': 1},
   u'id': 588376,
   u'label': u'MOSSACK FONSECA & CO. (PERU) CORP.',
   u'nodelink': {u'x': 11.654, u'y': 9.189},
   u'read_cam0:size': 5,
   u'read_cam0:x': -21.347623443603517,
   u'read_cam0:y': -1.5698992729187022,
   u'renderer1:size': 31.547867224009664,
   u'renderer1:x': 1072.5,
   u'renderer1:y': 351,
   u'size': 1,
   u'x': -21.347623443603517,
   u'y': -1.5698992729187022},
  {u'color': u'#be4400',
   u'colors': {u'0': u'#be4400'},
   u'data': {u'categories': {u'0': u'Officer'},
    u'properties': {u'name': u'C\xe9sar Almeyda'},
    u'statistics': {u'digest': {u'0': {u'edgeType': u'is officer of',
       u'edges': 2,
       u'nodeCategories': {u'0': u'Company'},
       u'nodes': 1}},
     u'edgeCount': 2,
     u'hiddenEdgeCount': 0,
     u'visibleEdgeCount': 2}},
   u'fa2_x': 16.532432556152344,
   u'fa2_y': 15.079108238220215,
   u'fixed': True,
   u'glyphs': {u'0': {u'content': u'0',
     u'draw': False,
     u'position': u'top-right'},
    u'1': {u'content': u'\uf08d',
     u'draw': True,
     u'font': u'FontAwesome',
     u'position': u'bottom-right',
     u'textColor': u'#000'}},
   u'icon': {u'color': u'#fff',
    u'content': u'\uf007',
    u'font': u'FontAwesome',
    u'scale': 1},
   u'id': 667735,
   u'label': u'C\xe9sar Almeyda',
   u'read_cam0:size': 5,
   u'read_cam0:x': 20.15237655639649,
   u'read_cam0:y': 14.430100727081296,
   u'renderer1:size': 31.547867224009664,
   u'renderer1:x': 1487.5,
   u'renderer1:y': 511,
   u'size': 1,
   u'x': 20.15237655639649,
   u'y': 14.430100727081296}]}

In [70]:
# Pull the raw node and edge record lists out of the parsed JSON for inspection.
nodes = data['nodes']
edges = data['edges']

In [71]:
# Flatten the nested node records into a table; nested keys become dotted column names.
json_normalize(nodes)


Out[71]:
active color colors.0 data.categories.0 data.properties.active_since data.properties.activity data.properties.city data.properties.classification data.properties.client_name data.properties.client_number ... read_cam0:size read_cam0:x read_cam0:y renderer1:size renderer1:x renderer1:y selected size x y
0 True #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 7.052377 -15.969899 31.547867 1356.5 207 True 1 7.052377 -15.969899
1 False #851d00 #851d00 Client 05-SEP-1995 STOCK EXCHANGE LIMA Special rates MOSSACK FONSECA & CO. (PERU) CORP. 7879 ... 5 -21.347623 -1.569899 31.547867 1072.5 351 NaN 1 -21.347623 -1.569899
2 NaN #be4400 #be4400 Officer NaN NaN NaN NaN NaN NaN ... 5 20.152377 14.430101 31.547867 1487.5 511 NaN 1 20.152377 14.430101

3 rows × 66 columns


In [79]:
# Re-read every input file, flatten its nodes and edges (this also rewrites
# the per-file CSVs) and combine the per-file tables into two overall tables.
node_frames = []
edge_frames = []
for file_name in file_names:
    file_addr = read_address + file_name
    with open(file_addr) as data_file:
        data = json.load(data_file)
    # The output prefix is the file name up to its first '.'.
    temp_file = file_name.split('.')[0]
    # `nodes` / `edges` stay bound to the last file's tables after the loop.
    nodes, edges = get_normalized_data(data, write_address + temp_file)
    node_frames.append(nodes)
    edge_frames.append(edges)
# Concatenate once rather than inside the loop to avoid re-copying all rows on
# every iteration; each file keeps its own 0-based row index, as before.
# pd.concat raises on an empty list, hence the empty-frame fallback.
final_nodes = pd.concat(node_frames) if node_frames else pd.DataFrame()
final_edges = pd.concat(edge_frames) if edge_frames else pd.DataFrame()

In [74]:
# Show whatever `nodes` currently holds.
# NOTE(review): this cell's execution count (74) is lower than that of the
# loop cell above it (79), so the recorded output may not match a fresh
# top-to-bottom run -- confirm with Restart & Run All.
nodes


Out[74]:
active color colors.0 data.categories.0 data.properties.active_since data.properties.activity data.properties.ceasedmembership data.properties.certificateNumber data.properties.citizenship data.properties.city ... read_cam0:size read_cam0:x read_cam0:y renderer1:size renderer1:x renderer1:y selected size x y
0 False #be4400 #be4400 Officer NaN NaN 4 NO NaN ... 5 35.000000 -14.700000 31.547867 1535.145000 389.187099 True 1 35.000000 -14.700000
1 True #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 -16.029000 -14.318000 31.547867 1024.855000 393.007099 NaN 1 -16.029000 -14.318000
2 False #937c6f #937c6f Address NaN NaN NaN NaN NaN NaN ... 5 18.631311 -31.809766 31.547867 1371.458110 218.089444 NaN 1 18.631311 -31.809766
3 NaN #851d00 #851d00 Client 20-MAY-2004 CORPORATE & BUSINESS SERVICES NaN NaN NaN HONG KONG ... 5 8.317242 -0.627654 31.547867 1268.317417 529.910556 NaN 1 8.317242 -0.627654
4 NaN #be4400 #be4400 Officer NaN NaN NaN NaN NaN NaN ... 5 -6.868689 -34.809766 31.547867 1116.458110 188.089444 NaN 1 -6.868689 -34.809766

5 rows × 81 columns


In [81]:
# Display the node table combined across all input files.
# NOTE(review): this renders the whole frame; final_nodes.head() would keep
# the saved output small.
final_nodes


Out[81]:
active color colors.0 data.categories.0 data.properties.active_since data.properties.activity data.properties.ceasedmembership data.properties.certificateNumber data.properties.citizenship data.properties.city ... read_cam0:size read_cam0:x read_cam0:y renderer1:size renderer1:x renderer1:y selected size x y
0 False #be4400 #be4400 Officer NaN NaN 4 NO NaN ... 5 35.000000 -14.700000 31.547867 1535.145000 389.187099 True 1 35.000000 -14.700000
1 True #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 -16.029000 -14.318000 31.547867 1024.855000 393.007099 NaN 1 -16.029000 -14.318000
2 False #937c6f #937c6f Address NaN NaN NaN NaN NaN NaN ... 5 18.631311 -31.809766 31.547867 1371.458110 218.089444 NaN 1 18.631311 -31.809766
3 NaN #851d00 #851d00 Client 20-MAY-2004 CORPORATE & BUSINESS SERVICES NaN NaN NaN HONG KONG ... 5 8.317242 -0.627654 31.547867 1268.317417 529.910556 NaN 1 8.317242 -0.627654
4 NaN #be4400 #be4400 Officer NaN NaN NaN NaN NaN NaN ... 5 -6.868689 -34.809766 31.547867 1116.458110 188.089444 NaN 1 -6.868689 -34.809766
0 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 -8.000000 14.100000 31.547867 1354.630000 547.000000 NaN 1 -8.000000 14.100000
1 False #be4400 #be4400 Officer NaN NaN 1 NO NaN ... 5 -34.386000 -2.376000 31.547867 1090.770000 382.240000 NaN 1 -34.386000 -2.376000
2 False #851d00 #851d00 Client NaN NaN NaN NaN NaN NaN ... 5 3.460000 -0.176000 31.547867 1469.230000 404.240000 NaN 1 3.460000 -0.176000
3 False #be4400 #be4400 Officer NaN NaN NaN NaN NaN NaN ... 5 -4.400000 -23.500000 31.547867 1390.630000 171.000000 NaN 1 -4.400000 -23.500000
0 False #be4400 #be4400 Officer NaN NaN 7 NO NaN ... 5 3.539448 -4.690944 19.182718 1254.178310 226.225960 NaN 1 3.539448 -4.690944
1 True #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 -13.575000 -7.602000 19.182718 1162.284120 210.595356 True 1 -13.575000 -7.602000
2 False #937c6f #937c6f Address NaN NaN NaN NaN NaN NaN ... 5 72.057000 4.953000 19.182718 1622.075953 278.008081 NaN 1 72.057000 4.953000
3 False #851d00 #851d00 Client 08-MAR-2012 TAX ADVISOR NaN NaN NaN WARSAW ... 5 -55.360000 -30.649000 19.182718 937.924047 86.846963 NaN 1 -55.360000 -30.649000
4 False #be4400 #be4400 Officer NaN NaN 30-09-2014 6 NO NaN ... 5 20.153000 59.855000 19.182718 1343.382996 572.798477 NaN 1 20.153000 59.855000
5 False #be4400 #be4400 Officer NaN NaN 12-02-2014 1 NO NaN ... 5 19.591000 50.111000 19.182718 1340.365397 520.479115 NaN 1 19.591000 50.111000
6 False #be4400 #be4400 Officer NaN NaN 12-02-2014 2 NO NaN ... 5 20.153000 70.723000 19.182718 1343.382996 631.153037 NaN 1 20.153000 70.723000
7 False #be4400 #be4400 Officer NaN NaN 12-02-2014 3 NO NaN ... 5 18.842000 22.754000 19.182718 1336.343722 373.588640 NaN 1 18.842000 22.754000
8 False #be4400 #be4400 Officer NaN NaN 12-02-2014 4 NO NaN ... 5 19.216000 32.123000 19.182718 1338.351875 423.894480 NaN 1 19.216000 32.123000
9 False #be4400 #be4400 Officer NaN NaN 12-02-2014 5 NO NaN ... 5 19.404000 40.930000 19.182718 1339.361321 471.182721 NaN 1 19.404000 40.930000
10 False #be4400 #be4400 Officer NaN NaN NaN NaN NO NaN ... 5 18.654000 13.760000 19.182718 1335.334276 325.296322 NaN 1 18.654000 13.760000
0 False #be4400 #be4400 Officer NaN NaN 7 NO NaN ... 5 6.909523 -31.677975 28.052787 1355.591950 159.383918 NaN 1 6.909523 -31.677975
1 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 -53.921163 -38.187532 28.052787 830.324811 103.174520 NaN 1 -53.921163 -38.187532
2 False #937c6f #937c6f Address NaN NaN NaN NaN NaN NaN ... 5 55.170031 -32.351378 28.052787 1772.316802 153.569153 NaN 1 55.170031 -32.351378
3 False #be4400 #be4400 Officer NaN NaN 1 NO NaN ... 5 4.664849 -1.150398 28.052787 1336.209399 422.986615 True 1 4.664849 -1.150398
4 False #be4400 #be4400 Officer NaN NaN 6 NO NaN ... 5 6.011654 -16.414187 28.052787 1347.838929 291.185267 NaN 1 6.011654 -16.414187
5 False #be4400 #be4400 Officer NaN NaN 10-09-2014 4 NO NaN ... 5 4.889316 -8.782293 28.052787 1338.147654 357.085941 NaN 1 4.889316 -8.782293
6 False #be4400 #be4400 Officer NaN NaN 10-09-2014 5 NO NaN ... 5 6.236121 -23.372679 28.052787 1349.777184 231.099358 NaN 1 6.236121 -23.372679
7 False #851d00 #851d00 Client 16-MAR-2011 CORPORATE & BUSINESS SERVICES NaN NaN NaN TSIM SHA TSUI ... 5 -61.328590 0.196407 28.052787 766.362392 434.616146 NaN 1 -61.328590 0.196407
8 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 -26.594000 20.967000 28.052787 1066.292243 613.968225 NaN 1 -26.594000 20.967000
9 False #be4400 #be4400 Officer NaN NaN 2 NO NaN ... 5 20.552548 21.066278 28.052787 1473.398160 614.825480 NaN 1 20.552548 21.066278
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 14.600000 30.300000 14.895528 1265.525913 521.797019 NaN 1 14.600000 30.300000
6 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 12.400000 -55.700000 14.895528 1256.915380 185.203447 NaN 1 12.400000 -55.700000
7 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 15.600000 37.900000 14.895528 1269.439792 551.542498 NaN 1 15.600000 37.900000
8 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 17.600000 45.300000 14.895528 1277.267550 580.505201 NaN 1 17.600000 45.300000
9 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 15.800000 -9.100000 14.895528 1270.222568 367.590197 NaN 1 15.800000 -9.100000
10 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 15.000000 -17.900000 14.895528 1267.091465 333.148064 NaN 1 15.000000 -17.900000
11 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 12.000000 -46.300000 14.895528 1255.349829 221.993907 NaN 1 12.000000 -46.300000
12 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 18.200000 53.100000 14.895528 1279.615877 611.033455 NaN 1 18.200000 53.100000
13 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 12.800000 -36.100000 14.895528 1258.480932 261.915470 NaN 1 12.800000 -36.100000
14 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 14.600000 6.900000 14.895528 1265.525913 430.212257 NaN 1 14.600000 6.900000
15 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 13.400000 -28.100000 14.895528 1260.829259 293.226500 NaN 1 13.400000 -28.100000
16 False #be4400 #be4400 Officer NaN NaN NaN NaN NO NaN ... 5 99.373031 47.536300 14.895528 1597.317278 589.257808 NaN 1 99.373031 47.536300
17 False #be4400 #be4400 Officer NaN NaN NaN NaN NO NaN ... 5 -69.582677 51.914570 14.895528 936.045123 606.393824 NaN 1 -69.582677 51.914570
18 False #be4400 #be4400 Officer NaN NaN 16-08-2013 1 NO NaN ... 5 106.178965 -10.423909 14.895528 1623.954877 362.408576 NaN 1 106.178965 -10.423909
19 False #be4400 #be4400 Officer NaN NaN 23-08-2013 1 NO NaN ... 5 -50.978848 -82.689608 14.895528 1008.858255 79.569394 NaN 1 -50.978848 -82.689608
0 NaN #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 19.600000 -8.100000 31.547867 1311.000000 373.000000 NaN 1 19.600000 -8.100000
1 NaN #be4400 #be4400 Officer NaN NaN 1 NO NaN ... 5 36.200000 9.300000 31.547867 1477.000000 547.000000 NaN 1 36.200000 9.300000
2 NaN #be4400 #be4400 Officer NaN NaN 2 NO NaN ... 5 38.200000 -28.300000 31.547867 1497.000000 171.000000 NaN 1 38.200000 -28.300000
3 NaN #851d00 #851d00 Client 09-JUN-2010 ACCOUNTANT / CONSULTANTS NaN NaN NaN MONTEVIDEO ... 5 -2.200000 7.700000 31.547867 1093.000000 531.000000 NaN 1 -2.200000 7.700000
4 NaN #be4400 #be4400 Officer NaN NaN NaN NaN NaN NaN ... 5 -5.200000 -28.100000 31.547867 1063.000000 173.000000 NaN 1 -5.200000 -28.100000
0 False #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 -10.200000 -8.900000 31.547867 1048.000000 300.000000 NaN 1 -10.200000 -8.900000
1 False #be4400 #be4400 Officer NaN NaN 9 NO NaN ... 5 2.400000 13.300000 31.547867 1174.000000 522.000000 NaN 1 2.400000 13.300000
2 False #937c6f #937c6f Address NaN NaN NaN NaN NaN NaN ... 5 36.200000 -6.700000 31.547867 1512.000000 322.000000 NaN 1 36.200000 -6.700000
3 False #851d00 #851d00 Client 25-SEP-1992 CO. FORMATION SERVICE NaN NaN NaN LONDON ... 5 15.600000 -19.300000 31.547867 1306.000000 196.000000 NaN 1 15.600000 -19.300000
0 True #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 4.175474 -6.075677 31.547867 1413.000000 267.000000 True 1 4.175474 -6.075677
1 False #851d00 #851d00 Client 30-SEP-1993 ATTORNEY NaN NaN NaN MADRID ... 5 -0.824526 12.324323 31.547867 1363.000000 451.000000 NaN 1 -0.824526 12.324323
2 NaN #be4400 #be4400 Officer NaN NaN NaN NaN NaN NaN ... 5 -22.424526 5.524323 31.547867 1147.000000 383.000000 NaN 1 -22.424526 5.524323
0 True #420e00 #420e00 Company NaN NaN NaN NaN NaN NaN ... 5 7.052377 -15.969899 31.547867 1356.500000 207.000000 True 1 7.052377 -15.969899
1 False #851d00 #851d00 Client 05-SEP-1995 STOCK EXCHANGE NaN NaN NaN LIMA ... 5 -21.347623 -1.569899 31.547867 1072.500000 351.000000 NaN 1 -21.347623 -1.569899
2 NaN #be4400 #be4400 Officer NaN NaN NaN NaN NaN NaN ... 5 20.152377 14.430101 31.547867 1487.500000 511.000000 NaN 1 20.152377 14.430101

710 rows × 86 columns


In [ ]: