In [19]:
from graphframes import *
from pyspark.sql import functions as F
from pyspark import Row

#regular Python packs

import os
import re
import xml.etree.ElementTree as ET
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#paths
PATH = "/home/svanhmic/workspace/Python/Erhvervs/data/regnskabsdata/tax/XBRL20130401/20130401/fsa"

In [2]:
files = os.listdir(PATH)
print(files)


['fsa_gla-en.xml', 'fsa-lab-da.xml', 'fsa_gla-da.xml', '401bsr_pre.xml', '700scl_pre.xml', '400bsa_pre.xml', '500isf_pre.xml', '701scs_def.xml', 'fsa-lab-en.xml', '8NNdsc_pre.xml', '600cfs_pre.xml', 'fsa-label.xml', '9NNdim_def.xml', '400bsa_def.xml', '8NNdsc_def.xml', '700scl_def.xml', '502pdr_pre.xml', 'fsa.xsd', 'fsa_ref.xml', '501isn_pre.xml', '500isf_def.xml', '401bsr_def.xml', '701scs_pre.xml', '502pdr_def.xml', '600cfs_def.xml', '501isn_def.xml']

In [3]:
rdd = sc.textFile(name=PATH+"/"+files[13])

In [4]:
rdd.take(10)


Out[4]:
['<?xml version="1.0" encoding="UTF-8"?>',
 '<!-- Generated by Fujitsu Interstage XWand B0166 -->',
 '<?officialURI http://archprod.service.eogs.dk/taxonomy/20130401/fsa/400bsa_def.xml?>',
 '',
 '<?taxonomy-version 1.5.0?>',
 '',
 '<link:linkbase xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.xbrl.org/2003/linkbase http://www.xbrl.org/2003/xbrl-linkbase-2003-12-31.xsd" xmlns:link="http://www.xbrl.org/2003/linkbase" xmlns:tch="http://xbrl.dcca.dk/tch" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xbrli="http://www.xbrl.org/2003/instance" xmlns:xbrldt="http://xbrl.org/2005/xbrldt">',
 '  <link:roleRef roleURI="http://xbrl.dcca.dk/role/400.00/BalanceSheetAccountForm" xlink:type="simple" xlink:href="fsa.xsd#fsa_RoleBalanceSheetAccountForm"/>',
 '  <link:roleRef roleURI="http://xbrl.dcca.dk/role/901.01/DimensionConsolidatedSolo" xlink:type="simple" xlink:href="../cmn.xsd#cmn_RoleDimensionConsolidatedSolo"/>',
 '  <link:arcroleRef arcroleURI="http://xbrl.org/int/dim/arcrole/all" xlink:type="simple" xlink:href="http://www.xbrl.org/2005/xbrldt-2005.xsd#all"/>']

In [5]:
cleanRdd = rdd.map(lambda x: x.split()).filter(lambda x: x != [])

In [6]:
def cleanupRdd(rdd,string):
    
    return (rdd
            .filter(lambda x: x[0]== string)
            .map(lambda x: [re.sub(r"\w+:|<\w+:|/>",repl="",string=i) for i in x[1:]])
            .map(lambda x: [re.sub(r'"',repl="",string=i) for i in x])
            .map(lambda x: dict(element.split('=') for element in x if "=" in element))
            .map(lambda x: dict([(k,v) for k,v in x.items() if k in ["href","label","type"]]))
            #.map(lambda x: len(x))
            .map(lambda x: Row(**x))
            .toDF()
           )

In [7]:
vert = cleanupRdd(cleanRdd,"<link:loc")
vert.count()


Out[7]:
173

In [8]:
vert.show(vert.count())#.take(10)


+--------------------+--------------------+-------+
|                href|               label|   type|
+--------------------+--------------------+-------+
|../cmn.xsd#cmn_Co...|     CommonHypercube|locator|
|../cmn.xsd#cmn_Co...|ConsolidatedSoloD...|locator|
|  fsa.xsd#fsa_Assets|              Assets|locator|
|fsa.xsd#fsa_Noncu...|    NoncurrentAssets|locator|
|fsa.xsd#fsa_Intan...|    IntangibleAssets|locator|
|fsa.xsd#fsa_Compl...|CompletedDevelopm...|locator|
|fsa.xsd#fsa_Conce...|ConcessionsOrigin...|locator|
|fsa.xsd#fsa_Paten...|PatentsOriginatin...|locator|
|fsa.xsd#fsa_Trade...|TrademarksOrigina...|locator|
|fsa.xsd#fsa_Other...|OtherSimilarRight...|locator|
|fsa.xsd#fsa_Acqui...|AcquiredIntangibl...|locator|
|fsa.xsd#fsa_Acqui...| AcquiredConcessions|locator|
|fsa.xsd#fsa_Acqui...|     AcquiredPatents|locator|
|fsa.xsd#fsa_Acqui...|    AcquiredLicences|locator|
|fsa.xsd#fsa_Acqui...|  AcquiredTrademarks|locator|
|fsa.xsd#fsa_Acqui...|AcquiredOtherSimi...|locator|
|fsa.xsd#fsa_Goodwill|            Goodwill|locator|
|fsa.xsd#fsa_Devel...|DevelopmentProjec...|locator|
|fsa.xsd#fsa_Devel...|DevelopmentProjec...|locator|
|fsa.xsd#fsa_Prepa...|PrepaymentsForInt...|locator|
|fsa.xsd#fsa_Prope...|PropertyPlantAndE...|locator|
|fsa.xsd#fsa_LandA...|    LandAndBuildings|locator|
|fsa.xsd#fsa_Inves...|  InvestmentProperty|locator|
|fsa.xsd#fsa_Other...|OtherInvestmentAs...|locator|
|fsa.xsd#fsa_Plant...|   PlantAndMachinery|locator|
|fsa.xsd#fsa_Fixtu...|FixturesFittingsT...|locator|
|fsa.xsd#fsa_Lease...|LeaseholdImprovem...|locator|
|fsa.xsd#fsa_Prope...|PropertyPlantAndE...|locator|
|fsa.xsd#fsa_Prope...|PropertyPlantAndE...|locator|
|fsa.xsd#fsa_Prepa...|PrepaymentsForPro...|locator|
|fsa.xsd#fsa_Biolo...|    BiologicalAssets|locator|
|   fsa.xsd#fsa_Ships|               Ships|locator|
|  fsa.xsd#fsa_Planes|              Planes|locator|
|fsa.xsd#fsa_Longt...|LongtermInvestmen...|locator|
|fsa.xsd#fsa_Longt...|LongtermInvestmen...|locator|
|fsa.xsd#fsa_Longt...|LongtermReceivabl...|locator|
|fsa.xsd#fsa_Longt...|LongtermInvestmen...|locator|
|fsa.xsd#fsa_Longt...|LongtermReceivabl...|locator|
|fsa.xsd#fsa_Other...|OtherLongtermInve...|locator|
|fsa.xsd#fsa_Other...|OtherLongtermRece...|locator|
|fsa.xsd#fsa_Longt...|LongtermReceivabl...|locator|
|fsa.xsd#fsa_Noncu...|NoncurrentDeferre...|locator|
|fsa.xsd#fsa_Depos...|DepositsLongtermI...|locator|
|fsa.xsd#fsa_CostE...|CostExceedsIncome...|locator|
|fsa.xsd#fsa_Contr...|ContributedCapita...|locator|
|fsa.xsd#fsa_Curre...|       CurrentAssets|locator|
|fsa.xsd#fsa_Inven...|         Inventories|locator|
|fsa.xsd#fsa_RawMa...|RawMaterialsAndCo...|locator|
|fsa.xsd#fsa_WorkI...|      WorkInProgress|locator|
|fsa.xsd#fsa_Manuf...|ManufacturedGoods...|locator|
|fsa.xsd#fsa_Prepa...| PrepaymentsForGoods|locator|
|fsa.xsd#fsa_Lives...|           Livestock|locator|
|fsa.xsd#fsa_Asset...|  AssetsMeantForSale|locator|
|fsa.xsd#fsa_Short...|ShorttermReceivables|locator|
|fsa.xsd#fsa_Short...|ShorttermTradeRec...|locator|
|fsa.xsd#fsa_Contr...|ContractWorkInPro...|locator|
|fsa.xsd#fsa_Short...|ShorttermReceivab...|locator|
|fsa.xsd#fsa_Short...|ShorttermReceivab...|locator|
|fsa.xsd#fsa_Curre...|CurrentDeferredTa...|locator|
|fsa.xsd#fsa_Other...|OtherShorttermRec...|locator|
|fsa.xsd#fsa_Contr...|ContributedCapita...|locator|
|fsa.xsd#fsa_Short...|ShorttermReceivab...|locator|
|fsa.xsd#fsa_Defer...|DeferredIncomeAssets|locator|
|fsa.xsd#fsa_Short...|ShorttermTaxRecei...|locator|
|fsa.xsd#fsa_Short...|ShorttermReceivab...|locator|
|fsa.xsd#fsa_Short...|ShorttermReceivab...|locator|
|fsa.xsd#fsa_CostE...|CostExceedsIncome...|locator|
|fsa.xsd#fsa_Short...|ShorttermInvestments|locator|
|fsa.xsd#fsa_Short...|ShorttermInvestme...|locator|
|fsa.xsd#fsa_Short...|ShorttermInvestme...|locator|
|fsa.xsd#fsa_Other...|OtherShorttermInv...|locator|
|fsa.xsd#fsa_CashA...|CashAndCashEquiva...|locator|
|  fsa.xsd#fsa_Equity|              Equity|locator|
|fsa.xsd#fsa_Contr...|  ContributedCapital|locator|
|fsa.xsd#fsa_Share...|        SharePremium|locator|
|fsa.xsd#fsa_Reval...|  RevaluationReserve|locator|
|fsa.xsd#fsa_Other...|       OtherReserves|locator|
|fsa.xsd#fsa_Reser...|ReserveForNetReva...|locator|
|fsa.xsd#fsa_Other...|OtherStatutoryRes...|locator|
|fsa.xsd#fsa_Reser...|ReserveAccordingT...|locator|
|fsa.xsd#fsa_RestO...| RestOfOtherReserves|locator|
|fsa.xsd#fsa_Reser...|ReserveForLoansAn...|locator|
|fsa.xsd#fsa_Reser...|ReserveForUnpaidC...|locator|
|fsa.xsd#fsa_Reser...|ReserveForNetReva...|locator|
|fsa.xsd#fsa_Reser...|ReserveForBiologi...|locator|
|fsa.xsd#fsa_Retai...|    RetainedEarnings|locator|
|fsa.xsd#fsa_Distr...|       Distributions|locator|
|fsa.xsd#fsa_Propo...|ProposedDividendR...|locator|
|fsa.xsd#fsa_NotPa...|NotPaidContribute...|locator|
|fsa.xsd#fsa_PaidC...|PaidContributedCa...|locator|
|fsa.xsd#fsa_Hedge...|           HedgeFund|locator|
|fsa.xsd#fsa_Reser...|         ReserveFund|locator|
|fsa.xsd#fsa_Trans...|TransferredToFrom...|locator|
|fsa.xsd#fsa_Propo...|ProposedExtraordi...|locator|
|fsa.xsd#fsa_Liqui...|  LiquidationAccount|locator|
|fsa.xsd#fsa_Balan...|BalanceSheetAccou...|locator|
|fsa.xsd#fsa_Liabi...|LiabilitiesAndEquity|locator|
|fsa.xsd#fsa_Provi...|          Provisions|locator|
|fsa.xsd#fsa_Provi...|ProvisionsForPens...|locator|
|fsa.xsd#fsa_Provi...|ProvisionsForDefe...|locator|
|fsa.xsd#fsa_Other...|     OtherProvisions|locator|
|fsa.xsd#fsa_Provi...|ProvisionsForInve...|locator|
|fsa.xsd#fsa_Provi...|ProvisionsForInve...|locator|
|fsa.xsd#fsa_Provi...|ProvisionsForInco...|locator|
|fsa.xsd#fsa_Liabi...|LiabilitiesOtherT...|locator|
|fsa.xsd#fsa_Longt...|LongtermLiabiliti...|locator|
|fsa.xsd#fsa_Propo...|    ProposedDividend|locator|
|fsa.xsd#fsa_Lease...|    LeaseCommitments|locator|
|fsa.xsd#fsa_Short...|ShorttermLeaseCom...|locator|
|fsa.xsd#fsa_Longt...|LongtermLeaseComm...|locator|
|fsa.xsd#fsa_Negat...|    NegativeGoodwill|locator|
|fsa.xsd#fsa_Short...|ShorttermNegative...|locator|
|fsa.xsd#fsa_Longt...|LongtermNegativeG...|locator|
|fsa.xsd#fsa_Defer...|      DeferredIncome|locator|
|fsa.xsd#fsa_Short...|ShorttermDeferred...|locator|
|fsa.xsd#fsa_Longt...|LongtermDeferredI...|locator|
|fsa.xsd#fsa_Other...|       OtherPayables|locator|
|fsa.xsd#fsa_Other...|OtherShorttermPay...|locator|
|fsa.xsd#fsa_Other...|OtherLongtermPaya...|locator|
|fsa.xsd#fsa_TaxPa...|         TaxPayables|locator|
|fsa.xsd#fsa_Short...|ShorttermTaxPayables|locator|
|fsa.xsd#fsa_Longt...| LongtermTaxPayables|locator|
|fsa.xsd#fsa_Payab...|PayablesToAssociates|locator|
|fsa.xsd#fsa_Short...|ShorttermPayables...|locator|
|fsa.xsd#fsa_Longt...|LongtermPayablesT...|locator|
|fsa.xsd#fsa_Payab...|PayablesToGroupEn...|locator|
|fsa.xsd#fsa_Short...|ShorttermPayables...|locator|
|fsa.xsd#fsa_Longt...|LongtermPayablesT...|locator|
|fsa.xsd#fsa_Trade...|       TradePayables|locator|
|fsa.xsd#fsa_Short...|ShorttermTradePay...|locator|
|fsa.xsd#fsa_Longt...|LongtermTradePaya...|locator|
|fsa.xsd#fsa_Prepa...|PrepaymentsReceiv...|locator|
|fsa.xsd#fsa_Short...|ShorttermPrepayme...|locator|
|fsa.xsd#fsa_Longt...|LongtermPrepaymen...|locator|
|fsa.xsd#fsa_Conve...|ConvertibleProfit...|locator|
|fsa.xsd#fsa_Conve...|ConvertibleProfit...|locator|
|fsa.xsd#fsa_Conve...|ConvertibleProfit...|locator|
|fsa.xsd#fsa_DebtT...|DebtToOtherCredit...|locator|
|fsa.xsd#fsa_Short...|ShorttermDebtToOt...|locator|
|fsa.xsd#fsa_Longt...|LongtermDebtToOth...|locator|
|fsa.xsd#fsa_Other...|OtherDebtRaisedBy...|locator|
|fsa.xsd#fsa_Other...|OtherShorttermDeb...|locator|
|fsa.xsd#fsa_Other...|OtherLongtermDebt...|locator|
|fsa.xsd#fsa_DebtT...|         DebtToBanks|locator|
|fsa.xsd#fsa_Short...|ShorttermDebtToBanks|locator|
|fsa.xsd#fsa_Longt...| LongtermDebtToBanks|locator|
|fsa.xsd#fsa_Mortg...|        MortgageDebt|locator|
|fsa.xsd#fsa_Short...|ShorttermMortgage...|locator|
|fsa.xsd#fsa_Longt...|LongtermMortgageDebt|locator|
|fsa.xsd#fsa_Short...|ShorttermLiabilit...|locator|
|fsa.xsd#fsa_Short...|ShorttermPartOfLo...|locator|
|fsa.xsd#fsa_Equit...|          EquityLoan|locator|
|fsa.xsd#fsa_Longt...|  LongtermEquityLoan|locator|
|fsa.xsd#fsa_Short...| ShorttermEquityLoan|locator|
|fsa.xsd#fsa_Payab...|PayablesToShareho...|locator|
|fsa.xsd#fsa_Short...|ShorttermPayables...|locator|
|fsa.xsd#fsa_Longt...|LongtermPayablesT...|locator|
|fsa.xsd#fsa_Prepa...|PrepaymentsOfWork...|locator|
|fsa.xsd#fsa_Short...|ShorttermPrepayme...|locator|
|fsa.xsd#fsa_Longt...|LongtermPrepaymen...|locator|
|fsa.xsd#fsa_Contr...|ContractWorkInPro...|locator|
|fsa.xsd#fsa_Short...|ShorttermContract...|locator|
|fsa.xsd#fsa_Longt...|LongtermContractW...|locator|
|fsa.xsd#fsa_TaxPa...|TaxPayablesToGrou...|locator|
|fsa.xsd#fsa_Longt...|LongtermTaxPayabl...|locator|
|fsa.xsd#fsa_Short...|ShorttermTaxPayab...|locator|
|fsa.xsd#fsa_Depos...|DepositsLiabiliti...|locator|
|fsa.xsd#fsa_Depos...|DepositsShortterm...|locator|
|fsa.xsd#fsa_Depos...|DepositsLongtermL...|locator|
|fsa.xsd#fsa_Incom...|IncomeExceedCostF...|locator|
|fsa.xsd#fsa_Incom...|IncomeExceedCostF...|locator|
|fsa.xsd#fsa_Incom...|IncomeExceedCostF...|locator|
|fsa.xsd#fsa_Minor...|   MinorityInterests|locator|
+--------------------+--------------------+-------+


In [9]:
edges = (cleanRdd
         .filter(lambda x: x[0] == "<link:definitionArc")
         .map(lambda x: [re.sub(r"\w+:|<\w+:|/>",repl="",string=i) for i in x[1:]])
         .map(lambda x: [re.sub(r'"',repl="",string=i) for i in x])
         .map(lambda x: dict(element.split('=') for element in x if "=" in element))
         .map(lambda x: dict([(k,v)for k,v in x.items() if k in ["from","to","order","type","arcrole"]]))
         #.map(lambda x: len(x))
         .map(lambda x: Row(**x))
         .toDF()
        )

d = edges.count()
print(d)
edges.orderBy("from","order").show(d)


172
+--------------------+--------------------+-----+--------------------+----+
|             arcrole|                from|order|                  to|type|
+--------------------+--------------------+-----+--------------------+----+
|//xbrl.org/int/di...|AcquiredIntangibl...| 10.0| AcquiredConcessions| arc|
|//xbrl.org/int/di...|AcquiredIntangibl...| 20.0|     AcquiredPatents| arc|
|//xbrl.org/int/di...|AcquiredIntangibl...| 30.0|    AcquiredLicences| arc|
|//xbrl.org/int/di...|AcquiredIntangibl...| 40.0|  AcquiredTrademarks| arc|
|//xbrl.org/int/di...|AcquiredIntangibl...| 50.0|AcquiredOtherSimi...| arc|
|//xbrl.org/int/di...|              Assets| 10.0|    NoncurrentAssets| arc|
|//xbrl.org/int/di...|              Assets| 20.0|       CurrentAssets| arc|
|//xbrl.org/int/di...|BalanceSheetAccou...| 10.0|              Assets| arc|
|//xbrl.org/int/di...|BalanceSheetAccou...| 20.0|LiabilitiesAndEquity| arc|
|//xbrl.org/int/di...|BalanceSheetAccou...| 30.0|     CommonHypercube| arc|
|//xbrl.org/int/di...|     CommonHypercube| 10.0|ConsolidatedSoloD...| arc|
|//xbrl.org/int/di...|CompletedDevelopm...| 10.0|ConcessionsOrigin...| arc|
|//xbrl.org/int/di...|CompletedDevelopm...| 20.0|PatentsOriginatin...| arc|
|//xbrl.org/int/di...|CompletedDevelopm...| 30.0|TrademarksOrigina...| arc|
|//xbrl.org/int/di...|CompletedDevelopm...| 40.0|OtherSimilarRight...| arc|
|//xbrl.org/int/di...|ContractWorkInPro...| 10.0|ShorttermContract...| arc|
|//xbrl.org/int/di...|ContractWorkInPro...| 20.0|LongtermContractW...| arc|
|//xbrl.org/int/di...|ConvertibleProfit...| 10.0|ConvertibleProfit...| arc|
|//xbrl.org/int/di...|ConvertibleProfit...| 20.0|ConvertibleProfit...| arc|
|//xbrl.org/int/di...|       CurrentAssets| 10.0|         Inventories| arc|
|//xbrl.org/int/di...|       CurrentAssets| 20.0|ShorttermReceivables| arc|
|//xbrl.org/int/di...|       CurrentAssets| 30.0|ShorttermInvestments| arc|
|//xbrl.org/int/di...|       CurrentAssets| 40.0|CashAndCashEquiva...| arc|
|//xbrl.org/int/di...|         DebtToBanks| 10.0| LongtermDebtToBanks| arc|
|//xbrl.org/int/di...|         DebtToBanks| 20.0|ShorttermDebtToBanks| arc|
|//xbrl.org/int/di...|DebtToOtherCredit...| 10.0|LongtermDebtToOth...| arc|
|//xbrl.org/int/di...|DebtToOtherCredit...| 20.0|ShorttermDebtToOt...| arc|
|//xbrl.org/int/di...|      DeferredIncome| 10.0|LongtermDeferredI...| arc|
|//xbrl.org/int/di...|      DeferredIncome| 20.0|ShorttermDeferred...| arc|
|//xbrl.org/int/di...|DepositsLiabiliti...|  1.0|DepositsShortterm...| arc|
|//xbrl.org/int/di...|DepositsLiabiliti...|  2.0|DepositsLongtermL...| arc|
|//xbrl.org/int/di...|DevelopmentProjec...| 10.0|DevelopmentProjec...| arc|
|//xbrl.org/int/di...|DevelopmentProjec...| 20.0|PrepaymentsForInt...| arc|
|//xbrl.org/int/di...|              Equity| 10.0|  ContributedCapital| arc|
|//xbrl.org/int/di...|              Equity|101.0|           HedgeFund| arc|
|//xbrl.org/int/di...|              Equity|102.0|         ReserveFund| arc|
|//xbrl.org/int/di...|              Equity|103.0|TransferredToFrom...| arc|
|//xbrl.org/int/di...|              Equity|104.0|  LiquidationAccount| arc|
|//xbrl.org/int/di...|              Equity| 20.0|PaidContributedCa...| arc|
|//xbrl.org/int/di...|              Equity| 30.0|        SharePremium| arc|
|//xbrl.org/int/di...|              Equity| 40.0|  RevaluationReserve| arc|
|//xbrl.org/int/di...|              Equity| 50.0|       OtherReserves| arc|
|//xbrl.org/int/di...|              Equity| 60.0|    RetainedEarnings| arc|
|//xbrl.org/int/di...|              Equity| 70.0|       Distributions| arc|
|//xbrl.org/int/di...|              Equity| 80.0|ProposedDividendR...| arc|
|//xbrl.org/int/di...|              Equity| 85.0|ProposedExtraordi...| arc|
|//xbrl.org/int/di...|              Equity| 90.0|NotPaidContribute...| arc|
|//xbrl.org/int/di...|          EquityLoan| 10.0| ShorttermEquityLoan| arc|
|//xbrl.org/int/di...|          EquityLoan| 20.0|  LongtermEquityLoan| arc|
|//xbrl.org/int/di...|IncomeExceedCostF...|  1.0|IncomeExceedCostF...| arc|
|//xbrl.org/int/di...|IncomeExceedCostF...|  2.0|IncomeExceedCostF...| arc|
|//xbrl.org/int/di...|    IntangibleAssets| 10.0|CompletedDevelopm...| arc|
|//xbrl.org/int/di...|    IntangibleAssets| 20.0|AcquiredIntangibl...| arc|
|//xbrl.org/int/di...|    IntangibleAssets| 30.0|            Goodwill| arc|
|//xbrl.org/int/di...|    IntangibleAssets| 40.0|DevelopmentProjec...| arc|
|//xbrl.org/int/di...|         Inventories| 10.0|RawMaterialsAndCo...| arc|
|//xbrl.org/int/di...|         Inventories| 20.0|      WorkInProgress| arc|
|//xbrl.org/int/di...|         Inventories| 30.0|ManufacturedGoods...| arc|
|//xbrl.org/int/di...|         Inventories| 40.0| PrepaymentsForGoods| arc|
|//xbrl.org/int/di...|         Inventories| 50.0|           Livestock| arc|
|//xbrl.org/int/di...|         Inventories| 51.0|  AssetsMeantForSale| arc|
|//xbrl.org/int/di...|    LeaseCommitments| 10.0|LongtermLeaseComm...| arc|
|//xbrl.org/int/di...|    LeaseCommitments| 20.0|ShorttermLeaseCom...| arc|
|//xbrl.org/int/di...|LiabilitiesAndEquity| 10.0|              Equity| arc|
|//xbrl.org/int/di...|LiabilitiesAndEquity| 20.0|   MinorityInterests| arc|
|//xbrl.org/int/di...|LiabilitiesAndEquity| 30.0|          Provisions| arc|
|//xbrl.org/int/di...|LiabilitiesAndEquity| 40.0|LiabilitiesOtherT...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...| 10.0|        MortgageDebt| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|100.0|         TaxPayables| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|110.0|TaxPayablesToGrou...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|120.0|       OtherPayables| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|130.0|      DeferredIncome| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|140.0|    NegativeGoodwill| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|150.0|    LeaseCommitments| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|160.0|    ProposedDividend| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|170.0|ShorttermPartOfLo...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|180.0|LongtermLiabiliti...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|190.0|ShorttermLiabilit...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...| 20.0|         DebtToBanks| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|200.0|          EquityLoan| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|210.0|PayablesToShareho...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|220.0|PrepaymentsOfWork...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|230.0|ContractWorkInPro...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|231.0|DepositsLiabiliti...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...|232.0|IncomeExceedCostF...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...| 30.0|OtherDebtRaisedBy...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...| 40.0|DebtToOtherCredit...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...| 50.0|ConvertibleProfit...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...| 60.0|PrepaymentsReceiv...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...| 70.0|       TradePayables| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...| 80.0|PayablesToGroupEn...| arc|
|//xbrl.org/int/di...|LiabilitiesOtherT...| 90.0|PayablesToAssociates| arc|
|//xbrl.org/int/di...|LongtermInvestmen...| 10.0|LongtermInvestmen...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...|100.0|CostExceedsIncome...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...|101.0|ContributedCapita...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...| 20.0|LongtermReceivabl...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...| 30.0|LongtermInvestmen...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...| 40.0|LongtermReceivabl...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...| 50.0|OtherLongtermInve...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...| 60.0|OtherLongtermRece...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...| 70.0|LongtermReceivabl...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...| 80.0|NoncurrentDeferre...| arc|
|//xbrl.org/int/di...|LongtermInvestmen...| 90.0|DepositsLongtermI...| arc|
|//xbrl.org/int/di...|        MortgageDebt| 10.0|LongtermMortgageDebt| arc|
|//xbrl.org/int/di...|        MortgageDebt| 20.0|ShorttermMortgage...| arc|
|//xbrl.org/int/di...|    NegativeGoodwill| 10.0|LongtermNegativeG...| arc|
|//xbrl.org/int/di...|    NegativeGoodwill| 20.0|ShorttermNegative...| arc|
|//xbrl.org/int/di...|    NoncurrentAssets| 10.0|    IntangibleAssets| arc|
|//xbrl.org/int/di...|    NoncurrentAssets| 20.0|PropertyPlantAndE...| arc|
|//xbrl.org/int/di...|    NoncurrentAssets| 30.0|LongtermInvestmen...| arc|
|//xbrl.org/int/di...|OtherDebtRaisedBy...| 10.0|OtherLongtermDebt...| arc|
|//xbrl.org/int/di...|OtherDebtRaisedBy...| 20.0|OtherShorttermDeb...| arc|
|//xbrl.org/int/di...|       OtherPayables| 10.0|OtherLongtermPaya...| arc|
|//xbrl.org/int/di...|       OtherPayables| 20.0|OtherShorttermPay...| arc|
|//xbrl.org/int/di...|       OtherReserves| 10.0|ReserveForNetReva...| arc|
|//xbrl.org/int/di...|       OtherReserves| 20.0|ReserveForLoansAn...| arc|
|//xbrl.org/int/di...|       OtherReserves| 30.0|ReserveForUnpaidC...| arc|
|//xbrl.org/int/di...|       OtherReserves| 35.0|ReserveForNetReva...| arc|
|//xbrl.org/int/di...|       OtherReserves| 40.0|OtherStatutoryRes...| arc|
|//xbrl.org/int/di...|       OtherReserves| 50.0|ReserveAccordingT...| arc|
|//xbrl.org/int/di...|       OtherReserves| 61.0|ReserveForBiologi...| arc|
|//xbrl.org/int/di...|       OtherReserves| 70.0| RestOfOtherReserves| arc|
|//xbrl.org/int/di...|PayablesToAssociates| 10.0|LongtermPayablesT...| arc|
|//xbrl.org/int/di...|PayablesToAssociates| 20.0|ShorttermPayables...| arc|
|//xbrl.org/int/di...|PayablesToGroupEn...| 10.0|LongtermPayablesT...| arc|
|//xbrl.org/int/di...|PayablesToGroupEn...| 20.0|ShorttermPayables...| arc|
|//xbrl.org/int/di...|PayablesToShareho...| 10.0|ShorttermPayables...| arc|
|//xbrl.org/int/di...|PayablesToShareho...| 11.0|LongtermPayablesT...| arc|
|//xbrl.org/int/di...|PrepaymentsOfWork...| 10.0|ShorttermPrepayme...| arc|
|//xbrl.org/int/di...|PrepaymentsOfWork...| 11.0|LongtermPrepaymen...| arc|
|//xbrl.org/int/di...|PrepaymentsReceiv...| 10.0|LongtermPrepaymen...| arc|
|//xbrl.org/int/di...|PrepaymentsReceiv...| 20.0|ShorttermPrepayme...| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 10.0|    LandAndBuildings| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...|100.0|PropertyPlantAndE...| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 20.0|  InvestmentProperty| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 30.0|OtherInvestmentAs...| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 40.0|   PlantAndMachinery| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 50.0|FixturesFittingsT...| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 60.0|    BiologicalAssets| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 70.0|LeaseholdImprovem...| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 80.0|               Ships| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 90.0|              Planes| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 10.0|PropertyPlantAndE...| arc|
|//xbrl.org/int/di...|PropertyPlantAndE...| 20.0|PrepaymentsForPro...| arc|
|//xbrl.org/int/di...|          Provisions| 10.0|ProvisionsForPens...| arc|
|//xbrl.org/int/di...|          Provisions| 20.0|ProvisionsForDefe...| arc|
|//xbrl.org/int/di...|          Provisions| 30.0|     OtherProvisions| arc|
|//xbrl.org/int/di...|          Provisions| 40.0|ProvisionsForInve...| arc|
|//xbrl.org/int/di...|          Provisions| 50.0|ProvisionsForInve...| arc|
|//xbrl.org/int/di...|          Provisions| 51.0|ProvisionsForInco...| arc|
|//xbrl.org/int/di...|ShorttermInvestments| 10.0|ShorttermInvestme...| arc|
|//xbrl.org/int/di...|ShorttermInvestments| 20.0|ShorttermInvestme...| arc|
|//xbrl.org/int/di...|ShorttermInvestments| 30.0|OtherShorttermInv...| arc|
|//xbrl.org/int/di...|ShorttermReceivables| 10.0|ShorttermTradeRec...| arc|
|//xbrl.org/int/di...|ShorttermReceivables|100.0|ContributedCapita...| arc|
|//xbrl.org/int/di...|ShorttermReceivables|110.0|ShorttermReceivab...| arc|
|//xbrl.org/int/di...|ShorttermReceivables|120.0|DeferredIncomeAssets| arc|
|//xbrl.org/int/di...|ShorttermReceivables|121.0|CostExceedsIncome...| arc|
|//xbrl.org/int/di...|ShorttermReceivables| 20.0|ContractWorkInPro...| arc|
|//xbrl.org/int/di...|ShorttermReceivables| 30.0|ShorttermReceivab...| arc|
|//xbrl.org/int/di...|ShorttermReceivables| 40.0|ShorttermReceivab...| arc|
|//xbrl.org/int/di...|ShorttermReceivables| 50.0|ShorttermReceivab...| arc|
|//xbrl.org/int/di...|ShorttermReceivables| 60.0|ShorttermReceivab...| arc|
|//xbrl.org/int/di...|ShorttermReceivables| 70.0|CurrentDeferredTa...| arc|
|//xbrl.org/int/di...|ShorttermReceivables| 80.0|ShorttermTaxRecei...| arc|
|//xbrl.org/int/di...|ShorttermReceivables| 90.0|OtherShorttermRec...| arc|
|//xbrl.org/int/di...|         TaxPayables| 10.0| LongtermTaxPayables| arc|
|//xbrl.org/int/di...|         TaxPayables| 20.0|ShorttermTaxPayables| arc|
|//xbrl.org/int/di...|TaxPayablesToGrou...| 10.0|LongtermTaxPayabl...| arc|
|//xbrl.org/int/di...|TaxPayablesToGrou...| 20.0|ShorttermTaxPayab...| arc|
|//xbrl.org/int/di...|       TradePayables| 10.0|LongtermTradePaya...| arc|
|//xbrl.org/int/di...|       TradePayables| 20.0|ShorttermTradePay...| arc|
+--------------------+--------------------+-----+--------------------+----+


In [10]:
e = (edges
     .withColumnRenamed(existing="from",new="src")
     .withColumnRenamed(existing="to",new="dst")
    )

v = (vert.withColumnRenamed(existing="label",new="id"))

In [11]:
graph = GraphFrame(v=v,e=e)

In [12]:
graph.outDegrees.orderBy(F.col("outDegree").desc()).show(truncate=False)


+---------------------------------+---------+
|id                               |outDegree|
+---------------------------------+---------+
|LiabilitiesOtherThanProvisions   |25       |
|Equity                           |14       |
|ShorttermReceivables             |13       |
|LongtermInvestmentsAndReceivables|11       |
|PropertyPlantAndEquipment        |10       |
|OtherReserves                    |8        |
|Provisions                       |6        |
|Inventories                      |6        |
|AcquiredIntangibleAssets         |5        |
|CompletedDevelopmentProjects     |4        |
|LiabilitiesAndEquity             |4        |
|CurrentAssets                    |4        |
|IntangibleAssets                 |4        |
|NoncurrentAssets                 |3        |
|ShorttermInvestments             |3        |
|BalanceSheetAccountFormAbstract  |3        |
|NegativeGoodwill                 |2        |
|PayablesToGroupEnterprises       |2        |
|LeaseCommitments                 |2        |
|PrepaymentsOfWorkInProgress      |2        |
+---------------------------------+---------+
only showing top 20 rows


In [13]:
iDdf = graph.vertices.select("id")
iDdf.groupBy("id").count().show()


+--------------------+-----+
|                  id|count|
+--------------------+-----+
|ShorttermTradePay...|    1|
|          EquityLoan|    1|
|TaxPayablesToGrou...|    1|
|ConvertibleProfit...|    1|
|LeaseholdImprovem...|    1|
|LongtermDebtToOth...|    1|
|OtherShorttermInv...|    1|
|LiabilitiesAndEquity|    1|
|LongtermContractW...|    1|
|OtherShorttermRec...|    1|
|BalanceSheetAccou...|    1|
|    NegativeGoodwill|    1|
| LongtermDebtToBanks|    1|
|    IntangibleAssets|    1|
|CostExceedsIncome...|    1|
|CompletedDevelopm...|    1|
|  LongtermEquityLoan|    1|
|           Livestock|    1|
|         TaxPayables|    1|
|NoncurrentDeferre...|    1|
+--------------------+-----+
only showing top 20 rows


In [21]:
graph.edges.filter(F.col("src")=="Assets")


Out[21]:
DataFrame[arcrole: string, src: string, order: string, dst: string, type: string]

In [ ]:
G = nx.from_pandas_dataframe(graph.edges.toPandas(),"src","dst")

In [24]:
nx.draw(G)
plt.show()



In [25]:
nx.draw_random(G)
plt.show()



In [254]:
nx.draw_spectral(G)
plt.show()



In [20]:
r = np.random.RandomState(seed=5)
ints = r.random_integers(1, 10, size=(3,2))
a = ['A', 'B', 'C']
b = ['D', 'A', 'E']
df = pd.DataFrame(ints, columns=['weight', 'cost'])
df[0] = a
df['b'] = b
df
G=nx.from_pandas_dataframe(df, 0, 'b', ['weight', 'cost'])
nx.draw(G)
plt.show()


/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py:2: DeprecationWarning: This function is deprecated. Please call randint(1, 10 + 1) instead
  from ipykernel import kernelapp as app

In [ ]:
import networkx as nx

def hierarchy_pos(G, root, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5 ):
    '''If there is a cycle that is reachable from root, then result will not be a hierarchy.

       G: the graph
       root: the root node of current branch
       width: horizontal space allocated for this branch - avoids overlap with other branches
       vert_gap: gap between levels of hierarchy
       vert_loc: vertical location of root
       xcenter: horizontal location of root
    '''

    def h_recur(G, root, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5, 
                  pos = None, parent = None, parsed = [] ):
        if(root not in parsed):
            parsed.append(root)
            if pos == None:
                pos = {root:(xcenter,vert_loc)}
            else:
                pos[root] = (xcenter, vert_loc)
            neighbors = G.neighbors(root)
            if parent != None:
                neighbors.remove(parent)
            if len(neighbors)!=0:
                dx = width/len(neighbors) 
                nextx = xcenter - width/2 - dx/2
                for neighbor in neighbors:
                    nextx += dx
                    pos = h_recur(G,neighbor, width = dx, vert_gap = vert_gap, 
                                        vert_loc = vert_loc-vert_gap, xcenter=nextx, pos=pos, 
                                        parent = root, parsed = parsed)
        return pos

    return h_recur(G, root, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5)

In [ ]: