In [1]:
from pyspark import SparkConf
from pyspark.sql import SparkSession, SQLContext
from pyspark.sql import functions as F
from functools import reduce
from pyspark.sql.window import Window

#Python imports
import sys

In [4]:
#spark = SparkSession(sc).builder.master("local[*]").appName("TestingCvr").getOrCreate()
conf = sc.getConf()
conf.setAppName("TestingCvr")
print(conf.getAll())
sqlContext = SQLContext(sc)


[('hive.metastore.warehouse.dir', 'file:/home/svanhmic/workspace/Python/Erhvervs/src/notebooks/cvr/spark-warehouse'), ('spark.sql.catalogImplementation', 'hive'), ('spark.rdd.compress', 'True'), ('spark.app.name', 'TestingCvr'), ('spark.driver.host', '10.52.1.5'), ('spark.serializer.objectStreamReset', '100'), ('spark.master', 'local[*]'), ('spark.executor.id', 'driver'), ('spark.submit.deployMode', 'client'), ('spark.driver.port', '35356'), ('spark.driver.memory', '6G'), ('spark.app.id', 'local-1483957703077')]

In [5]:
if __name__ == '__main__':
    cvrData = "/home/svanhmic/workspace/Python/Erhvervs/data/cdata/virksomhedersMetadata.json/"
    allCVRData = "/home/svanhmic/workspace/Python/Erhvervs/data/cdata/AlleDatavirksomheder.json/"
    cvrDf = sqlContext.read.json(allCVRData)
    #


---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-5-b36f50d0690d> in <module>()
      2     cvrData = "/home/svanhmic/workspace/Python/Erhvervs/data/cdata/virksomhedersMetadata.json/"
      3     allCVRData = "/home/svanhmic/workspace/Python/Erhvervs/data/cdata/AlleDatavirksomheder.json/"
----> 4     cvrDf = sqlContext.read.json(allCVRData)
      5     #

/usr/local/share/spark/python/pyspark/sql/readwriter.py in json(self, path, schema, primitivesAsString, prefersDecimal, allowComments, allowUnquotedFieldNames, allowSingleQuotes, allowNumericLeadingZero, allowBackslashEscapingAnyCharacter, mode, columnNameOfCorruptRecord)
    218             path = [path]
    219         if type(path) == list:
--> 220             return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))
    221         elif isinstance(path, RDD):
    222             def func(iterator):

/usr/local/share/spark/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
    931         answer = self.gateway_client.send_command(command)
    932         return_value = get_return_value(
--> 933             answer, self.gateway_client, self.target_id, self.name)
    934 
    935         for temp_arg in temp_args:

/usr/local/share/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

/usr/local/share/spark/python/lib/py4j-0.10.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    310                 raise Py4JJavaError(
    311                     "An error occurred while calling {0}{1}{2}.\n".
--> 312                     format(target_id, ".", name), value)
    313             else:
    314                 raise Py4JError(

Py4JJavaError: An error occurred while calling o151.json.
: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
	at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:171)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:258)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:359)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:263)
	at org.apache.spark.sql.hive.HiveSharedState.metadataHive$lzycompute(HiveSharedState.scala:39)
	at org.apache.spark.sql.hive.HiveSharedState.metadataHive(HiveSharedState.scala:38)
	at org.apache.spark.sql.hive.HiveSharedState.externalCatalog$lzycompute(HiveSharedState.scala:46)
	at org.apache.spark.sql.hive.HiveSharedState.externalCatalog(HiveSharedState.scala:45)
	at org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:50)
	at org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48)
	at org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63)
	at org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63)
	at org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62)
	at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
	at org.apache.spark.sql.SparkSession.baseRelationToDataFrame(SparkSession.scala:382)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:143)
	at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:287)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:211)
	at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
	... 33 more
Caused by: java.lang.reflect.InvocationTargetException
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
	... 39 more
Caused by: javax.jdo.JDOFatalDataStoreException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@7a1ece89, see the next exception for details.
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
	at java.sql.DriverManager.getConnection(DriverManager.java:664)
	at java.sql.DriverManager.getConnection(DriverManager.java:208)
	at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
	at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
	at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
	at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
	at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
	at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
	at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
	at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
	at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
	at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
	at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
	at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
	at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:171)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:258)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:359)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:263)
	at org.apache.spark.sql.hive.HiveSharedState.metadataHive$lzycompute(HiveSharedState.scala:39)
	at org.apache.spark.sql.hive.HiveSharedState.metadataHive(HiveSharedState.scala:38)
	at org.apache.spark.sql.hive.HiveSharedState.externalCatalog$lzycompute(HiveSharedState.scala:46)
	at org.apache.spark.sql.hive.HiveSharedState.externalCatalog(HiveSharedState.scala:45)
	at org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:50)
	at org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48)
	at org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63)
	at org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63)
	at org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62)
	at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
	at org.apache.spark.sql.SparkSession.baseRelationToDataFrame(SparkSession.scala:382)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:143)
	at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:287)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:211)
	at java.lang.Thread.run(Thread.java:745)
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@7a1ece89, see the next exception for details.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
	... 98 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /home/svanhmic/workspace/Python/Erhvervs/src/notebooks/cvr/metastore_db.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
	... 95 more
------

NestedThrowables:
java.sql.SQLException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@7a1ece89, see the next exception for details.
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
	at java.sql.DriverManager.getConnection(DriverManager.java:664)
	at java.sql.DriverManager.getConnection(DriverManager.java:208)
	at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
	at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
	at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
	at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
	at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
	at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
	at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
	at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
	at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
	at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
	at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
	at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
	at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:171)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:258)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:359)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:263)
	at org.apache.spark.sql.hive.HiveSharedState.metadataHive$lzycompute(HiveSharedState.scala:39)
	at org.apache.spark.sql.hive.HiveSharedState.metadataHive(HiveSharedState.scala:38)
	at org.apache.spark.sql.hive.HiveSharedState.externalCatalog$lzycompute(HiveSharedState.scala:46)
	at org.apache.spark.sql.hive.HiveSharedState.externalCatalog(HiveSharedState.scala:45)
	at org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:50)
	at org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48)
	at org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63)
	at org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63)
	at org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62)
	at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
	at org.apache.spark.sql.SparkSession.baseRelationToDataFrame(SparkSession.scala:382)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:143)
	at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:287)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:211)
	at java.lang.Thread.run(Thread.java:745)
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@7a1ece89, see the next exception for details.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
	... 98 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /home/svanhmic/workspace/Python/Erhvervs/src/notebooks/cvr/metastore_db.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
	... 95 more
------

	at org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:436)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:788)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
	at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
	at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
	at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
	at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
	at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
	at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
	... 44 more
Caused by: java.sql.SQLException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@7a1ece89, see the next exception for details.
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
	at java.sql.DriverManager.getConnection(DriverManager.java:664)
	at java.sql.DriverManager.getConnection(DriverManager.java:208)
	at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
	at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
	at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
	at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
	at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
	at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
	at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
	at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
	at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
	at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
	at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
	at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
	at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:171)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:258)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:359)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:263)
	at org.apache.spark.sql.hive.HiveSharedState.metadataHive$lzycompute(HiveSharedState.scala:39)
	at org.apache.spark.sql.hive.HiveSharedState.metadataHive(HiveSharedState.scala:38)
	at org.apache.spark.sql.hive.HiveSharedState.externalCatalog$lzycompute(HiveSharedState.scala:46)
	at org.apache.spark.sql.hive.HiveSharedState.externalCatalog(HiveSharedState.scala:45)
	at org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:50)
	at org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48)
	at org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63)
	at org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63)
	at org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62)
	at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
	at org.apache.spark.sql.SparkSession.baseRelationToDataFrame(SparkSession.scala:382)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:143)
	at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:287)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:211)
	at java.lang.Thread.run(Thread.java:745)
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@7a1ece89, see the next exception for details.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
	... 98 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /home/svanhmic/workspace/Python/Erhvervs/src/notebooks/cvr/metastore_db.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
	... 95 more
------

	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at com.jolbox.bonecp.PoolUtil.generateSQLException(PoolUtil.java:192)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:422)
	at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
	at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
	at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
	at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
	at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
	... 73 more
Caused by: java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@7a1ece89, see the next exception for details.
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
	at java.sql.DriverManager.getConnection(DriverManager.java:664)
	at java.sql.DriverManager.getConnection(DriverManager.java:208)
	at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
	... 85 more
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@7a1ece89, see the next exception for details.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
	... 98 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /home/svanhmic/workspace/Python/Erhvervs/src/notebooks/cvr/metastore_db.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
	... 95 more

In [4]:
cvrDf.printSchema()


root
 |-- virksomhed: struct (nullable = true)
 |    |-- aarsbeskaeftigelse: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- aar: long (nullable = true)
 |    |    |    |-- intervalKodeAntalAarsvaerk: string (nullable = true)
 |    |    |    |-- intervalKodeAntalAnsatte: string (nullable = true)
 |    |    |    |-- intervalKodeAntalInklusivEjere: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- attributter: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- sekvensnr: long (nullable = true)
 |    |    |    |-- type: string (nullable = true)
 |    |    |    |-- vaerdier: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |-- vaerdi: string (nullable = true)
 |    |    |    |-- vaerditype: string (nullable = true)
 |    |-- beliggenhedsadresse: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- bogstavFra: string (nullable = true)
 |    |    |    |-- bogstavTil: string (nullable = true)
 |    |    |    |-- bynavn: string (nullable = true)
 |    |    |    |-- conavn: string (nullable = true)
 |    |    |    |-- etage: string (nullable = true)
 |    |    |    |-- fritekst: string (nullable = true)
 |    |    |    |-- husnummerFra: long (nullable = true)
 |    |    |    |-- husnummerTil: long (nullable = true)
 |    |    |    |-- kommune: struct (nullable = true)
 |    |    |    |    |-- kommuneKode: long (nullable = true)
 |    |    |    |    |-- kommuneNavn: string (nullable = true)
 |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- landekode: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- postboks: string (nullable = true)
 |    |    |    |-- postdistrikt: string (nullable = true)
 |    |    |    |-- postnummer: long (nullable = true)
 |    |    |    |-- sidedoer: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- vejkode: long (nullable = true)
 |    |    |    |-- vejnavn: string (nullable = true)
 |    |-- bibranche1: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- branchekode: string (nullable = true)
 |    |    |    |-- branchetekst: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- bibranche2: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- branchekode: string (nullable = true)
 |    |    |    |-- branchetekst: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- bibranche3: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- branchekode: string (nullable = true)
 |    |    |    |-- branchetekst: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- binavne: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- navn: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- brancheAnsvarskode: long (nullable = true)
 |    |-- cvrNummer: long (nullable = true)
 |    |-- dataAdgang: long (nullable = true)
 |    |-- deltagerRelation: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- deltager: struct (nullable = true)
 |    |    |    |    |-- beliggenhedsadresse: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- bogstavFra: string (nullable = true)
 |    |    |    |    |    |    |-- bogstavTil: string (nullable = true)
 |    |    |    |    |    |    |-- bynavn: string (nullable = true)
 |    |    |    |    |    |    |-- conavn: string (nullable = true)
 |    |    |    |    |    |    |-- etage: string (nullable = true)
 |    |    |    |    |    |    |-- fritekst: string (nullable = true)
 |    |    |    |    |    |    |-- husnummerFra: long (nullable = true)
 |    |    |    |    |    |    |-- husnummerTil: long (nullable = true)
 |    |    |    |    |    |    |-- kommune: struct (nullable = true)
 |    |    |    |    |    |    |    |-- kommuneKode: long (nullable = true)
 |    |    |    |    |    |    |    |-- kommuneNavn: string (nullable = true)
 |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |-- landekode: string (nullable = true)
 |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |-- postboks: string (nullable = true)
 |    |    |    |    |    |    |-- postdistrikt: string (nullable = true)
 |    |    |    |    |    |    |-- postnummer: long (nullable = true)
 |    |    |    |    |    |    |-- sidedoer: string (nullable = true)
 |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |-- vejkode: long (nullable = true)
 |    |    |    |    |    |    |-- vejnavn: string (nullable = true)
 |    |    |    |    |-- enhedsNummer: long (nullable = true)
 |    |    |    |    |-- enhedstype: string (nullable = true)
 |    |    |    |    |-- forretningsnoegle: long (nullable = true)
 |    |    |    |    |-- navne: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- navn: string (nullable = true)
 |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |-- sidstIndlaest: string (nullable = true)
 |    |    |    |-- kontorsteder: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- attributter: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- sekvensnr: long (nullable = true)
 |    |    |    |    |    |    |    |-- type: string (nullable = true)
 |    |    |    |    |    |    |    |-- vaerdier: array (nullable = true)
 |    |    |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- vaerdi: string (nullable = true)
 |    |    |    |    |    |    |    |-- vaerditype: string (nullable = true)
 |    |    |    |    |    |-- penhed: struct (nullable = true)
 |    |    |    |    |    |    |-- beliggenhedsadresse: array (nullable = true)
 |    |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |    |-- bogstavFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- bogstavTil: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- bynavn: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- conavn: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- etage: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- husnummerFra: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- husnummerTil: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- kommune: struct (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- kommuneKode: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- kommuneNavn: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- landekode: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- postboks: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- postdistrikt: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- postnummer: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- sidedoer: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- vejkode: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- vejnavn: string (nullable = true)
 |    |    |    |    |    |    |-- enhedsNummer: long (nullable = true)
 |    |    |    |    |    |    |-- enhedstype: string (nullable = true)
 |    |    |    |    |    |    |-- forretningsnoegle: long (nullable = true)
 |    |    |    |    |    |    |-- navne: array (nullable = true)
 |    |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |    |-- navn: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |-- sidstIndlaest: string (nullable = true)
 |    |    |    |-- organisationer: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- attributter: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- sekvensnr: long (nullable = true)
 |    |    |    |    |    |    |    |-- type: string (nullable = true)
 |    |    |    |    |    |    |    |-- vaerdier: array (nullable = true)
 |    |    |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- vaerdi: string (nullable = true)
 |    |    |    |    |    |    |    |-- vaerditype: string (nullable = true)
 |    |    |    |    |    |-- enhedsNummerOrganisation: long (nullable = true)
 |    |    |    |    |    |-- hovedtype: string (nullable = true)
 |    |    |    |    |    |-- medlemsData: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- attributter: array (nullable = true)
 |    |    |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |    |    |-- sekvensnr: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- type: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- vaerdier: array (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |    |    |-- vaerdi: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- vaerditype: string (nullable = true)
 |    |    |    |    |    |-- organisationsNavn: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- navn: string (nullable = true)
 |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- elektroniskPost: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- hemmelig: boolean (nullable = true)
 |    |    |    |-- kontaktoplysning: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- enhedsNummer: long (nullable = true)
 |    |-- enhedstype: string (nullable = true)
 |    |-- fejlBeskrivelse: string (nullable = true)
 |    |-- fejlRegistreret: boolean (nullable = true)
 |    |-- fejlVedIndlaesning: boolean (nullable = true)
 |    |-- fusioner: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- enhedsNummerOrganisation: long (nullable = true)
 |    |    |    |-- indgaaende: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- sekvensnr: long (nullable = true)
 |    |    |    |    |    |-- type: string (nullable = true)
 |    |    |    |    |    |-- vaerdier: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |    |-- vaerdi: string (nullable = true)
 |    |    |    |    |    |-- vaerditype: string (nullable = true)
 |    |    |    |-- organisationsNavn: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- navn: string (nullable = true)
 |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- udgaaende: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- sekvensnr: long (nullable = true)
 |    |    |    |    |    |-- type: string (nullable = true)
 |    |    |    |    |    |-- vaerdier: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |    |-- vaerdi: string (nullable = true)
 |    |    |    |    |    |-- vaerditype: string (nullable = true)
 |    |-- hjemmeside: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- hemmelig: boolean (nullable = true)
 |    |    |    |-- kontaktoplysning: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- hovedbranche: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- branchekode: string (nullable = true)
 |    |    |    |-- branchetekst: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- kvartalsbeskaeftigelse: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- aar: long (nullable = true)
 |    |    |    |-- intervalKodeAntalAarsvaerk: string (nullable = true)
 |    |    |    |-- intervalKodeAntalAnsatte: string (nullable = true)
 |    |    |    |-- kvartal: long (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- livsforloeb: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- maanedsbeskaeftigelse: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- aar: long (nullable = true)
 |    |    |    |-- intervalKodeAntalAarsvaerk: string (nullable = true)
 |    |    |    |-- intervalKodeAntalAnsatte: string (nullable = true)
 |    |    |    |-- maaned: long (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- naermesteFremtidigeDato: string (nullable = true)
 |    |-- navne: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- navn: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- obligatoriskEmail: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- hemmelig: boolean (nullable = true)
 |    |    |    |-- kontaktoplysning: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- penheder: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- pNummer: long (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- postadresse: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- bogstavFra: string (nullable = true)
 |    |    |    |-- bynavn: string (nullable = true)
 |    |    |    |-- conavn: string (nullable = true)
 |    |    |    |-- etage: string (nullable = true)
 |    |    |    |-- fritekst: string (nullable = true)
 |    |    |    |-- husnummerFra: long (nullable = true)
 |    |    |    |-- husnummerTil: long (nullable = true)
 |    |    |    |-- kommune: struct (nullable = true)
 |    |    |    |    |-- kommuneKode: long (nullable = true)
 |    |    |    |    |-- kommuneNavn: string (nullable = true)
 |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- landekode: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- postboks: string (nullable = true)
 |    |    |    |-- postdistrikt: string (nullable = true)
 |    |    |    |-- postnummer: long (nullable = true)
 |    |    |    |-- sidedoer: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- vejkode: long (nullable = true)
 |    |    |    |-- vejnavn: string (nullable = true)
 |    |-- regNummer: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- regnummer: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- reklamebeskyttet: boolean (nullable = true)
 |    |-- samtId: long (nullable = true)
 |    |-- sidstIndlaest: string (nullable = true)
 |    |-- sidstOpdateret: string (nullable = true)
 |    |-- spaltninger: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- enhedsNummerOrganisation: long (nullable = true)
 |    |    |    |-- indgaaende: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- sekvensnr: long (nullable = true)
 |    |    |    |    |    |-- type: string (nullable = true)
 |    |    |    |    |    |-- vaerdier: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |    |-- vaerdi: string (nullable = true)
 |    |    |    |    |    |-- vaerditype: string (nullable = true)
 |    |    |    |-- organisationsNavn: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- navn: string (nullable = true)
 |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- udgaaende: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- sekvensnr: long (nullable = true)
 |    |    |    |    |    |-- type: string (nullable = true)
 |    |    |    |    |    |-- vaerdier: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |    |    |    |    |-- vaerdi: string (nullable = true)
 |    |    |    |    |    |-- vaerditype: string (nullable = true)
 |    |-- status: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- kreditoplysningkode: long (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- statuskode: long (nullable = true)
 |    |-- telefaxNummer: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- hemmelig: boolean (nullable = true)
 |    |    |    |-- kontaktoplysning: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- telefonNummer: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- hemmelig: boolean (nullable = true)
 |    |    |    |-- kontaktoplysning: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |-- virkningsAktoer: string (nullable = true)
 |    |-- virksomhedMetadata: struct (nullable = true)
 |    |    |-- antalPenheder: long (nullable = true)
 |    |    |-- nyesteAarsbeskaeftigelse: struct (nullable = true)
 |    |    |    |-- aar: long (nullable = true)
 |    |    |    |-- intervalKodeAntalAarsvaerk: string (nullable = true)
 |    |    |    |-- intervalKodeAntalAnsatte: string (nullable = true)
 |    |    |    |-- intervalKodeAntalInklusivEjere: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- nyesteBeliggenhedsadresse: struct (nullable = true)
 |    |    |    |-- bogstavFra: string (nullable = true)
 |    |    |    |-- bogstavTil: string (nullable = true)
 |    |    |    |-- bynavn: string (nullable = true)
 |    |    |    |-- conavn: string (nullable = true)
 |    |    |    |-- etage: string (nullable = true)
 |    |    |    |-- fritekst: string (nullable = true)
 |    |    |    |-- husnummerFra: long (nullable = true)
 |    |    |    |-- husnummerTil: long (nullable = true)
 |    |    |    |-- kommune: struct (nullable = true)
 |    |    |    |    |-- kommuneKode: long (nullable = true)
 |    |    |    |    |-- kommuneNavn: string (nullable = true)
 |    |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- landekode: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- postboks: string (nullable = true)
 |    |    |    |-- postdistrikt: string (nullable = true)
 |    |    |    |-- postnummer: long (nullable = true)
 |    |    |    |-- sidedoer: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- vejkode: long (nullable = true)
 |    |    |    |-- vejnavn: string (nullable = true)
 |    |    |-- nyesteBibranche1: struct (nullable = true)
 |    |    |    |-- branchekode: string (nullable = true)
 |    |    |    |-- branchetekst: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- nyesteBibranche2: struct (nullable = true)
 |    |    |    |-- branchekode: string (nullable = true)
 |    |    |    |-- branchetekst: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- nyesteBibranche3: struct (nullable = true)
 |    |    |    |-- branchekode: string (nullable = true)
 |    |    |    |-- branchetekst: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- nyesteFadCprnumre: array (nullable = true)
 |    |    |    |-- element: string (containsNull = true)
 |    |    |-- nyesteHovedbranche: struct (nullable = true)
 |    |    |    |-- branchekode: string (nullable = true)
 |    |    |    |-- branchetekst: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- nyesteKontaktoplysninger: array (nullable = true)
 |    |    |    |-- element: string (containsNull = true)
 |    |    |-- nyesteKvartalsbeskaeftigelse: struct (nullable = true)
 |    |    |    |-- aar: long (nullable = true)
 |    |    |    |-- intervalKodeAntalAarsvaerk: string (nullable = true)
 |    |    |    |-- intervalKodeAntalAnsatte: string (nullable = true)
 |    |    |    |-- kvartal: long (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- nyesteMaanedsbeskaeftigelse: struct (nullable = true)
 |    |    |    |-- aar: long (nullable = true)
 |    |    |    |-- intervalKodeAntalAarsvaerk: string (nullable = true)
 |    |    |    |-- intervalKodeAntalAnsatte: string (nullable = true)
 |    |    |    |-- maaned: long (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- nyesteNavn: struct (nullable = true)
 |    |    |    |-- navn: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- nyesteStatus: struct (nullable = true)
 |    |    |    |-- kreditoplysningkode: long (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- statuskode: long (nullable = true)
 |    |    |-- nyesteVirksomhedsform: struct (nullable = true)
 |    |    |    |-- ansvarligDataleverandoer: string (nullable = true)
 |    |    |    |-- kortBeskrivelse: string (nullable = true)
 |    |    |    |-- langBeskrivelse: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- virksomhedsformkode: long (nullable = true)
 |    |    |-- sammensatStatus: string (nullable = true)
 |    |    |-- stiftelsesDato: string (nullable = true)
 |    |    |-- virkningsDato: string (nullable = true)
 |    |-- virksomhedsform: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- ansvarligDataleverandoer: string (nullable = true)
 |    |    |    |-- kortBeskrivelse: string (nullable = true)
 |    |    |    |-- langBeskrivelse: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- virksomhedsformkode: long (nullable = true)
 |    |-- virksomhedsstatus: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |    |-- status: string (nullable = true)


In [5]:
def getNextSchemaLayer(schema,idx,name="name"):
    schemaDict = schema[idx].jsonValue()
    return list([i[name] for i in schemaDict["type"]["fields"][:]])

In [6]:
def createNextLayerTable(df,nonExplodedColumns,explodedColumn,*nonExplodedPrefix):
        '''
            The method takes a dataframe and explodes a column of choice such that the contence is more accessible

            input 
                - df: data frame with information
                - nonExplodedColumns: List with columns that are "static" the columns are not altered e.g. cvr 
                - explodedColumn: String with the name of the column that gets exploded, note that column must be of an arraytype
                - nonExplodedPrefix: String if values are nested inside a 
            output
                - output: data frame where nonExplodedColumns are unchanged, except for that the values are dupplicated with
                for the explodedColum
        '''
        if len(nonExplodedPrefix) == 1:
            prefixedStr = nonExplodedPrefix[0]+"."
        else:
            prefixedStr = ".".join(nonExplodedPrefix)

        if nonExplodedPrefix is None:
            relationsDf = df.select([df[v].alias(v) for v in nonExplodedColumns]+
                                [F.explode(df[explodedColumn]).alias(explodedColumn)])
        else:
            relationsDf = df.select([df[prefixedStr+v].alias(v) for v in nonExplodedColumns]+
                                [F.explode(df[prefixedStr+explodedColumn]).alias(explodedColumn)])

        dfSchema = getNextSchemaLayer(relationsDf.schema,explodedColumn)
        return (relationsDf
                .select([relationsDf[u] for u in nonExplodedColumns]
                        +[relationsDf[explodedColumn][v].alias(v) for v in dfSchema]))

In [7]:
def expandSubCols(df,mainColumn):
    '''
    The method expands all subcolumns in the next layer of mainColumn
    
    input:
        df - data frame with data
        mainColumn - the column that contains the subcolumns that should be flattent
    '''
    dfCols = df.columns
    dfCols.remove(mainColumn)
    return df.select([df[v] for v in dfCols]+
                     [df[mainColumn][u].alias(u) for u in getNextSchemaLayer(schema=df.schema,idx=mainColumn)])

In [ ]:


In [8]:
secondLayerSchema = getNextSchemaLayer(cvrDf.schema,"virksomhed")
#cvrDf.select(cvrDf["virksomhed.attributter.sekvensnr"]).show()
print(secondLayerSchema)


['aarsbeskaeftigelse', 'attributter', 'beliggenhedsadresse', 'bibranche1', 'bibranche2', 'bibranche3', 'binavne', 'brancheAnsvarskode', 'cvrNummer', 'dataAdgang', 'deltagerRelation', 'elektroniskPost', 'enhedsNummer', 'enhedstype', 'fejlBeskrivelse', 'fejlRegistreret', 'fejlVedIndlaesning', 'fusioner', 'hjemmeside', 'hovedbranche', 'kvartalsbeskaeftigelse', 'livsforloeb', 'maanedsbeskaeftigelse', 'naermesteFremtidigeDato', 'navne', 'obligatoriskEmail', 'penheder', 'postadresse', 'regNummer', 'reklamebeskyttet', 'samtId', 'sidstIndlaest', 'sidstOpdateret', 'spaltninger', 'status', 'telefaxNummer', 'telefonNummer', 'virkningsAktoer', 'virksomhedMetadata', 'virksomhedsform', 'virksomhedsstatus']

In [9]:
#Aarsvaerk as a table
vaekstDf = createNextLayerTable(cvrDf,["cvrNummer"],"aarsbeskaeftigelse","virksomhed")
#vaekstDf.show()

regexedVaekstDf = (vaekstDf.select(vaekstDf["cvrNummer"],vaekstDf["aar"]
                                      ,F.split(F.regexp_extract("intervalKodeAntalAarsvaerk",r'(\d{1,4}_\d{1,4})',0),r"\_").alias("intervalKodeAntalAarsvaerk")
                                      ,F.split(F.regexp_extract("intervalKodeAntalAnsatte",r'(\d{1,4}_\d{1,4})',0),r"\_").alias("intervalKodeAntalAnsatte")
                                      ,F.split(F.regexp_extract("intervalKodeAntalInklusivEjere",r'(\d{1,4}_\d{1,4})',0),r"\_").alias("intervalKodeAntalInklusivEjere")
                                      ,vaekstDf["sidstOpdateret"]))
regexedVaekstDf.orderBy(["cvrNummer","aar"]).show(50)


+---------+----+--------------------------+------------------------+------------------------------+--------------------+
|cvrNummer| aar|intervalKodeAntalAarsvaerk|intervalKodeAntalAnsatte|intervalKodeAntalInklusivEjere|      sidstOpdateret|
+---------+----+--------------------------+------------------------+------------------------------+--------------------+
| 10000009|1999|                    [1, 1]|                  [1, 1]|                        [1, 1]|2001-07-24T15:50:...|
| 10000025|2000|                    [1, 1]|                  [0, 0]|                        [0, 0]|2002-05-24T17:47:...|
| 10000025|2001|                    [1, 1]|                  [1, 1]|                        [1, 1]|2003-05-12T17:30:...|
| 10000025|2002|                    [1, 1]|                  [1, 1]|                        [1, 1]|2004-06-25T17:50:...|
| 10000025|2003|                    [1, 1]|                  [1, 1]|                        [1, 1]|2005-12-02T17:06:...|
| 10000025|2004|                    [1, 1]|                  [1, 1]|                        [1, 1]|2006-05-18T17:33:...|
| 10000025|2005|                    [1, 1]|                  [2, 4]|                        [2, 4]|2007-07-18T13:50:...|
| 10000025|2006|                    [1, 1]|                  [2, 4]|                        [2, 4]|2008-10-03T12:44:...|
| 10000025|2007|                    [1, 1]|                  [2, 4]|                        [2, 4]|2009-09-08T12:46:...|
| 10000025|2008|                    [1, 1]|                  [1, 1]|                        [1, 1]|2010-10-14T12:43:...|
| 10000025|2009|                    [1, 1]|                  [0, 0]|                        [0, 0]|2011-11-30T11:43:...|
| 10000106|1997|                    [1, 1]|                  [2, 4]|                        [2, 4]|1999-10-17T02:51:...|
| 10000106|1998|                    [1, 1]|                  [2, 4]|                        [2, 4]|2000-06-29T15:32:...|
| 10000106|1999|                    [2, 4]|                  [2, 4]|                        [2, 4]|2001-07-24T15:53:...|
| 10000106|2000|                    [1, 1]|                  [2, 4]|                        [2, 4]|2002-05-24T15:47:...|
| 10000106|2001|                    [2, 4]|                  [2, 4]|                        [2, 4]|2003-05-12T15:30:...|
| 10000106|2002|                    [2, 4]|                  [2, 4]|                        [2, 4]|2004-06-25T15:50:...|
| 10000106|2003|                    [2, 4]|                  [2, 4]|                        [2, 4]|2005-12-02T16:07:...|
| 10000106|2004|                    [2, 4]|                  [2, 4]|                        [2, 4]|2006-05-18T15:30:...|
| 10000106|2005|                    [2, 4]|                  [5, 9]|                        [5, 9]|2007-12-03T10:43:...|
| 10000106|2006|                    [2, 4]|                  [5, 9]|                        [5, 9]|2008-10-03T10:45:...|
| 10000122|1999|                    [1, 1]|                  [1, 1]|                        [1, 1]|2001-07-24T17:37:...|
| 10000122|2000|                    [2, 4]|                  [5, 9]|                        [5, 9]|2002-05-24T17:47:...|
| 10000122|2001|                    [1, 1]|                  [0, 0]|                        [0, 0]|2003-05-12T17:30:...|
| 10000173|2011|                    [0, 0]|                  [0, 0]|                        [1, 1]|2013-07-18T11:00:...|
| 10000211|1999|                    [1, 1]|                  [2, 4]|                        [2, 4]|2001-07-24T17:37:...|
| 10000211|2000|                    [1, 1]|                  [0, 0]|                        [0, 0]|2002-05-24T17:47:...|
| 10000254|1999|                    [1, 1]|                  [2, 4]|                        [2, 4]|2001-07-24T15:50:...|
| 10000254|2000|                  [20, 49]|                [20, 49]|                      [20, 49]|2002-05-24T15:47:...|
| 10000254|2001|                  [20, 49]|                [20, 49]|                      [20, 49]|2003-05-12T15:30:...|
| 10000254|2002|                  [20, 49]|                [20, 49]|                      [20, 49]|2004-06-25T15:50:...|
| 10000254|2003|                  [20, 49]|                [20, 49]|                      [20, 49]|2005-12-02T16:06:...|
| 10000254|2004|                  [20, 49]|                [50, 99]|                      [50, 99]|2006-05-18T15:33:...|
| 10000254|2005|                  [20, 49]|                [50, 99]|                      [50, 99]|2007-07-18T11:50:...|
| 10000254|2006|                  [20, 49]|                [50, 99]|                      [50, 99]|2008-10-03T10:44:...|
| 10000254|2007|                  [20, 49]|                [20, 49]|                      [20, 49]|2009-09-08T10:46:...|
| 10000254|2008|                  [50, 99]|                [50, 99]|                      [50, 99]|2010-10-14T10:43:...|
| 10000254|2009|                  [20, 49]|                [20, 49]|                      [20, 49]|2011-11-30T10:43:...|
| 10000262|2009|                    [1, 1]|                  [2, 4]|                        [2, 4]|2011-11-30T11:43:...|
| 10000270|2002|                    [0, 0]|                  [0, 0]|                        [1, 1]|2004-06-25T15:50:...|
| 10000270|2004|                    [0, 0]|                  [0, 0]|                        [1, 1]|2006-05-18T15:30:...|
| 10000297|1998|                    [0, 0]|                  [0, 0]|                        [1, 1]|2000-06-29T15:32:...|
| 10000297|1999|                    [0, 0]|                  [0, 0]|                        [1, 1]|2001-07-24T15:59:...|
| 10000297|2000|                    [0, 0]|                  [0, 0]|                        [1, 1]|2002-05-24T15:47:...|
| 10000297|2001|                    [0, 0]|                  [0, 0]|                        [1, 1]|2003-05-12T15:32:...|
| 10000297|2002|                    [0, 0]|                  [0, 0]|                        [1, 1]|2004-06-25T15:50:...|
| 10000297|2003|                    [0, 0]|                  [0, 0]|                        [1, 1]|2005-12-02T16:07:...|
| 10000297|2004|                    [0, 0]|                  [0, 0]|                        [1, 1]|2006-05-18T15:30:...|
| 10000297|2005|                    [0, 0]|                  [0, 0]|                        [1, 1]|2007-07-18T11:45:...|
| 10000297|2006|                    [0, 0]|                  [0, 0]|                        [1, 1]|2008-10-03T10:45:...|
+---------+----+--------------------------+------------------------+------------------------------+--------------------+
only showing top 50 rows


In [30]:
#Kvartalsverk  as a tabel
maanedsVerkDf = createNextLayerTable(cvrDf,["cvrNummer"],"kvartalsbeskaeftigelse","virksomhed")
#maanedsVerkDf.show()

reMaanedsVerkDf = (maanedsVerkDf
                   .select(maanedsVerkDf["cvrNummer"],maanedsVerkDf["aar"],maanedsVerkDf["kvartal"]
                           ,F.split(F.regexp_extract("intervalKodeAntalAarsvaerk",r'(\d{1,4}_\d{1,4})',0),r"\_").alias("intervalKodeAntalAarsvaerk")
                           ,F.split(F.regexp_extract("intervalKodeAntalAnsatte",r'(\d{1,4}_\d{1,4})',0),r"\_").alias("intervalKodeAntalAnsatte")                           
                           ,maanedsVerkDf["sidstOpdateret"]))
reMaanedsVerkDf.show()
print(reMaanedsVerkDf.dtypes)


+---------+----+-------+--------------------------+------------------------+--------------------+
|cvrNummer| aar|kvartal|intervalKodeAntalAarsvaerk|intervalKodeAntalAnsatte|      sidstOpdateret|
+---------+----+-------+--------------------------+------------------------+--------------------+
| 27850367|2004|      3|                      null|                  [1, 1]|2005-04-19T17:38:...|
| 27850367|2004|      4|                      null|                  [1, 1]|2005-10-04T12:50:...|
| 27850367|2005|      1|                      null|                  [1, 1]|2005-10-04T17:32:...|
| 27850367|2005|      2|                      null|                  [1, 1]|2005-12-02T16:56:...|
| 27850367|2005|      3|                      null|                  [0, 0]|2006-04-11T17:32:...|
| 27850367|2005|      4|                      null|                  [2, 4]|2006-09-01T12:45:...|
| 27850367|2006|      1|                      null|                  [1, 1]|2007-01-11T16:33:...|
| 27850367|2006|      2|                      null|                  [1, 1]|2007-01-12T10:46:...|
| 27850367|2006|      3|                      null|                  [1, 1]|2007-07-18T14:12:...|
| 27850367|2006|      4|                      null|                  [0, 0]|2007-09-03T10:04:...|
| 27850367|2007|      1|                      null|                  [0, 0]|2007-09-03T11:14:...|
| 26452635|2002|      1|                      null|                  [0, 0]|2002-09-24T17:46:...|
| 26452635|2002|      2|                      null|                  [1, 1]|2002-12-05T16:46:...|
| 26452635|2002|      3|                      null|                  [1, 1]|2003-03-13T16:50:...|
| 26452635|2002|      4|                      null|                  [1, 1]|2003-05-21T18:19:...|
| 26452635|2003|      1|                      null|                  [1, 1]|2003-09-11T19:52:...|
| 26452635|2003|      2|                      null|                  [1, 1]|2003-12-15T16:43:...|
| 26452635|2004|      3|                      null|                  [1, 1]|2005-04-19T17:38:...|
| 26452635|2004|      4|                      null|                  [1, 1]|2005-10-04T12:49:...|
| 26452635|2005|      1|                      null|                  [1, 1]|2005-10-04T17:31:...|
+---------+----+-------+--------------------------+------------------------+--------------------+
only showing top 20 rows

[('cvrNummer', 'bigint'), ('aar', 'bigint'), ('kvartal', 'bigint'), ('intervalKodeAntalAarsvaerk', 'array<string>'), ('intervalKodeAntalAnsatte', 'array<string>'), ('sidstOpdateret', 'string')]

In [10]:
#Get the attributes with values and periods as a table

attributesDf = createNextLayerTable(cvrDf,["cvrNummer"],"attributter","virksomhed")
#attributesDf.show(10)
#print(attributesDf.schema["cvrNummer"])
#get column names
attributesCols = attributesDf.columns
attributesCols.remove("vaerdier")

attributesWithValueDf = createNextLayerTable(attributesDf,attributesCols,"vaerdier")
#attributesWithValueDf.show(10)

attributesWithValueAndPeriodDf = expandSubCols(attributesWithValueDf,"periode")
attributesWithValueAndPeriodDf.orderBy(["cvrNummer","type"],ascending=[1, 1]).show(100)


+---------+---------+--------------------+----------+--------------------+--------------------+----------+----------+
|cvrNummer|sekvensnr|                type|vaerditype|      sidstOpdateret|              vaerdi| gyldigFra| gyldigTil|
+---------+---------+--------------------+----------+--------------------+--------------------+----------+----------+
| 10000009|        0|ARKIV_REGISTRERIN...|    string|2015-02-09T20:00:...|           ApS261654|1999-10-12|2001-12-11|
| 10000009|        0|              FORMÅL|    string|2015-02-09T20:00:...|Selskabets formål...|1999-10-12|2001-12-11|
| 10000009|        0|FØRSTE_REGNSKABSP...|      date|2015-02-09T20:00:...|          2000-09-30|1999-10-12|2001-12-11|
| 10000009|        0|FØRSTE_REGNSKABSP...|      date|2015-02-09T20:00:...|          1999-08-01|1999-10-12|2001-12-11|
| 10000009|        0|             KAPITAL|   decimal|2015-02-09T20:00:...|           125000.00|1999-10-12|2001-12-11|
| 10000009|        0|       KAPITALVALUTA|    string|2015-02-09T20:00:...|                 DKK|1999-10-12|2001-12-11|
| 10000009|        0|      NAVN_IDENTITET|    string|2015-02-09T20:00:...|              YELLOW|1999-10-12|2001-12-11|
| 10000009|        0|         PSEUDOCVRNR|   boolean|2015-02-09T20:00:...|               false|1999-10-12|2001-12-11|
| 10000009|        0|    REGNSKABSÅR_SLUT| gMonthDay|2015-02-09T20:00:...|             --09-30|1999-10-12|2001-12-11|
| 10000009|        0|   REGNSKABSÅR_START| gMonthDay|2015-02-09T20:00:...|             --10-01|1999-10-12|2001-12-11|
| 10000009|        0|       TEGNINGSREGEL|    string|2015-02-09T20:00:...|Selskabet tegnes ...|1999-10-12|2001-12-11|
| 10000009|        0|     VEDTÆGT_SENESTE|      date|2015-02-09T20:00:...|          1999-10-12|1999-10-12|2001-12-11|
| 10000025|        0|ARKIV_REGISTRERIN...|    string|2015-02-10T00:00:...|           ApS261655|1999-10-13|      null|
| 10000025|        0|EJERREGISTRERING_...|   boolean|2015-11-16T15:12:...|               false|2015-11-16|      null|
| 10000025|        0|              FORMÅL|    string|2015-02-10T00:00:...|Selskabets formål...|2013-05-31|      null|
| 10000025|        0|FØRSTE_REGNSKABSP...|      date|2015-02-10T00:00:...|          2000-12-31|1999-10-13|      null|
| 10000025|        0|FØRSTE_REGNSKABSP...|      date|2015-02-10T00:00:...|          1999-10-13|1999-10-13|      null|
| 10000025|        0|GENOPTAGELSE_TVAN...|      date|2015-02-10T00:00:...|          2012-05-31|2012-05-31|      null|
| 10000025|        0|             KAPITAL|   decimal|2015-02-10T00:00:...|           125000.00|1999-10-13|      null|
| 10000025|        0|       KAPITALVALUTA|    string|2015-02-10T00:00:...|                 DKK|1999-10-13|      null|
| 10000025|        0|      NAVN_IDENTITET|    string|2015-02-10T00:00:...|WATERFRONT CONNEC...|1999-10-13|      null|
| 10000025|        1|      NAVN_IDENTITET|    string|2015-02-10T00:00:...|       GUITAR LOUNGE|1999-10-13|      null|
| 10000025|        0|         PSEUDOCVRNR|   boolean|2015-02-10T00:00:...|               false|1999-10-13|      null|
| 10000025|        0|    REGNSKABSÅR_SLUT| gMonthDay|2015-02-10T00:00:...|             --12-31|1999-10-13|      null|
| 10000025|        0|   REGNSKABSÅR_START| gMonthDay|2015-02-10T00:00:...|             --01-01|1999-10-13|      null|
| 10000025|        0|   REVISION_FRAVALGT|   boolean|2015-02-10T00:00:...|                true|1999-10-13|      null|
| 10000025|        0|       TEGNINGSREGEL|    string|2015-02-10T00:00:...|Selskabets tegnes...|2013-05-31|      null|
| 10000025|        0|     VEDTÆGT_SENESTE|      date|2015-02-10T00:00:...|          2013-05-31|2013-05-31|      null|
| 10000068|        0|ARKIV_REGISTRERIN...|    string|2015-02-09T21:00:...|           FAS263254|1999-08-25|2001-12-20|
| 10000068|        0|              FORMÅL|    string|2015-02-09T21:00:...|Filialens formål ...|1999-08-25|2001-12-20|
| 10000068|        0|      NAVN_IDENTITET|    string|2015-02-09T21:00:...|STUDENTCONSULTING...|1999-10-18|2001-12-20|
| 10000068|        0|         PSEUDOCVRNR|   boolean|2015-02-09T21:00:...|               false|1999-08-25|2001-12-20|
| 10000068|        0|       TEGNINGSREGEL|    string|2015-02-09T21:00:...|Filialen tegnes a...|1999-08-25|2001-12-20|
| 10000122|        0|ARKIV_REGISTRERIN...|    string|2015-02-09T21:00:...|           ApS261686|1999-10-14|2002-08-15|
| 10000122|        0|              FORMÅL|    string|2015-02-09T21:00:...|Selskabets formål...|2000-02-01|2002-08-15|
| 10000122|        0|FØRSTE_REGNSKABSP...|      date|2015-02-09T21:00:...|          2000-09-30|1999-10-14|2002-08-15|
| 10000122|        0|FØRSTE_REGNSKABSP...|      date|2015-02-09T21:00:...|          1999-10-14|1999-10-14|2002-08-15|
| 10000122|        0|             KAPITAL|   decimal|2015-02-09T21:00:...|           125000.00|1999-10-14|2002-08-15|
| 10000122|        0|       KAPITALVALUTA|    string|2015-02-09T21:00:...|                 DKK|1999-10-14|2002-08-15|
| 10000122|        0|      NAVN_IDENTITET|    string|2015-02-09T21:00:...|  DIGITAL CENTER FYN|1999-10-14|2002-08-15|
| 10000122|        0|         PSEUDOCVRNR|   boolean|2015-02-09T21:00:...|               false|1999-10-14|2002-08-15|
| 10000122|        0|    REGNSKABSÅR_SLUT| gMonthDay|2015-02-09T21:00:...|             --09-30|1999-10-14|2002-08-15|
| 10000122|        0|   REGNSKABSÅR_START| gMonthDay|2015-02-09T21:00:...|             --10-01|1999-10-14|2002-08-15|
| 10000122|        0|       TEGNINGSREGEL|    string|2015-02-09T21:00:...|Selskabet tegnes ...|2000-02-01|2002-08-15|
| 10000122|        0|     VEDTÆGT_SENESTE|      date|2015-02-09T21:00:...|          2000-02-01|2000-02-01|2002-08-15|
| 10000157|        0|ARKIV_REGISTRERIN...|    string|2015-02-10T00:00:...|           ApS262280|1999-10-14|      null|
| 10000157|        0|              FORMÅL|    string|2015-02-10T00:00:...|Selskabets formål...|1999-11-04|      null|
| 10000157|        0|FØRSTE_REGNSKABSP...|      date|2015-02-10T00:00:...|          2000-12-31|1999-10-14|      null|
| 10000157|        0|FØRSTE_REGNSKABSP...|      date|2015-02-10T00:00:...|          1999-10-14|1999-10-14|      null|
| 10000157|        0|             KAPITAL|   decimal|2015-02-10T00:00:...|           125000.00|1999-11-04|      null|
| 10000157|        0|       KAPITALVALUTA|    string|2015-02-10T00:00:...|                 DKK|1999-11-04|      null|
| 10000157|        0|      NAVN_IDENTITET|    string|2015-02-10T00:00:...|         WITT INVEST|1999-11-04|      null|
| 10000157|        0|      NAVN_IDENTITET|    string|2015-02-10T00:00:...|        WITT HOLDING|1999-10-14|1999-11-03|
| 10000157|        0|         PSEUDOCVRNR|   boolean|2015-02-10T00:00:...|               false|1999-10-14|      null|
| 10000157|        0|    REGNSKABSÅR_SLUT| gMonthDay|2015-02-10T00:00:...|             --12-31|1999-10-14|      null|
| 10000157|        0|   REGNSKABSÅR_START| gMonthDay|2015-02-10T00:00:...|             --01-01|1999-10-14|      null|
| 10000157|        0|   REVISION_FRAVALGT|   boolean|2015-03-12T00:00:...|               false|1999-10-14|      null|
| 10000157|        0|       TEGNINGSREGEL|    string|2015-02-10T00:00:...|Selskabet tegnes ...|1999-11-04|      null|
| 10000157|        0|     VEDTÆGT_SENESTE|      date|2015-02-10T00:00:...|          1999-11-04|1999-11-04|      null|
| 10000165|        0|ARKIV_REGISTRERIN...|    string|2015-02-09T21:00:...|           A/S261921|1999-10-13|2006-05-18|
| 10000165|        0|              FORMÅL|    string|2015-02-09T21:00:...|Selskabets formål...|2000-03-24|2006-05-18|
| 10000165|        0|FØRSTE_REGNSKABSP...|      date|2015-02-09T21:00:...|          2001-03-31|1999-10-13|2006-05-18|
| 10000165|        0|FØRSTE_REGNSKABSP...|      date|2015-02-09T21:00:...|          1999-10-13|1999-10-13|2006-05-18|
| 10000165|        0|             KAPITAL|   decimal|2015-02-09T21:00:...|           500000.00|1999-10-13|2006-05-18|
| 10000165|        0|       KAPITALVALUTA|    string|2015-02-09T21:00:...|                 DKK|1999-10-13|2006-05-18|
| 10000165|        0|      NAVN_IDENTITET|    string|2015-02-09T21:00:...|           E.K. JORD|1999-10-13|2006-05-18|
| 10000165|        0|         PSEUDOCVRNR|   boolean|2015-02-09T21:00:...|               false|1999-10-13|2006-05-18|
| 10000165|        0|    REGNSKABSÅR_SLUT| gMonthDay|2015-02-09T21:00:...|             --03-31|1999-10-13|2006-05-18|
| 10000165|        0|   REGNSKABSÅR_START| gMonthDay|2015-02-09T21:00:...|             --04-01|1999-10-13|2006-05-18|
| 10000165|        0|       TEGNINGSREGEL|    string|2015-02-09T21:00:...|Selskabet tegnes ...|2000-03-24|2006-05-18|
| 10000165|        0|     VEDTÆGT_SENESTE|      date|2015-02-09T21:00:...|          2000-03-24|2000-03-24|2006-05-18|
| 10000211|        0|ARKIV_REGISTRERIN...|    string|2015-02-10T00:00:...|           A/S261735|1999-09-29|      null|
| 10000211|        0|              FORMÅL|    string|2015-02-10T00:00:...|Selskabets formål...|2010-05-28|      null|
| 10000211|        0|FØRSTE_REGNSKABSP...|      date|2015-02-10T00:00:...|          2000-12-31|1999-09-29|      null|
| 10000211|        0|FØRSTE_REGNSKABSP...|      date|2015-02-10T00:00:...|          1999-09-29|1999-09-29|      null|
| 10000211|        0|             KAPITAL|   decimal|2015-02-10T00:00:...|           500000.00|1999-09-29|      null|
| 10000211|        0|       KAPITALVALUTA|    string|2015-02-10T00:00:...|                 DKK|1999-09-29|      null|
| 10000211|        0|      NAVN_IDENTITET|    string|2015-02-10T00:00:...|UGEAVISEN FOR BIL...|1999-09-29|2001-08-08|
| 10000211|        0|      NAVN_IDENTITET|    string|2015-02-10T00:00:...|KOMPLEMENTARSELSK...|2001-08-09|      null|
| 10000211|        0|         PSEUDOCVRNR|   boolean|2015-02-10T00:00:...|               false|1999-09-29|      null|
| 10000211|        0|    REGNSKABSÅR_SLUT| gMonthDay|2015-02-10T00:00:...|             --12-31|1999-09-29|      null|
| 10000211|        0|   REGNSKABSÅR_START| gMonthDay|2015-02-10T00:00:...|             --01-01|1999-09-29|      null|
| 10000211|        0|   REVISION_FRAVALGT|   boolean|2015-03-12T00:00:...|               false|1999-09-29|      null|
| 10000211|        0|       TEGNINGSREGEL|    string|2015-02-10T00:00:...|Selskabet tegnes ...|2010-05-28|      null|
| 10000211|        0|     VEDTÆGT_SENESTE|      date|2015-02-10T00:00:...|          2010-05-28|2010-05-28|      null|
| 10000238|        0|ARKIV_REGISTRERIN...|    string|2015-02-09T21:00:...|           A/S110444|1985-11-15|1989-07-05|
| 10000238|        0|              FORMÅL|    string|2015-02-09T21:00:...|Selskabets formål...|1985-11-15|1989-07-05|
| 10000238|        0|FØRSTE_REGNSKABSP...|      date|2015-02-09T21:00:...|          1986-04-30|1985-11-15|1989-07-05|
| 10000238|        0|FØRSTE_REGNSKABSP...|      date|2015-02-09T21:00:...|          1985-11-15|1985-11-15|1989-07-05|
| 10000238|        0|             KAPITAL|   decimal|2015-02-09T21:00:...|           300000.00|1987-08-30|1989-07-05|
| 10000238|        0|       KAPITALVALUTA|    string|2015-02-09T21:00:...|                 DKK|1987-08-30|1989-07-05|
| 10000238|        0|      NAVN_IDENTITET|    string|2015-02-09T21:00:...|SANDERS KØKKENGÅR...|1985-11-15|1989-07-11|
| 10000238|        0|         PSEUDOCVRNR|   boolean|2015-02-09T21:00:...|                true|1985-11-15|1989-07-05|
| 10000238|        0|    REGNSKABSÅR_SLUT| gMonthDay|2015-02-09T21:00:...|             --04-30|1985-11-15|1989-07-05|
| 10000238|        0|   REGNSKABSÅR_START| gMonthDay|2015-02-09T21:00:...|             --05-01|1985-11-15|1989-07-05|
| 10000238|        0|       TEGNINGSREGEL|    string|2015-02-09T21:00:...|Selskabet tegnes ...|1985-11-15|1989-07-05|
| 10000238|        0|     VEDTÆGT_SENESTE|      date|2015-02-09T21:00:...|          1985-11-15|1985-11-15|1989-07-05|
| 10000254|        0|ARKIV_REGISTRERIN...|    string|2015-02-10T00:00:...|           A/S261668|1999-10-12|      null|
| 10000254|        0|              FORMÅL|    string|2015-02-10T00:00:...|Selskabets formål...|2010-06-02|      null|
| 10000254|        0|FØRSTE_REGNSKABSP...|      date|2015-02-10T00:00:...|          2000-12-31|1999-10-12|      null|
+---------+---------+--------------------+----------+--------------------+--------------------+----------+----------+
only showing top 100 rows


In [11]:
showAttributes = attributesWithValueAndPeriodDf.groupBy(["type"]).count().orderBy("count").collect()
for v in showAttributes:
    print("type: {} count: {}".format(v[0],v[1]))


type: SØV_IEF_TILSYN_TEKST count: 2
type: SØV_IEF_TILSYN count: 13
type: REGNSKABSÅR_FRITEKST count: 55
type: MYNDIGHED_ANDEN count: 99
type: STIFTET_FØR_1900 count: 144
type: SOCIAL_ØKONOMISK_VIRKSOMHED count: 193
type: OFFENTLIG_EJERBOG count: 275
type: KONCESSIONSDATO count: 351
type: BØRSNOTERET count: 355
type: STADFÆSTELSESDATO count: 501
type: STADFÆSTET_AF count: 501
type: FINANSIEL_DELTYPE count: 755
type: TILSYN_KATEGORI count: 785
type: TILLADELSESDATO_FONDSMYNDIGHED count: 1963
type: FINANSIELT_FORMÅL count: 1976
type: REGISTRERING_LIKVIDATION count: 3264
type: OPLØSNINGSTRUSSEL_SENESTE count: 4265
type: OMFATTET_AF_LOV_OM_HVIDVASK_OG_TERRORFINANSIERING count: 8648
type: GENOPTAGELSE_TVANGSOPLØSNING count: 13496
type: STATSLIG_VIRK count: 39185
type: KAPITAL_DELVIST count: 39393
type: OMLÆGNINGSPERIODE_START count: 47807
type: OMLÆGNINGSPERIODE_SLUT count: 47810
type: KAPITALKLASSER count: 62855
type: EJERREGISTRERING_UNDER_5_PROCENT count: 110211
type: REVISION_FRAVALGT count: 307594
type: FØRSTE_REGNSKABSPERIODE_START count: 488616
type: FØRSTE_REGNSKABSPERIODE_SLUT count: 488820
type: ARKIV_REGISTRERINGSNUMMER count: 504468
type: REGNSKABSÅR_START count: 516886
type: REGNSKABSÅR_SLUT count: 516888
type: PSEUDOCVRNR count: 521839
type: FORMÅL count: 524569
type: TEGNINGSREGEL count: 531251
type: VEDTÆGT_SENESTE count: 542108
type: KAPITALVALUTA count: 688207
type: KAPITAL count: 696457
type: NAVN_IDENTITET count: 990311

In [11]:
#udtrækning af kapital, gyldighedsdatoer og cvr
valuesFromAttributesDf = (attributesWithValueAndPeriodDf
                          .filter(F.col("type") == "KAPITAL")
                          .select("cvrNummer","vaerdi","gyldigFra",F.coalesce("gyldigTil",F.lit(F.current_date())).alias("gyldigTil"))
                          .withColumn(colName="varighed",col=F.datediff(end="gyldigTil",start="gyldigFra")))

In [13]:
print(valuesFromAttributesDf.select("cvrNummer").distinct().count())


499698

In [12]:
valuesFromAttributesDf.show(20)


+---------+----------+----------+----------+--------+
|cvrNummer|    vaerdi| gyldigFra| gyldigTil|varighed|
+---------+----------+----------+----------+--------+
| 26452635| 170011.00|2003-02-24|2010-01-21|    2523|
| 26452635| 140080.00|2002-01-08|2002-06-20|     163|
| 26452635| 145000.00|2002-06-21|2002-09-01|      72|
| 26452635| 155900.00|2002-09-02|2003-02-23|     174|
| 15157984|  80000.00|1991-05-08|1995-09-27|    1603|
| 15111038|1000000.00|1997-11-06|1998-06-30|     236|
| 15111038| 300000.00|1991-04-04|1991-10-08|     187|
| 15111038|7000000.00|1991-10-09|1997-11-05|    2219|
| 14731644| 300000.00|1990-11-20|1993-08-04|     988|
| 14728147| 500000.00|1992-06-30|1996-02-07|    1317|
| 14728147| 300000.00|1990-11-20|1992-06-29|     587|
| 15500387|  80000.00|1991-09-02|1995-01-10|    1226|
| 15501332|  80000.00|1991-09-02|1995-11-22|    1542|
| 15500972| 500000.00|1992-01-08|1995-02-22|    1141|
| 15500972| 300000.00|1991-09-02|1992-01-07|     127|
| 15276703| 300000.00|1991-07-05|1993-11-12|     861|
| 13917698| 325000.00|1991-10-23|1998-10-13|    2547|
| 13917698|  80000.00|1990-02-19|1991-08-08|     535|
| 13917698| 300000.00|1991-08-09|1991-10-22|      74|
| 12073097| 560000.00|1988-09-08|1994-02-18|    1989|
+---------+----------+----------+----------+--------+
only showing top 20 rows


In [13]:
#compute cummulative difference for each cvrnummer between vaerdi

windowSpec = (Window
              .partitionBy(valuesFromAttributesDf["cvrNummer"])
              .orderBy(valuesFromAttributesDf["gyldigFra"])
              )
windowSpec.rowsBetween(-1,0)
valuesAndCumDifDf = (valuesFromAttributesDf
                     .withColumn(colName="CumlativeDiff",col=F.col("vaerdi")-F.lag("vaerdi").over(windowSpec))).cache()

In [16]:
valuesAndCumDifDf.show(250)


+---------+------------+----------+----------+--------+-------------+
|cvrNummer|      vaerdi| gyldigFra| gyldigTil|varighed|CumlativeDiff|
+---------+------------+----------+----------+--------+-------------+
| 10017025|   500000.00|2000-02-01|2002-05-02|     821|         null|
| 10019052|   125000.00|2000-02-23|2002-05-27|     824|         null|
| 10026113|   125000.00|2000-06-23|2004-09-27|    1557|         null|
| 10027926|   600000.00|2000-06-08|2001-01-15|     221|         null|
| 10029325|100000000.00|2000-06-07|2000-12-17|     193|         null|
| 10029325|105000000.00|2000-12-18|2007-12-19|    2557|    5000000.0|
| 10039983|    80000.00|1987-08-30|1993-03-24|    2033|         null|
| 10040523|   125000.00|2000-07-01|2001-05-20|     323|         null|
| 10040523|   500000.00|2001-05-21|2005-10-14|    1607|     375000.0|
| 10040523|   500000.00|2005-10-15|2006-06-29|     257|          0.0|
| 10040523|  1000000.00|2006-06-30|2013-04-04|    2470|     500000.0|
| 10056535|    80000.00|1987-08-30|1992-08-07|    1804|         null|
| 10057426|   600000.00|2000-12-20|2011-07-31|    3875|         null|
| 10057426|   646154.00|2011-08-01|2017-01-05|    1984|      46154.0|
| 10063558|   125000.00|2000-12-29|2008-08-19|    2790|         null|
| 10065798|    80000.00|1987-08-30|1996-02-25|    3101|         null|
| 10065798|   125000.00|1996-02-26|2002-01-08|    2143|      45000.0|
| 10071526|   125000.00|2001-05-10|2001-09-26|     139|         null|
| 10071526|   250000.00|2001-09-27|2002-02-05|     131|     125000.0|
| 10071526|   125000.00|2002-02-06|2017-01-05|    5447|    -125000.0|
| 10079322|   125000.00|2001-12-05|2012-05-10|    3809|         null|
| 10079322|  2000000.00|2012-05-11|2017-01-05|    1700|    1875000.0|
| 10081653|   125000.00|2002-05-07|2017-01-05|    5357|         null|
| 10089514|   149682.00|2002-06-28|2017-01-05|    5305|         null|
| 10090458|   500000.00|2002-07-02|2014-01-01|    4201|         null|
| 10091519|   125000.00|2002-06-28|2017-01-05|    5305|         null|
| 10091713|   500000.00|2002-07-01|2005-02-23|     968|         null|
| 10102863|   125000.00|2003-01-08|2017-01-05|    5111|         null|
| 10108624|   125000.00|2003-01-01|2015-02-06|    4419|         null|
| 10114411|   320000.00|1987-08-30|1996-12-19|    3399|         null|
| 10114411|   500000.00|1996-12-20|2007-02-12|    3706|     180000.0|
| 10123127|   125000.00|2002-12-09|2017-01-05|    5141|         null|
| 10127351|  1000000.00|2002-12-10|2017-01-05|    5140|         null|
| 10128587|    80000.00|1987-08-30|1996-12-22|    3402|         null|
| 10128587|   125000.00|1996-12-23|2005-03-29|    3018|      45000.0|
| 10128587|   650000.00|2005-03-30|2006-11-22|     602|     525000.0|
| 10129486|    80000.00|1987-08-30|1997-03-04|    3474|         null|
| 10139767|   125000.00|2003-06-25|2004-11-18|     512|         null|
| 10139767|   125000.00|2004-11-19|2008-08-05|    1355|          0.0|
| 10145619|   125000.00|2003-07-24|2017-01-05|    4914|         null|
| 10150078|    90000.00|1987-08-30|1990-09-04|    1101|         null|
| 10150124|   125000.00|2003-09-08|2013-10-10|    3685|         null|
| 10156521|   125000.00|2003-12-17|2013-04-01|    3393|         null|
| 10200288|   300000.00|1987-08-30|1996-12-03|    3383|         null|
| 10200288|   500000.00|1996-12-04|2015-01-22|    6623|     200000.0|
| 10212782|    80000.00|1987-08-30|1997-02-24|    3466|         null|
| 10212782|   125000.00|1997-02-25|1997-11-04|     252|      45000.0|
| 10219698|    19000.00|1987-03-12|1987-08-29|     170|         null|
| 10219698|    99000.00|1987-08-30|1993-09-02|    2195|      80000.0|
| 10240972|    90000.00|1987-08-30|1987-08-17|     -13|         null|
| 10269636|   350000.00|1987-08-30|1990-07-03|    1038|         null|
| 10279836|    80000.00|1987-08-30|1995-11-27|    3011|         null|
| 10282578|    80000.00|1987-08-30|1988-09-20|     387|         null|
| 10282578|    80000.00|1988-09-21|1994-02-03|    1961|          0.0|
| 10282578|   200000.00|1994-02-04|2004-07-08|    3807|     120000.0|
| 10293537|    80000.00|1987-08-30|2000-01-10|    4516|         null|
| 10311055|    30000.00|1987-08-30|1993-06-15|    2116|         null|
| 10315107|    80000.00|1987-08-30|1991-01-08|    1227|         null|
| 10315107|   300000.00|1991-01-09|1998-01-05|    2553|     220000.0|
| 10324602|    80000.00|1987-08-30|1991-01-31|    1250|         null|
| 10325978|   300000.00|1987-08-30|1990-08-07|    1073|         null|
| 10329744|    80000.00|1987-08-30|1992-08-20|    1817|         null|
| 10329744|   200000.00|1992-08-21|1998-06-22|    2131|     120000.0|
| 10351235|  3000000.00|1987-08-30|2017-01-05|   10721|         null|
| 10366380|   300000.00|1987-08-30|1996-06-27|    3224|         null|
| 10366380|   500000.00|1996-06-28|2004-07-30|    2954|     200000.0|
| 10368901|  1200000.00|1987-08-20|1987-08-29|       9|         null|
| 10368901|  1500000.00|1987-08-30|1993-01-20|    1970|     300000.0|
| 10368901|  1600000.00|1993-01-21|1994-02-01|     376|     100000.0|
| 10368901| 26600000.00|1994-02-02|1997-06-26|    1240|        2.5E7|
| 10397707|    80000.00|1987-08-30|1991-01-22|    1241|         null|
| 10412986|   200000.00|1987-08-30|1989-10-08|     770|         null|
| 10412986|    80000.00|1989-10-09|1995-10-29|    2211|    -120000.0|
| 10412986|   800000.00|1995-10-30|2005-08-10|    3572|     720000.0|
| 10417244|   300000.00|1987-08-30|1996-11-14|    3364|         null|
| 10423872|    80000.00|1987-08-30|1989-10-13|     775|         null|
| 10427088|    80000.00|1987-08-30|1993-10-05|    2228|         null|
| 10427088|   200000.00|1993-10-06|2002-11-28|    3340|     120000.0|
| 10434408|    80000.00|1987-08-30|1993-01-28|    1978|         null|
| 10471974|    80000.00|1987-08-30|1996-11-26|    3376|         null|
| 10478375|    80000.00|1987-08-30|1987-10-25|      56|         null|
| 10478375|   600000.00|1987-10-26|1996-09-19|    3251|     520000.0|
| 10478375|   400000.00|1996-09-20|1999-07-27|    1040|    -200000.0|
| 10480493|    80000.00|1987-08-30|1989-03-29|     577|         null|
| 10481171|    80000.00|1987-08-30|1996-08-12|    3270|         null|
| 10481171|   125000.00|1996-08-13|1999-01-12|     882|      45000.0|
| 10481171|   125000.00|1999-01-13|2005-04-19|    2288|          0.0|
| 10500907|   300000.00|1987-08-30|1988-12-26|     484|         null|
| 10500907|  1000000.00|1988-12-27|1992-09-14|    1357|     700000.0|
| 10509246|  4700000.00|1987-08-12|1987-08-29|      17|         null|
| 10509246|  5000000.00|1987-08-30|1990-11-22|    1180|     300000.0|
| 10509246|  6500000.00|1990-11-23|1994-12-14|    1482|    1500000.0|
| 10517575|    80000.00|1987-08-30|1994-12-11|    2660|         null|
| 10517575|   200000.00|1994-12-12|2017-01-05|    8060|     120000.0|
| 10522749|    80000.00|1987-08-30|1996-12-26|    3406|         null|
| 10522749|   200000.00|1996-12-27|2017-01-05|    7314|     120000.0|
| 10563585|    80000.00|1987-08-30|1993-01-13|    1963|         null|
| 10576695|    80000.00|1987-08-30|1989-10-12|     774|         null|
| 10577675|    80000.00|1987-08-30|1989-02-08|     528|         null|
| 10577675|   160000.00|1989-02-09|1993-02-01|    1453|      80000.0|
| 10577675|   160000.00|1993-02-02|1995-12-17|    1048|          0.0|
| 10577675|   200000.00|1995-12-18|2004-04-01|    3027|      40000.0|
| 10597390|    80000.00|1987-08-30|1995-07-18|    2879|         null|
| 10597390|   200000.00|1995-07-19|2017-01-05|    7841|     120000.0|
| 10606683|    80000.00|1987-08-30|1990-12-11|    1199|         null|
| 10606683|   500000.00|1990-12-12|1994-11-15|    1434|     420000.0|
| 10606683|  1500000.00|1994-11-16|1995-07-13|     239|    1000000.0|
| 10606683|  2250000.00|1995-07-14|2017-01-05|    7846|     750000.0|
| 10609747|  2520000.00|1987-05-13|1987-08-29|     108|         null|
| 10609747|  2600000.00|1987-08-30|1994-12-08|    2657|      80000.0|
| 10609747|  1100000.00|1994-12-09|1996-02-26|     444|   -1500000.0|
| 10613779|   300000.00|1987-08-30|1992-01-05|    1589|         null|
| 10613779|   300000.00|1992-01-06|1993-01-20|     380|          0.0|
| 10613779|  1000000.00|1993-01-21|1994-01-31|     375|     700000.0|
| 10613779|  1100000.00|1994-02-01|2000-12-28|    2522|     100000.0|
| 10613779|  1200000.00|2000-12-29|2002-06-25|     543|     100000.0|
| 10613779|  1300000.00|2002-06-26|2017-01-05|    5307|     100000.0|
| 10615445|    80000.00|1987-08-30|1996-12-02|    3382|         null|
| 10615445|   200000.00|1996-12-03|1998-09-16|     652|     120000.0|
| 10629098|   185000.00|1987-03-30|2017-01-05|   10874|         null|
| 10635691|   300000.00|1987-03-02|1996-07-11|    3419|         null|
| 10635691|   500000.00|1996-07-12|2017-01-05|    7482|     200000.0|
| 10649897|    80000.00|1987-08-30|1995-10-08|    2961|         null|
| 10649897|   200000.00|1995-10-09|1998-09-16|    1073|     120000.0|
| 10649897|   400000.00|1998-09-17|2013-09-30|    5492|     200000.0|
| 10657288|   300000.00|1987-08-30|1991-06-28|    1398|         null|
| 10661846|    80000.00|1987-08-30|1993-11-11|    2265|         null|
| 10663075|    80000.00|1987-08-30|1995-12-14|    3028|         null|
| 10664977|    80000.00|1987-08-30|1998-04-30|    3896|         null|
| 10664977|   125000.00|1998-05-01|2005-01-14|    2450|      45000.0|
| 10677238|    80000.00|1987-08-30|1991-05-27|    1366|         null|
| 10711479|    80000.00|1987-08-30|1996-11-25|    3375|         null|
| 10711479|   200000.00|1996-11-26|2003-04-23|    2339|     120000.0|
| 10725984|    80000.00|1987-08-30|1994-09-29|    2587|         null|
| 10738601|    80000.00|1987-09-01|1991-06-18|    1386|         null|
| 10739276|  1000000.00|1987-08-30|1999-07-05|    4327|         null|
| 10785383|    80000.00|1987-08-30|1994-04-07|    2412|         null|
| 10788099|    80000.00|1987-08-30|1993-08-10|    2172|         null|
| 10793246|   300000.00|1987-08-30|1993-06-08|    2109|         null|
| 10809673|    80000.00|1987-08-30|1997-02-24|    3466|         null|
| 10829399|    80000.00|1987-09-03|1996-10-24|    3339|         null|
| 10829399|   200000.00|1996-10-25|2006-02-02|    3387|     120000.0|
| 10829399|  1000000.00|2006-02-03|2010-12-22|    1783|     800000.0|
| 10837308|    80000.00|1987-08-30|1996-12-08|    3388|         null|
| 10837308|   125000.00|1996-12-09|2017-01-05|    7332|      45000.0|
| 10901545|    80000.00|1987-08-30|1989-04-04|     583|         null|
| 10909295|   100000.00|1987-10-29|1995-03-22|    2701|         null|
| 10909295|   200000.00|1995-03-23|2017-01-05|    7959|     100000.0|
| 10964709|    80000.00|1987-09-17|1990-02-23|     890|         null|
| 11007805|   300000.00|1987-08-31|1997-01-16|    3426|         null|
| 11007805|   500000.00|1997-01-17|2005-01-30|    2935|     200000.0|
| 11007805|   510000.00|2005-01-31|2006-09-27|     604|      10000.0|
| 11049702|    80000.00|1987-08-30|1997-10-29|    3713|         null|
| 11049702|   500000.00|1997-10-30|2006-04-03|    3077|     420000.0|
| 11050808|    80000.00|1987-08-28|1996-12-22|    3404|         null|
| 11050808|   200000.00|1996-12-23|2017-01-05|    7318|     120000.0|
| 11156177|   300000.00|1987-08-30|1988-11-24|     452|         null|
| 11156177| 10000000.00|1988-11-25|1989-05-15|     171|    9700000.0|
| 11156177|100000000.00|1989-05-16|1995-09-13|    2311|        9.0E7|
| 11160131|    80000.00|1987-10-01|1990-05-18|     960|         null|
| 11204708|    80000.00|1987-10-12|1991-01-11|    1187|         null|
| 11214185|    80000.00|1987-10-06|1996-03-22|    3090|         null|
| 11243800|    80000.00|1987-10-06|1993-03-31|    2003|         null|
| 11320902|    80000.00|1987-10-02|1993-12-14|    2265|         null|
| 11320902|  1000000.00|1993-12-15|1995-08-30|     623|     920000.0|
| 11388086|    80000.00|1987-09-07|1992-06-29|    1757|         null|
| 11508294|    80000.00|1987-11-03|1990-01-28|     817|         null|
| 11508294|   115000.00|1990-01-29|1995-09-06|    2046|      35000.0|
| 11508294|   200000.00|1995-09-07|1997-02-19|     531|      85000.0|
| 11518273|    80000.00|1987-11-04|1995-11-07|    2925|         null|
| 11518273|   500000.00|1995-11-08|2004-09-12|    3231|     420000.0|
| 11518273|   750000.00|2004-09-13|2012-04-19|    2775|     250000.0|
| 11534104|    80000.00|1987-11-04|1996-10-01|    3254|         null|
| 11534104|   125000.00|1996-10-02|2001-12-05|    1890|      45000.0|
| 11543308|    30000.00|1987-11-09|1991-07-30|    1359|         null|
| 11543308|    90000.00|1991-07-31|1994-03-23|     966|      60000.0|
| 11545637|    80000.00|1987-11-04|1998-03-04|    3773|         null|
| 11547184|    80000.00|1987-11-04|1997-01-22|    3367|         null|
| 11547184|   125000.00|1997-01-23|2017-01-05|    7287|      45000.0|
| 11559948|   300000.00|1987-11-02|1996-06-27|    3160|         null|
| 11559948|   500000.00|1996-06-28|1999-11-09|    1229|     200000.0|
| 11734294|    80000.00|1987-11-25|1991-01-23|    1155|         null|
| 11759149|   300000.00|1987-12-02|1988-07-27|     238|         null|
| 11759149|   500000.00|1988-07-28|1996-11-07|    3024|     200000.0|
| 11759149|   500000.00|1996-11-08|2017-01-05|    7363|          0.0|
| 11761747|   300000.00|1987-12-17|1998-02-09|    3707|         null|
| 11784631|    80000.00|1988-01-11|1993-02-08|    1855|         null|
| 11784631|   200000.00|1993-02-09|1999-02-25|    2207|     120000.0|
| 11832172|    80000.00|1988-01-15|1996-11-27|    3239|         null|
| 11832172|   125000.00|1996-11-28|2017-01-05|    7343|      45000.0|
| 11872190|   300000.00|1988-02-24|1992-12-08|    1749|         null|
| 11872190|   500000.00|1992-12-09|1999-11-30|    2547|     200000.0|
| 11888739|    80000.00|1988-01-27|1995-10-26|    2829|         null|
| 11931332|    80000.00|1988-03-08|1997-10-17|    3510|         null|
| 11937179|    80000.00|1988-02-10|1997-11-13|    3564|         null|
| 11937179|   125000.00|1997-11-14|2017-01-05|    6992|      45000.0|
| 11967876|    80000.00|1988-02-11|1990-10-17|     979|         null|
| 11968155|   125000.00|2001-10-26|2014-06-22|    4622|         null|
| 11968155|   600000.00|2014-06-23|2017-01-05|     927|     475000.0|
| 11974937|    80000.00|1988-03-14|1992-02-28|    1446|         null|
| 11975380|    80000.00|1988-03-14|1988-06-28|     106|         null|
| 11975380|   300000.00|1988-06-29|1989-10-11|     469|     220000.0|
| 11975380|  1300000.00|1989-10-12|1998-01-08|    3010|    1000000.0|
| 11981038|    80000.00|1988-03-14|1994-08-25|    2355|         null|
| 11991777|   300000.00|1988-03-28|1997-03-14|    3273|         null|
| 12009690|    80000.00|1988-03-14|1994-03-28|    2205|         null|
| 12009690|   200000.00|1994-03-29|2010-08-09|    5977|     120000.0|
| 12009690|   250000.00|2010-08-10|2017-01-05|    2340|      50000.0|
| 12029187|    80000.00|1988-04-06|1996-12-22|    3182|         null|
| 12029187|   200000.00|1996-12-23|2017-01-05|    7318|     120000.0|
| 12031491|    80000.00|1988-04-11|1990-01-24|     653|         null|
| 12031491|   300000.00|1990-01-25|1991-11-27|     671|     220000.0|
| 12031491|   500000.00|1991-11-28|1997-10-22|    2155|     200000.0|
| 12049196|    90000.00|1988-06-23|1991-04-11|    1022|         null|
| 12056990|    80000.00|1988-04-08|2002-05-23|    5158|         null|
| 12080417|   150000.00|2003-03-12|2003-09-08|     180|         null|
| 12080417|   200000.00|2003-09-09|2010-07-30|    2516|      50000.0|
| 12103387|    80000.00|1988-05-11|1991-02-27|    1022|         null|
| 12108281|    80000.00|1988-05-18|1989-03-13|     299|         null|
| 12108281|   300000.00|1989-03-14|1991-10-08|     938|     220000.0|
| 12108281|   300000.00|1991-10-09|1993-12-17|     800|          0.0|
| 12120184|    80000.00|1988-05-11|1992-07-29|    1540|         null|
| 12139640|    80000.00|1987-08-30|1994-08-03|    2530|         null|
| 12143737|    80000.00|1988-06-01|1997-01-07|    3142|         null|
| 12143737|   125000.00|1997-01-08|2002-09-15|    2076|      45000.0|
| 12143737|  1000000.00|2002-09-16|2011-03-01|    3088|     875000.0|
| 12144040|    80000.00|1988-05-31|1997-10-16|    3425|         null|
| 12155816|   125000.00|2004-01-26|2017-01-05|    4728|         null|
| 12184573|    80000.00|1988-06-07|1992-10-02|    1578|         null|
| 12211937|    80000.00|1988-06-28|1995-10-09|    2659|         null|
| 12211937|   200000.00|1995-10-10|2009-05-12|    4963|     120000.0|
| 12235437|    80000.00|1988-05-11|1996-11-26|    3121|         null|
| 12246374|    80000.00|1988-07-04|1993-11-04|    1949|         null|
| 12246374|   200000.00|1993-11-05|2005-02-14|    4119|     120000.0|
| 12246609|    80000.00|1988-07-04|1993-12-09|    1984|         null|
| 12246609|   200000.00|1993-12-10|1994-09-28|     292|     120000.0|
| 12246609|  1200000.00|1994-09-29|2001-05-28|    2433|    1000000.0|
| 12294948|    80000.00|1988-07-25|1990-12-10|     868|         null|
| 12294948|    80000.00|1990-12-11|1994-04-08|    1214|          0.0|
| 12317743|    80000.00|1988-08-01|1990-05-02|     639|         null|
| 12317743|    84000.00|1990-05-03|1993-10-07|    1253|       4000.0|
| 12317743|   504000.00|1993-10-08|1995-11-13|     766|     420000.0|
| 12339283|    80000.00|1988-08-01|1991-06-26|    1059|         null|
| 12352700|    80000.00|1988-08-11|1996-01-31|    2729|         null|
| 12387997|    80000.00|1988-08-01|1991-01-07|     889|         null|
| 12406657|   125000.00|2007-08-23|2017-01-05|    3423|         null|
| 12407718|   200000.00|1997-01-29|2017-01-05|    7281|         null|
| 12427190|   100000.00|1988-10-12|1998-02-12|    3410|         null|
| 12427190|   125000.00|1998-02-13|2017-01-05|    6901|      25000.0|
| 12428405|    90000.00|1988-12-21|1994-12-20|    2190|         null|
+---------+------------+----------+----------+--------+-------------+
only showing top 250 rows


In [18]:
#check who is væksting in year x
vaekst2015Df = (valuesAndCumDifDf
                .where(condition=F.year("gyldigFra") == 2015)
                .na.drop(subset="CumlativeDiff")
                .orderBy(["CumlativeDiff","cvrNummer"],ascending=[0,0]))
vaekst2015Df.show()


+---------+-------------+----------+----------+--------+--------------+
|cvrNummer|       vaerdi| gyldigFra| gyldigTil|varighed| CumlativeDiff|
+---------+-------------+----------+----------+--------+--------------+
| 10519608|8045000000.00|2015-02-05|2017-01-09|     704|         2.0E9|
| 12867336|9508000000.00|2015-06-19|2017-01-09|     570|         1.2E9|
| 25679288|2000000000.00|2015-03-02|2017-01-09|     679|         1.0E9|
| 36533846|1000000000.00|2015-04-07|2017-01-09|     643|        9.99E8|
| 22460218| 957543745.56|2015-07-13|2015-12-14|     154|9.5026374556E8|
| 29223009| 750000000.00|2015-05-18|2015-11-10|     176|        5.75E8|
| 34486816| 500000000.00|2015-12-22|2017-01-09|     384|       4.995E8|
| 32345794| 500000000.00|2015-02-01|2017-01-09|     708|       4.995E8|
| 36058552|3062554419.00|2015-09-01|2017-01-09|     496|  3.42054419E8|
| 34720819| 303655700.00|2015-02-20|2015-09-27|     219|    3.035536E8|
| 73349613| 720000000.00|2015-05-13|2017-01-09|     607|         3.0E8|
| 32081576| 361218000.00|2015-10-06|2017-01-09|     461|        2.46E8|
| 33971443| 500000000.00|2015-05-18|2015-12-03|     199|         2.4E8|
| 36685719| 236450000.00|2015-07-03|2015-09-30|      89|      2.3595E8|
| 36020873| 236450000.00|2015-07-09|2015-09-30|      83|      2.3595E8|
| 32127711| 300000000.00|2015-01-29|2017-01-09|     711|        2.34E8|
| 11814913|2728000000.00|2015-04-09|2017-01-09|     641|        2.05E8|
| 36937661| 200000000.00|2015-08-25|2017-01-09|     503|       1.995E8|
| 36700386| 200000000.00|2015-05-29|2017-01-09|     591|       1.995E8|
| 35209948| 400540000.00|2015-06-15|2017-01-09|     574|        1.75E8|
+---------+-------------+----------+----------+--------+--------------+
only showing top 20 rows


In [17]:
print(vaekst2015Df.select("cvrNummer").distinct().count())


5526

In [18]:
#creating deltager as a table
deltagerRelationDf = createNextLayerTable(cvrDf,["cvrNummer"],"deltagerRelation","virksomhed")
#deltagerRelationDf.printSchema()
deltagerRelationCol = deltagerRelationDf.columns
print(deltagerRelationCol)
deltagerRelationCol.remove("deltager")
#deltagerRelationDf.show()

deltagerDf = expandSubCols(deltagerRelationDf,"deltager")
deltagerDf.show(1)
#deltagerDf.printSchema()
deltagerDf = deltagerDf.drop("kontorsteder").drop("organisationer")
deltagerCols = deltagerDf.columns
deltagerCols.remove("navne")

deltagerNamesDf = createNextLayerTable(df=deltagerDf,nonExplodedColumns=deltagerCols,explodedColumn="navne")
deltagerNamesDf.show(10)


['cvrNummer', 'deltager', 'kontorsteder', 'organisationer']
+---------+------------+--------------------+--------------------+------------+----------+-----------------+--------------------+--------------------+
|cvrNummer|kontorsteder|      organisationer| beliggenhedsadresse|enhedsNummer|enhedstype|forretningsnoegle|               navne|       sidstIndlaest|
+---------+------------+--------------------+--------------------+------------+----------+-----------------+--------------------+--------------------+
| 27850367|          []|[[WrappedArray([0...|[[null,null,null,...|  4000218628|    PERSON|             null|[[Gitte Heegaard ...|2015-12-10T04:38:...|
+---------+------------+--------------------+--------------------+------------+----------+-----------------+--------------------+--------------------+
only showing top 1 row

+---------+--------------------+------------+----------+-----------------+--------------------+--------------------+--------------------+--------------------+
|cvrNummer| beliggenhedsadresse|enhedsNummer|enhedstype|forretningsnoegle|       sidstIndlaest|                navn|             periode|      sidstOpdateret|
+---------+--------------------+------------+----------+-----------------+--------------------+--------------------+--------------------+--------------------+
| 27850367|[[null,null,null,...|  4000218628|    PERSON|             null|2015-12-10T04:38:...| Gitte Heegaard Amby|         [null,null]|                null|
| 36376554|[[null,null,Kongs...|  4006222194|    PERSON|             null|2015-12-10T04:38:...|Anna-Sophie Schomann|         [null,null]|                null|
| 36376554|[[null,null,null,...|  4006222195|    PERSON|             null|2015-12-10T01:20:...|   Elsebet Rasmussen|         [null,null]|                null|
| 26452635|[[null,null,Himme...|     2653504|VIRKSOMHED|         10056322|2015-06-23T15:50:...|CAT-SYMBION INNOV...|[2001-01-25,2009-...|2013-11-22T19:12:...|
| 26452635|[[null,null,Himme...|     2653504|VIRKSOMHED|         10056322|2015-06-23T15:50:...|          KR 279 A/S|[2000-12-01,2001-...|2000-12-15T05:14:...|
| 26452635|[[null,null,Himme...|     2678531|VIRKSOMHED|         10073014|2015-06-23T15:55:...|        CAT SEED A/S|[2001-05-07,2014-...|2014-02-26T08:05:...|
| 26452635|[[null,null,null,...|     5105513|VIRKSOMHED|         26052394|2015-12-02T23:00:...|    JUT NR. 2180 A/S|[2001-04-01,2001-...|2001-05-30T10:43:...|
| 26452635|[[null,null,null,...|     5105513|VIRKSOMHED|         26052394|2015-12-02T23:00:...|EASY BUSINESS STA...|[2001-08-15,2003-...|2001-09-06T13:50:...|
| 26452635|[[null,null,null,...|     5105513|VIRKSOMHED|         26052394|2015-12-02T23:00:...|ENERGY BUSINESS A...|[2003-02-28,2005-...|2003-04-14T10:01:...|
| 26452635|[[null,null,null,...|     5105513|VIRKSOMHED|         26052394|2015-12-02T23:00:...|EY SERVICE STATSA...|[2005-06-29,2009-...|2005-06-30T10:44:...|
+---------+--------------------+------------+----------+-----------------+--------------------+--------------------+--------------------+--------------------+
only showing top 10 rows


In [19]:
deltagerNamesDf.printSchema()


root
 |-- cvrNummer: long (nullable = true)
 |-- beliggenhedsadresse: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- bogstavFra: string (nullable = true)
 |    |    |-- bogstavTil: string (nullable = true)
 |    |    |-- bynavn: string (nullable = true)
 |    |    |-- conavn: string (nullable = true)
 |    |    |-- etage: string (nullable = true)
 |    |    |-- fritekst: string (nullable = true)
 |    |    |-- husnummerFra: long (nullable = true)
 |    |    |-- husnummerTil: long (nullable = true)
 |    |    |-- kommune: struct (nullable = true)
 |    |    |    |-- kommuneKode: long (nullable = true)
 |    |    |    |-- kommuneNavn: string (nullable = true)
 |    |    |    |-- periode: struct (nullable = true)
 |    |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- landekode: string (nullable = true)
 |    |    |-- periode: struct (nullable = true)
 |    |    |    |-- gyldigFra: string (nullable = true)
 |    |    |    |-- gyldigTil: string (nullable = true)
 |    |    |-- postboks: string (nullable = true)
 |    |    |-- postdistrikt: string (nullable = true)
 |    |    |-- postnummer: long (nullable = true)
 |    |    |-- sidedoer: string (nullable = true)
 |    |    |-- sidstOpdateret: string (nullable = true)
 |    |    |-- vejkode: long (nullable = true)
 |    |    |-- vejnavn: string (nullable = true)
 |-- enhedsNummer: long (nullable = true)
 |-- enhedstype: string (nullable = true)
 |-- forretningsnoegle: long (nullable = true)
 |-- sidstIndlaest: string (nullable = true)
 |-- navn: string (nullable = true)
 |-- periode: struct (nullable = true)
 |    |-- gyldigFra: string (nullable = true)
 |    |-- gyldigTil: string (nullable = true)
 |-- sidstOpdateret: string (nullable = true)


In [53]:
expandedHovedBrancheWithDateDf.columns


Out[53]:
['cvrNummer',
 'branchekode',
 'branchetekst',
 'sidstOpdateret',
 'gyldigFra',
 'gyldigTil']

In [62]:
brancheList = ["cvrNummer","hovedbranche"]#"bibranche1","bibranche2","bibranche3"

companyBrancherDf = (cvrDf
                     .select([F.col("virksomhed."+f) for f in brancheList]))
#companyBrancherDf.show()
brancheList.remove("hovedbranche")
expandedHovedBrancheDf = createNextLayerTable(df=companyBrancherDf,nonExplodedColumns=brancheList,explodedColumn="hovedbranche")

expandedHovedBrancheWithDateDf = (expandSubCols(expandedHovedBrancheDf,"periode")
                                  .orderBy(["cvrNummer","gyldigFra"],ascending=[1,1]))
#expandedHovedBrancheWithDateDf.show(truncate=False)


hovedbrancher2015Df = (expandedHovedBrancheWithDateDf
                       .filter((F.year(F.col("gyldigFra"))==2015))
                       .withColumn(colName="DayOfYear",col=F.dayofyear(F.col("gyldigFra")))
                       .groupBy(expandedHovedBrancheWithDateDf.columns)
                       .max("DayOfYear"))   
hovedbrancher2015Df.show()

hovedbrancher2015Cols = hovedbrancher2015Df.columns

prefixedHovedBranch2015Df = (hovedbrancher2015Df
                             .select([F.col(x).alias("branche"+x) for x in hovedbrancher2015Cols]))
#prefixedHovedBranch2015Df.show()


+---------+-----------+--------------------+--------------------+----------+----------+--------------+
|cvrNummer|branchekode|        branchetekst|      sidstOpdateret| gyldigFra| gyldigTil|max(DayOfYear)|
+---------+-----------+--------------------+--------------------+----------+----------+--------------+
| 10001382|     467700|Engroshandel med ...|2015-05-08T15:07:...|2015-01-01|      null|             1|
| 10002230|     682040|Udlejning af erhv...|2016-01-07T14:13:...|2015-01-01|      null|             1|
| 10002273|     682040|Udlejning af erhv...|2015-11-03T13:36:...|2015-04-15|2015-10-23|           105|
| 10009561|     642010|Finansielle holdi...|2015-05-18T09:05:...|2015-01-01|      null|             1|
| 10012589|     682040|Udlejning af erhv...|2015-03-05T07:09:...|2015-03-01|      null|            60|
| 10012988|     649900|Anden finansiel f...|2015-02-13T16:05:...|2015-02-13|      null|            44|
| 10020263|     811000|Kombinerede servi...|2015-05-19T09:42:...|2015-05-19|      null|           139|
| 10022967|     749090|Andre liberale, v...|2015-07-28T15:49:...|2015-07-15|      null|           196|
| 10026121|     649900|Anden finansiel f...|2015-02-03T12:57:...|2015-01-30|      null|            30|
| 10029147|     749090|Andre liberale, v...|2015-01-10T17:05:...|2015-01-12|      null|            12|
| 10031567|     464700|Engroshandel med ...|2016-02-22T08:52:...|2015-01-01|      null|             1|
| 10031923|     900300|  Kunstnerisk skaben|2015-08-30T13:15:...|2015-08-30|      null|           242|
| 10038766|     477890|Detailhandel med ...|2014-11-19T10:20:...|2015-01-01|      null|             1|
| 10039584|     682040|Udlejning af erhv...|2016-02-01T17:19:...|2015-10-02|      null|           275|
| 10040019|     642020|Ikke-finansielle ...|2016-02-29T14:35:...|2015-07-01|      null|           182|
| 10042585|     351400|Handel med elektr...|2015-04-23T09:06:...|2015-04-23|      null|           113|
| 10043425|     649900|Anden finansiel f...|2015-10-15T11:56:...|2015-01-01|      null|             1|
| 10045517|     649900|Anden finansiel f...|2015-08-18T14:45:...|2015-08-18|      null|           230|
| 10048761|     236300|Fremstilling af f...|2015-01-09T13:48:...|2015-01-09|      null|             9|
| 10054958|     812290|Anden rengøring a...|2015-06-26T10:57:...|2015-01-01|      null|             1|
+---------+-----------+--------------------+--------------------+----------+----------+--------------+
only showing top 20 rows

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-62-bc7e521d5708> in <module>()
     21 hovedbrancher2015Cols = hovedbrancher2015Df.columns
     22 
---> 23 prefixedHovedBranch2015Df = (ovedbrancher2015Df
     24                              .select([F.col(x).alias("branche"+x) for x in hovedbrancher2015Cols]))
     25 #prefixedHovedBranch2015Df.show()

NameError: name 'ovedbrancher2015Df' is not defined

In [63]:
#investigate whether a cvrNumber is listed twice in 
hovedbrancher2015Df.select("cvrNummer").groupby().count().show()
hovedbrancher2015Df.select("cvrNummer").distinct().groupby().count().show()


+-----+
|count|
+-----+
|96334|
+-----+

+-----+
|count|
+-----+
|92159|
+-----+


In [21]:
#expandedHovedBrancheWithDateDf.select(F.col("cvrNummer")
#                                      ,F.year(F.col("gyldigFra")).alias("aar")
#                                      ,F.col("branchekode")
#                                      ,F.col("branchetekst")).groupBy(F.col("branchekode"),F.col("branchetekst"),F.col("aar")).count().show()


+-----------+--------------------+----+-----+
|branchekode|        branchetekst| aar|count|
+-----------+--------------------+----+-----+
|     741020|Kommunikationsdes...|2008| 1779|
|     514610|Engroshandel med ...|2000|   51|
|     741490|Anden virksomheds...|2003| 2909|
|     713400|Udlejning af mask...|2005|  167|
|     851210|Alment praktisere...|1986|  124|
|     912000|       Fagforeninger|1986|   24|
|     316290|Fremstilling af a...|2007|   39|
|     139210|Fremstilling af b...|2008|  182|
|     331030|Fremstilling af e...|2000|    4|
|     512200|             Rumfart|1985|   12|
|     522120|Parkering og vejh...|2008|  258|
|     711230|Opstilling og lev...|2008|   31|
|     452510|   Murerforretninger|1985|  110|
|     515400|Engroshandel med ...|2002|   66|
|     332090|Fremstilling af a...|2002|   15|
|     513900|Ikke-specialisere...|2003|   59|
|     282900|Fremstilling af a...|2008|  152|
|     602200|          Taxikørsel|1973|   49|
|     515100|Engroshandel med ...|2003|   32|
|     631100|Databehandling, w...|2003|    3|
+-----------+--------------------+----+-----+
only showing top 20 rows


In [45]:
vaekst2015Cols = vaekst2015Df.columns
branche2015Cols = prefixedHovedBranch2015Df.columns
vaekstWithBrancher2015Df = (vaekst2015Df
                            .join(prefixedHovedBranch2015Df,vaekst2015Df["cvrNummer"]==prefixedHovedBranch2015Df["branchecvrNummer"],"outer")
                           )


root
 |-- cvrNummer: long (nullable = true)
 |-- vaerdi: string (nullable = true)
 |-- gyldigFra: string (nullable = true)
 |-- gyldigTil: string (nullable = true)
 |-- varighed: integer (nullable = true)
 |-- CumlativeDiff: double (nullable = true)
 |-- branchecvrNummer: long (nullable = true)
 |-- branchebranchekode: string (nullable = true)
 |-- branchebranchetekst: string (nullable = true)
 |-- branchesidstOpdateret: string (nullable = true)
 |-- branchegyldigFra: string (nullable = true)
 |-- branchegyldigTil: string (nullable = true)

+---------+------------+----------+----------+--------+-------------+----------------+------------------+--------------------+---------------------+----------------+----------------+
|cvrNummer|      vaerdi| gyldigFra| gyldigTil|varighed|CumlativeDiff|branchecvrNummer|branchebranchekode| branchebranchetekst|branchesidstOpdateret|branchegyldigFra|branchegyldigTil|
+---------+------------+----------+----------+--------+-------------+----------------+------------------+--------------------+---------------------+----------------+----------------+
|     null|        null|      null|      null|    null|         null|        10194164|            981000|Private husholdni...| 2015-08-20T16:21:...|      2015-04-15|            null|
|     null|        null|      null|      null|    null|         null|        12920547|            683110| Ejendomsmæglere mv.| 2015-06-05T08:09:...|      2015-01-01|            null|
|     null|        null|      null|      null|    null|         null|        13054533|            682040|Udlejning af erhv...| 2015-09-25T10:23:...|      2015-09-25|            null|
|     null|        null|      null|      null|    null|         null|        13545731|            900300|  Kunstnerisk skaben| 2015-03-20T11:17:...|      2015-03-10|            null|
|     null|        null|      null|      null|    null|         null|        17201948|            649900|Anden finansiel f...| 2015-09-30T17:12:...|      2015-06-01|            null|
|     null|        null|      null|      null|    null|         null|        17313088|            702100|Public relations ...| 2015-03-24T16:15:...|      2015-04-01|            null|
|     null|        null|      null|      null|    null|         null|        17944673|            256200| Maskinforarbejdning| 2015-07-15T10:15:...|      2015-07-01|            null|
|     null|        null|      null|      null|    null|         null|        19337731|            433410|   Malerforretninger| 2015-06-10T10:23:...|      2015-07-01|            null|
| 20016175|264453099.00|2015-06-17|2017-01-05|     568| 1.14453099E8|            null|              null|                null|                 null|            null|            null|
|     null|        null|      null|      null|    null|         null|        20035331|            691000|    Juridisk bistand| 2015-02-02T19:05:...|      2015-01-01|            null|
|     null|        null|      null|      null|    null|         null|        20427132|            702100|Public relations ...| 2015-11-10T10:08:...|      2015-11-05|            null|
|     null|        null|      null|      null|    null|         null|        20820330|            682040|Udlejning af erhv...| 2015-12-16T14:14:...|      2015-10-01|            null|
|     null|        null|      null|      null|    null|         null|        21091871|            561010|        Restauranter| 2015-10-09T14:10:...|      2015-10-01|            null|
|     null|        null|      null|      null|    null|         null|        21609897|            433410|   Malerforretninger| 2015-12-15T12:11:...|      2015-10-01|            null|
|     null|        null|      null|      null|    null|         null|        21627631|            731110|     Reklamebureauer| 2015-07-31T15:07:...|      2015-08-01|            null|
| 21734144|    50000.00|2015-06-03|2017-01-05|     582|     -85000.0|            null|              null|                null|                 null|            null|            null|
|     null|        null|      null|      null|    null|         null|        21847305|            741010|Industriel design...| 2015-05-19T05:07:...|      2015-06-01|            null|
|     null|        null|      null|      null|    null|         null|        21943436|            433900|Anden bygningsfær...| 2016-04-24T18:05:...|      2015-07-15|      2016-04-30|
|     null|        null|      null|      null|    null|         null|        21985333|            960400|Aktiviteter vedrø...| 2015-06-07T13:07:...|      2015-06-05|            null|
| 25042573| 20000000.00|2015-10-01|2017-01-05|     462|       1.95E7|            null|              null|                null|                 null|            null|            null|
+---------+------------+----------+----------+--------+-------------+----------------+------------------+--------------------+---------------------+----------------+----------------+
only showing top 20 rows


In [46]:
vaekstWithBrancher2015Df.show()


+---------+------------+----------+----------+--------+-------------+----------------+------------------+--------------------+---------------------+----------------+----------------+
|cvrNummer|      vaerdi| gyldigFra| gyldigTil|varighed|CumlativeDiff|branchecvrNummer|branchebranchekode| branchebranchetekst|branchesidstOpdateret|branchegyldigFra|branchegyldigTil|
+---------+------------+----------+----------+--------+-------------+----------------+------------------+--------------------+---------------------+----------------+----------------+
|     null|        null|      null|      null|    null|         null|        10194164|            981000|Private husholdni...| 2015-08-20T16:21:...|      2015-04-15|            null|
|     null|        null|      null|      null|    null|         null|        12920547|            683110| Ejendomsmæglere mv.| 2015-06-05T08:09:...|      2015-01-01|            null|
|     null|        null|      null|      null|    null|         null|        13054533|            682040|Udlejning af erhv...| 2015-09-25T10:23:...|      2015-09-25|            null|
|     null|        null|      null|      null|    null|         null|        13545731|            900300|  Kunstnerisk skaben| 2015-03-20T11:17:...|      2015-03-10|            null|
|     null|        null|      null|      null|    null|         null|        17201948|            649900|Anden finansiel f...| 2015-09-30T17:12:...|      2015-06-01|            null|
|     null|        null|      null|      null|    null|         null|        17313088|            702100|Public relations ...| 2015-03-24T16:15:...|      2015-04-01|            null|
|     null|        null|      null|      null|    null|         null|        17944673|            256200| Maskinforarbejdning| 2015-07-15T10:15:...|      2015-07-01|            null|
|     null|        null|      null|      null|    null|         null|        19337731|            433410|   Malerforretninger| 2015-06-10T10:23:...|      2015-07-01|            null|
| 20016175|264453099.00|2015-06-17|2017-01-05|     568| 1.14453099E8|            null|              null|                null|                 null|            null|            null|
|     null|        null|      null|      null|    null|         null|        20035331|            691000|    Juridisk bistand| 2015-02-02T19:05:...|      2015-01-01|            null|
|     null|        null|      null|      null|    null|         null|        20427132|            702100|Public relations ...| 2015-11-10T10:08:...|      2015-11-05|            null|
|     null|        null|      null|      null|    null|         null|        20820330|            682040|Udlejning af erhv...| 2015-12-16T14:14:...|      2015-10-01|            null|
|     null|        null|      null|      null|    null|         null|        21091871|            561010|        Restauranter| 2015-10-09T14:10:...|      2015-10-01|            null|
|     null|        null|      null|      null|    null|         null|        21609897|            433410|   Malerforretninger| 2015-12-15T12:11:...|      2015-10-01|            null|
|     null|        null|      null|      null|    null|         null|        21627631|            731110|     Reklamebureauer| 2015-07-31T15:07:...|      2015-08-01|            null|
| 21734144|    50000.00|2015-06-03|2017-01-05|     582|     -85000.0|            null|              null|                null|                 null|            null|            null|
|     null|        null|      null|      null|    null|         null|        21847305|            741010|Industriel design...| 2015-05-19T05:07:...|      2015-06-01|            null|
|     null|        null|      null|      null|    null|         null|        21943436|            433900|Anden bygningsfær...| 2016-04-24T18:05:...|      2015-07-15|      2016-04-30|
|     null|        null|      null|      null|    null|         null|        21985333|            960400|Aktiviteter vedrø...| 2015-06-07T13:07:...|      2015-06-05|            null|
| 25042573| 20000000.00|2015-10-01|2017-01-05|     462|       1.95E7|            null|              null|                null|                 null|            null|            null|
+---------+------------+----------+----------+--------+-------------+----------------+------------------+--------------------+---------------------+----------------+----------------+
only showing top 20 rows


In [ ]: