In [1]:
from recommender import Recommender
from pyspark.ml.evaluation import RegressionEvaluator
from eval_model import TopQuantileEvaluator, NDCGEvaluator, NDCG10Evaluator
from pyspark.sql import functions as F


> **Review note:** This cell fails while creating the `SparkSession`, not in the imports themselves. The root cause is at the bottom of the trace — `ERROR XSDB6: Another instance of Derby may have already booted the database .../src/metastore_db` — i.e. another Spark/PySpark process (a second notebook kernel or `pyspark` shell started from the same directory) already holds the embedded Derby metastore lock. Fix: shut down the other SparkSession, or remove the stale `metastore_db/db.lck` file if no other process is running, then restart the kernel. Also consider clearing this traceback output before committing the notebook — it buries the narrative.
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
/usr/local/Cellar/apache-spark/2.1.1/libexec/python/pyspark/sql/utils.py in deco(*a, **kw)
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:

/usr/local/Cellar/apache-spark/2.1.1/libexec/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    318                     "An error occurred while calling {0}{1}{2}.\n".
--> 319                     format(target_id, ".", name), value)
    320             else:

Py4JJavaError: An error occurred while calling o27.sessionState.
: java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveSessionState':
	at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:981)
	at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:110)
	at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:214)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.reflect.InvocationTargetException
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:978)
	... 13 more
Caused by: java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveExternalCatalog':
	at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:169)
	at org.apache.spark.sql.internal.SharedState.<init>(SharedState.scala:86)
	at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
	at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
	at scala.Option.getOrElse(Option.scala:121)
	at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:101)
	at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:100)
	at org.apache.spark.sql.internal.SessionState.<init>(SessionState.scala:157)
	at org.apache.spark.sql.hive.HiveSessionState.<init>(HiveSessionState.scala:32)
	... 18 more
Caused by: java.lang.reflect.InvocationTargetException
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:166)
	... 26 more
Caused by: java.lang.reflect.InvocationTargetException
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:358)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:262)
	at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:66)
	... 31 more
Caused by: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
	at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:188)
	... 39 more
Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
	... 40 more
Caused by: java.lang.reflect.InvocationTargetException
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
	... 46 more
Caused by: javax.jdo.JDOFatalDataStoreException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
	at java.sql.DriverManager.getConnection(DriverManager.java:664)
	at java.sql.DriverManager.getConnection(DriverManager.java:208)
	at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
	at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
	at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
	at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
	at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
	at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
	at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
	at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
	at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
	at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
	at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
	at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
	at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:188)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:358)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:262)
	at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:66)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:166)
	at org.apache.spark.sql.internal.SharedState.<init>(SharedState.scala:86)
	at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
	at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
	at scala.Option.getOrElse(Option.scala:121)
	at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:101)
	at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:100)
	at org.apache.spark.sql.internal.SessionState.<init>(SessionState.scala:157)
	at org.apache.spark.sql.hive.HiveSessionState.<init>(HiveSessionState.scala:32)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:978)
	at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:110)
	at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:214)
	at java.lang.Thread.run(Thread.java:748)
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
	... 108 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/metastore_db.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore$6.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.RawStore.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager$5.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.access.RAMAccessManager.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase$5.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.db.BasicDatabase.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.jdbc.EmbedConnection.startPersistentService(Unknown Source)
	... 105 more
------

NestedThrowables:
java.sql.SQLException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
	at java.sql.DriverManager.getConnection(DriverManager.java:664)
	at java.sql.DriverManager.getConnection(DriverManager.java:208)
	at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
	at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
	at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
	at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
	at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
	at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
	at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
	at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
	at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
	at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
	at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
	at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
	at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:188)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:358)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:262)
	at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:66)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:166)
	at org.apache.spark.sql.internal.SharedState.<init>(SharedState.scala:86)
	at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
	at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
	at scala.Option.getOrElse(Option.scala:121)
	at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:101)
	at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:100)
	at org.apache.spark.sql.internal.SessionState.<init>(SessionState.scala:157)
	at org.apache.spark.sql.hive.HiveSessionState.<init>(HiveSessionState.scala:32)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:978)
	at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:110)
	at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:214)
	at java.lang.Thread.run(Thread.java:748)
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
	... 108 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/metastore_db.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore$6.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.RawStore.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager$5.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.access.RAMAccessManager.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase$5.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.db.BasicDatabase.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.jdbc.EmbedConnection.startPersistentService(Unknown Source)
	... 105 more
------

	at org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:436)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:788)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
	at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
	at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
	at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
	at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
	at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
	at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
	... 51 more
Caused by: java.sql.SQLException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
	at java.sql.DriverManager.getConnection(DriverManager.java:664)
	at java.sql.DriverManager.getConnection(DriverManager.java:208)
	at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
	at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
	at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
	at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
	at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
	at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
	at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
	at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
	at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
	at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
	at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
	at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
	at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
	at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
	at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
	at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:188)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:358)
	at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:262)
	at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:66)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:166)
	at org.apache.spark.sql.internal.SharedState.<init>(SharedState.scala:86)
	at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
	at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
	at scala.Option.getOrElse(Option.scala:121)
	at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:101)
	at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:100)
	at org.apache.spark.sql.internal.SessionState.<init>(SessionState.scala:157)
	at org.apache.spark.sql.hive.HiveSessionState.<init>(HiveSessionState.scala:32)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:978)
	at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:110)
	at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:214)
	at java.lang.Thread.run(Thread.java:748)
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
	... 108 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/metastore_db.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore$6.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.RawStore.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager$5.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.access.RAMAccessManager.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase$5.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.db.BasicDatabase.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.jdbc.EmbedConnection.startPersistentService(Unknown Source)
	... 105 more
------

	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at com.jolbox.bonecp.PoolUtil.generateSQLException(PoolUtil.java:192)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:422)
	at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
	at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
	at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
	at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
	at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
	at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
	... 80 more
Caused by: java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
	at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
	at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
	at java.sql.DriverManager.getConnection(DriverManager.java:664)
	at java.sql.DriverManager.getConnection(DriverManager.java:208)
	at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
	at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
	... 92 more
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
	... 108 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/metastore_db.
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
	at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore$6.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.raw.RawStore.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager$5.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.store.access.RAMAccessManager.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase$5.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.db.BasicDatabase.bootServiceModule(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
	at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
	at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
	at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
	at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
	at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at org.apache.derby.impl.jdbc.EmbedConnection.startPersistentService(Unknown Source)
	... 105 more


During handling of the above exception, another exception occurred:

IllegalArgumentException                  Traceback (most recent call last)
<ipython-input-1-396c23e42bfc> in <module>()
      1 from recommender import Recommender
      2 from pyspark.ml.evaluation import RegressionEvaluator
----> 3 from eval_model import TopQuantileEvaluator, NDCGEvaluator, NDCG10Evaluator
      4 from pyspark.sql import functions as F

/Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/eval_model.py in <module>()
     13     ps.sql.SparkSession.builder
     14     # .master("local[8]")
---> 15     .appName("eval_model")
     16     .getOrCreate()
     17 )

/usr/local/Cellar/apache-spark/2.1.1/libexec/python/pyspark/sql/session.py in getOrCreate(self)
    177                     session = SparkSession(sc)
    178                 for key, value in self._options.items():
--> 179                     session._jsparkSession.sessionState().conf().setConfString(key, value)
    180                 for key, value in self._options.items():
    181                     session.sparkContext._conf.set(key, value)

/usr/local/Cellar/apache-spark/2.1.1/libexec/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1131         answer = self.gateway_client.send_command(command)
   1132         return_value = get_return_value(
-> 1133             answer, self.gateway_client, self.target_id, self.name)
   1134 
   1135         for temp_arg in temp_args:

/usr/local/Cellar/apache-spark/2.1.1/libexec/python/pyspark/sql/utils.py in deco(*a, **kw)
     77                 raise QueryExecutionException(s.split(': ', 1)[1], stackTrace)
     78             if s.startswith('java.lang.IllegalArgumentException: '):
---> 79                 raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
     80             raise
     81     return deco

IllegalArgumentException: "Error while instantiating 'org.apache.spark.sql.hive.HiveSessionState':"

In [ ]:
# Evaluators used throughout this notebook.
# NOTE: these names are referenced by later cells -- keep them stable.
rmse_evaluator = RegressionEvaluator(
    metricName="rmse",
    labelCol="rating",
    predictionCol="prediction",
)

# Custom ranking evaluators from eval_model.py:
# top-quantile lift, full-list NDCG, and NDCG truncated at rank 10.
quant_evaluator = TopQuantileEvaluator()
ndcg_evaluator = NDCGEvaluator()
ndcg10_evaluator = NDCG10Evaluator()

In [22]:
# Load restaurant reviews (parquet; schema: user int, item int, rating byte).
# Dataset name suggests users/items filtered to >10 ratings each -- TODO confirm.
reviews_df = spark.read.parquet('../data/ratings_ugt10_igt10')

# Randomly split data into train and test datasets.
# A fixed seed makes the split -- and every downstream metric -- reproducible
# across kernel restarts.
train_df, test_df = reviews_df.randomSplit(weights=[0.5, 0.5], seed=42)

# printSchema() prints directly and returns None, so don't wrap it in print()
# (the original emitted a stray "None" line after the schema).
train_df.printSchema()


root
 |-- user: integer (nullable = true)
 |-- item: integer (nullable = true)
 |-- rating: byte (nullable = true)

None

In [23]:
# Fit the hybrid recommender (ALS latent factors + bias terms) on the train split.
# Hyperparameter meanings for lambda_1/lambda_2 come from the Recommender
# implementation -- presumably bias-regularization weights; TODO confirm there.
estimator = Recommender(
    useALS=True,
    useBias=True,
    lambda_1=7,
    lambda_2=12,
    userCol='user',
    itemCol='item',
    ratingCol='rating',
    rank=76,        # ALS latent-factor dimensionality
    regParam=0.7,   # ALS regularization
    maxIter=15,
    nonnegative=True
)
model = estimator.fit(train_df)

# Score both splits so train/test metrics can be compared for overfitting.
train_predictions_df = model.transform(train_df)
predictions_df = model.transform(test_df)

# printSchema() prints directly and returns None -- don't wrap it in print().
predictions_df.printSchema()

# registerTempTable() is deprecated since Spark 2.0;
# createOrReplaceTempView() is the supported equivalent.
train_predictions_df.createOrReplaceTempView("train_predictions_df")
predictions_df.createOrReplaceTempView("predictions_df")


root
 |-- user: integer (nullable = true)
 |-- item: integer (nullable = true)
 |-- rating: byte (nullable = true)
 |-- prediction: double (nullable = true)

None

In [24]:
# Report NDCG on train vs. test to gauge overfitting.
# RMSE / top-quantile metrics are left disabled for this run.
# print('rmse: ', rmse_evaluator.evaluate(predictions_df))
# print('quant: ', quant_evaluator.evaluate(predictions_df))
for metric_name, evaluator in (('ndcg', ndcg_evaluator),
                               ('ndcg10', ndcg10_evaluator)):
    print('train %s: ' % metric_name, evaluator.evaluate(train_predictions_df))
    print('test %s: ' % metric_name, evaluator.evaluate(predictions_df))


train ndcg:  0.040854358939851565
test ndcg:  0.043669841712116964
train ndcg10:  0.0634047824074252
test ndcg10:  0.06786467880199432

In [25]:
predictions_df.head(40)


Out[25]:
[Row(user=148, item=1342, rating=3, prediction=3.540774848167022),
 Row(user=148, item=623, rating=3, prediction=3.513993506770806),
 Row(user=148, item=137, rating=5, prediction=3.905822759236992),
 Row(user=148, item=2027, rating=4, prediction=3.5372629508440667),
 Row(user=148, item=321, rating=4, prediction=3.2196287628480587),
 Row(user=148, item=1160, rating=4, prediction=3.2945274173632004),
 Row(user=148, item=2797, rating=2, prediction=2.565378822933188),
 Row(user=148, item=368, rating=3, prediction=3.591911341813706),
 Row(user=148, item=642, rating=4, prediction=3.1399276932696516),
 Row(user=148, item=1183, rating=3, prediction=2.9878156642558267),
 Row(user=148, item=784, rating=4, prediction=3.3050335809953797),
 Row(user=148, item=914, rating=5, prediction=3.3181439113424034),
 Row(user=148, item=2678, rating=4, prediction=3.1182221027665733),
 Row(user=148, item=236, rating=5, prediction=3.8167642708677896),
 Row(user=148, item=4200, rating=4, prediction=3.389878863929102),
 Row(user=148, item=973, rating=3, prediction=2.3138587127736123),
 Row(user=148, item=223, rating=3, prediction=3.3759119114750824),
 Row(user=148, item=388, rating=3, prediction=2.9705258815644555),
 Row(user=148, item=3673, rating=4, prediction=3.1193572669211633),
 Row(user=148, item=128, rating=5, prediction=3.705003941581893),
 Row(user=148, item=93, rating=5, prediction=3.466300782149153),
 Row(user=148, item=1055, rating=3, prediction=2.629369416920797),
 Row(user=148, item=132, rating=5, prediction=3.374166837614215),
 Row(user=148, item=1382, rating=3, prediction=3.089001365720218),
 Row(user=148, item=1773, rating=3, prediction=2.0110700289854986),
 Row(user=148, item=274, rating=4, prediction=3.2506467186920585),
 Row(user=148, item=442, rating=2, prediction=3.279309994864235),
 Row(user=148, item=2102, rating=4, prediction=2.6781772210276547),
 Row(user=148, item=1934, rating=4, prediction=2.729912896551716),
 Row(user=148, item=1538, rating=3, prediction=2.7073698000326445),
 Row(user=148, item=1113, rating=5, prediction=3.345735948219378),
 Row(user=148, item=339, rating=4, prediction=2.7733823147047785),
 Row(user=148, item=1917, rating=4, prediction=3.168207802425375),
 Row(user=148, item=1029, rating=4, prediction=3.3789341665147496),
 Row(user=148, item=3364, rating=4, prediction=3.042050416433085),
 Row(user=148, item=1360, rating=4, prediction=3.4688810846159903),
 Row(user=148, item=397, rating=5, prediction=3.333671831768095),
 Row(user=148, item=43, rating=5, prediction=3.6721713139637284),
 Row(user=148, item=511, rating=5, prediction=3.2148373995970427),
 Row(user=148, item=2257, rating=4, prediction=3.611771349530647)]

In [26]:
predictions_df.groupBy('user').count().orderBy('count', ascending=False).head(10)


Out[26]:
[Row(user=0, count=456),
 Row(user=2, count=347),
 Row(user=1, count=336),
 Row(user=3, count=304),
 Row(user=4, count=295),
 Row(user=6, count=242),
 Row(user=5, count=240),
 Row(user=7, count=216),
 Row(user=9, count=212),
 Row(user=8, count=209)]

In [27]:
# Per-user NDCG over the FULL ranked list:
#   dcg  — rating discounted by log2(1 + rank), rank ordered by predicted score
#   idcg — same discount, rank ordered by the true rating (the ideal ordering)
#   ndcg — sum(dcg) / sum(idcg) per user, so 1.0 means a perfect ranking.
# NOTE(review): row_number() breaks ties in prediction/rating arbitrarily,
# so NDCG on tied values is not fully deterministic — confirm acceptable.
df2a = spark.sql(
'''
    select
        user,
        sum(dcg) / sum(idcg) as ndcg
    from (
        select
            user,
            rating / log(2, 1 + 
                row_number() OVER (
                    PARTITION BY user
                    ORDER BY prediction DESC
                )
            ) as dcg,
            rating / log(2, 1 + 
                row_number() OVER (
                        PARTITION BY user
                        ORDER BY rating DESC
                    )
            ) as idcg
        from predictions_df
    ) x
    group by user

'''
)


# Per-user NDCG@10: unlike df2a, dcg and idcg are each truncated to the
# top-10 rows of their OWN ordering (predicted ranking vs ideal ranking)
# before summing, then joined back per user. ndcg10 <= full-list ndcg
# whenever high-rated items fall outside the predicted top 10.
df2b = spark.sql(
'''
    select 
        p.user,
        p.dcg / a.idcg as ndcg10
    from (
        select
            x.user,
            sum(x.rating / log(2, 1 + x.pred_row_num)) as dcg
        from (
            select
                user,
                rating,
                row_number() OVER (
                    PARTITION BY user
                    ORDER BY prediction DESC
                ) as pred_row_num
            from predictions_df
        ) x 
        where x.pred_row_num <= 10
        group by x.user
    ) p
    join (
        select
            x.user,
            sum(x.rating / log(2, 1 + x.actual_row_num)) as idcg
        from (
            select
                user,
                rating,
                row_number() OVER (
                    PARTITION BY user
                    ORDER BY rating DESC
                ) as actual_row_num
            from predictions_df
        ) x 
        where x.actual_row_num <= 10
        group by x.user
    ) a on a.user = p.user
'''
)

# DataFrame.show() prints its table as a side effect and returns None, so
# wrapping it in print() emitted a stray "None" line after each table
# (visible in the original output). Call show() directly instead.
df2a.show(10)
df2b.show(10)


+----+------------------+
|user|              ndcg|
+----+------------------+
| 148|0.9646962597956135|
| 463|0.9596764020655281|
| 471|0.9678051020325722|
| 496|0.9621525950828473|
| 833|0.9631342487318908|
|1088|0.9739746963777636|
|1238|0.9768922971648027|
|1342| 0.886232397232901|
|1580|0.9561892451394816|
|1591|0.9786135657109796|
+----+------------------+
only showing top 10 rows

None
+----+------------------+
|user|            ndcg10|
+----+------------------+
| 148|0.8345057637681018|
| 463|0.8605202897368526|
| 471|0.8717908420343268|
| 496|0.8608970003302908|
| 833|0.8511244491581771|
|1088|0.8932880058765676|
|1238|0.9311331006419016|
|1342|0.6935498527651609|
|1580|0.8704978084093802|
|1591|0.9162164076223135|
+----+------------------+
only showing top 10 rows

None

In [28]:
# Drill-down for a single user (148): every row carries its per-item dcg and
# idcg contribution plus both rank positions, ordered by the predicted rank,
# to make the discounting in df2a/df2b inspectable by eye.
df3 = spark.sql(
'''
select
    user,
    item,
    rating,
    prediction,
    rating / log(2, 1 + 
        row_number() OVER (
            PARTITION BY user
            ORDER BY prediction DESC
        )
    ) as dcg,
    rating / log(2, 1 + 
        row_number() OVER (
                PARTITION BY user
                ORDER BY rating DESC
            )
    ) as idcg,
    row_number() OVER (
        PARTITION BY user
        ORDER BY prediction DESC
    ) as pred_row_num,
    row_number() OVER (
        PARTITION BY user
        ORDER BY rating DESC
    ) as ideal_row_num
from predictions_df
where user = 148
order by pred_row_num
'''
)
df3.show(100)


+----+----+------+------------------+-------------------+-------------------+------------+-------------+
|user|item|rating|        prediction|                dcg|               idcg|pred_row_num|ideal_row_num|
+----+----+------+------------------+-------------------+-------------------+------------+-------------+
| 148| 137|     5| 3.905822759236992|                5.0|                5.0|           1|            1|
| 148| 236|     5|3.8167642708677896|  3.154648767857287|  3.154648767857287|           2|            2|
| 148| 462|     4| 3.795090406609635|                2.0|  0.884258917830015|           3|           22|
| 148|  45|     4| 3.725317086765978| 1.7227062322935722| 0.8724171679421261|           4|           23|
| 148| 128|     5| 3.705003941581893|  1.934264036172708|                2.5|           5|            3|
| 148|  43|     5|3.6721713139637284| 1.7810359355401109| 2.1533827903669653|           6|            4|
| 148|  74|     1| 3.626755793383918| 0.3333333333333333|0.15256487060115928|           7|           93|
| 148|2257|     4| 3.611771349530647| 1.2618595071429148| 0.8613531161467861|           8|           24|
| 148| 368|     3| 3.591911341813706| 0.9030899869919435| 0.4981429386478135|           9|           64|
| 148|1342|     3| 3.540774848167022| 0.8671944789536634|0.49632766175717496|          10|           65|
| 148|2027|     4|3.5372629508440667| 1.1157717826045193| 0.8509842142134525|          11|           25|
| 148|   4|     5| 3.532812136431078| 1.3511907721365988|  1.934264036172708|          12|            5|
| 148| 189|     5|3.5256628637261187| 1.3132476751859679| 1.7810359355401109|          13|            6|
| 148| 623|     3| 3.513993506770806| 0.7678740744294464|0.49455257016648113|          14|           66|
| 148|  98|     4|3.5001110174023573|                1.0| 0.8412396714286098|          15|           26|
| 148| 395|     5|3.4714944426156187|   1.22325271059113| 1.6666666666666667|          16|            7|
| 148|1360|     4|3.4688810846159903| 0.9592498662725258| 0.8320583907060379|          17|           27|
| 148|  93|     5| 3.466300782149153| 1.1770445668331913| 1.5773243839286435|          18|            8|
| 148| 180|     5|3.4570656306828695| 1.1568910657987959| 1.5051499783199058|          19|            9|
| 148|2226|     5|3.4388334685260684|  1.138351243484765| 1.4453241315894392|          20|           10|
| 148|1992|     2| 3.436241094296184| 0.4484876484351509| 0.3080784439085271|          21|           89|
| 148|  23|     1| 3.414907381837528|0.22106472945750374|0.15221034671324338|          22|           94|
| 148| 443|     2| 3.393853194277691|0.43620858397106305| 0.3073237725797285|          23|           90|
| 148|4200|     4| 3.389878863929102| 0.8613531161467861| 0.8233873298417377|          24|           28|
| 148|1029|     4|3.3789341665147496| 0.8509842142134525| 0.8151801883620248|          25|           29|
| 148| 196|     4| 3.378441645000521| 0.8412396714286098| 0.8073963463283993|          26|           30|
| 148| 223|     3|3.3759119114750824| 0.6240437930295284|0.49281614988615063|          27|           67|
| 148| 132|     5| 3.374166837614215| 1.0292341623021721| 1.3947147282556491|          28|           11|
| 148| 165|     5|3.3717244361143717|  1.018975235452531| 1.3511907721365988|          29|           12|
| 148| 528|     5| 3.366175946011351| 1.0092454329104992| 1.3132476751859679|          30|           13|
| 148|2317|     4|  3.35733122851261|                0.8|                0.8|          31|           31|
| 148|1113|     5| 3.345735948219378| 0.9911993158528026| 1.2797901240490774|          32|           14|
| 148| 397|     5| 3.333671831768095|  0.982808161164113|               1.25|          33|           15|
| 148| 229|     5|3.3319567172973965| 0.9747951094689316|   1.22325271059113|          34|           16|
| 148|1345|     4| 3.326688645739784| 0.7737056144690833| 0.7929594526822421|          35|           32|
| 148| 172|     5|3.3209650222906433| 0.9597936000328007| 1.1990623328406573|          36|           17|
| 148| 914|     5|3.3181439113424034|  0.952757062133867| 1.1770445668331913|          37|           18|
| 148| 784|     4|3.3050335809953797| 0.7568014380674801| 0.7862465289312903|          38|           33|
| 148| 389|     3| 3.299809858598893| 0.5637054741273227|0.49111696633564683|          39|           68|
| 148| 517|     4| 3.299049784109487| 0.7466096449557735| 0.7798360875751452|          40|           34|
| 148|1160|     4|3.2945274173632004| 0.7417960936614756| 0.7737056144690833|          41|           35|
| 148| 314|     3|3.2871677378502007| 0.5528664994461185| 0.4894536590505323|          42|           69|
| 148| 442|     2| 3.279309994864235|0.36633850182726724|0.30658097730535616|          43|           91|
| 148| 863|     4| 3.278311544910183|  0.728351601879753| 0.7678348800262406|          44|           36|
| 148| 274|     4|3.2506467186920585| 0.7241703871201608| 0.7622056497070936|          45|           37|
| 148|1009|     4|3.2494359988765122| 0.7201253066267705| 0.7568014380674801|          46|           38|
| 148|3002|     3|3.2459966358484564|  0.537156695253124|0.48782493681490247|          47|           70|
| 148| 820|     3| 3.225233068686384| 0.5343107806620333| 0.4862295731793669|          48|           71|
| 148| 321|     4|3.2196287628480587| 0.7087352805422317| 0.7516072988364303|          49|           39|
| 148| 511|     5|3.2148373995970427| 0.8814571719444104| 1.1568910657987959|          50|           19|
| 148|1917|     4| 3.168207802425375|  0.701700254327818| 0.7466096449557735|          51|           40|
| 148| 848|     4|3.1482207596188427| 0.6983337201921797| 0.7417960936614756|          52|           41|
| 148|1138|     5| 3.140906853881514| 0.8688267143571999|  1.138351243484765|          53|           20|
| 148| 642|     4|3.1399276932696516| 0.6918787617803084| 0.7371553325948247|          54|           42|
| 148|3673|     4|3.1193572669211633| 0.6887817351763924| 0.7326770036545345|          55|           43|
| 148|2300|     4| 3.118889106150478| 0.6857664022956538|  0.728351601879753|          56|           44|
| 148|2678|     4|3.1182221027665733| 0.6828291186548805| 0.7241703871201608|          57|           45|
| 148|2110|     4| 3.109505748018714| 0.6799664651476561| 0.7201253066267705|          58|           46|
| 148|1734|     4|3.1040262783642074| 0.6771752303951257| 0.7162089270041654|          59|           47|
| 148|2894|     4| 3.100391811868997| 0.6744523947580044| 0.7124143742160444|          60|           48|
| 148|1382|     3| 3.089001365720218| 0.5038463368711258| 0.4846664023289891|          61|           72|
| 148|1571|     4|3.0831510948380583| 0.6692007152406965| 0.7087352805422317|          62|           49|
| 148| 275|     4| 3.047165413715316| 0.6666666666666666| 0.7051657375555284|          63|           50|
| 148|3364|     4| 3.042050416433085| 0.6641905848637513|  0.701700254327818|          64|           51|
| 148|2784|     4| 3.026545764095488| 0.6617702156762333| 0.6983337201921797|          65|           52|
| 148|  62|     4| 3.018308581467604| 0.6594034268886415|   0.69506137148576|          66|           53|
| 148|2334|     3| 3.003152147583453|0.49281614988615063|0.48313431526933326|          67|           73|
| 148|1183|     3|2.9878156642558267|0.49111696633564683|0.48163225630206424|          68|           74|
| 148|2421|     4| 2.983440995763007|  0.652604878734043| 0.6918787617803084|          69|           54|
| 148|1180|     5| 2.981050530003105| 0.8130415613581707| 1.1212191210878772|          70|           21|
| 148| 388|     3|2.9705258815644555| 0.4862295731793669|  0.480159219764464|          71|           75|
| 148| 170|     4|2.9275853066395667| 0.6462218697719855| 0.6887817351763924|          72|           55|
| 148|1147|     4| 2.926051762497025| 0.6441790870257776| 0.6857664022956538|          73|           56|
| 148|1412|     4|2.8716548482976005| 0.6421763417360856| 0.6828291186548805|          74|           57|
| 148|2319|     4| 2.782274886844114| 0.6402122930192854| 0.6799664651476561|          75|           58|
| 148| 339|     4|2.7733823147047785| 0.6382856626797526| 0.6771752303951257|          76|           59|
| 148|3247|     3|2.7647109598136588|0.47729642360788244|0.47871424700981446|          77|           76|
| 148|4058|     3| 2.758133042851231| 0.4759048767467881|0.47729642360788244|          78|           77|
| 148|1934|     4| 2.729912896551716| 0.6327183637591292| 0.6744523947580044|          79|           60|
| 148|1538|     3|2.7073698000326445|0.47319731517859304| 0.4759048767467881|          80|           78|
| 148|2514|     3| 2.696926642142448|0.47187974204864064|0.47453877281934687|          81|           79|
| 148|2102|     4|2.6781772210276547| 0.6274470994377639| 0.6717951158281678|          82|           61|
| 148|1055|     3| 2.629369416920797| 0.4693133650262147|0.47319731517859304|          83|           80|
| 148|3874|     4| 2.602936560464239|    0.6240842600889| 0.6692007152406965|          84|           62|
| 148|1945|     3| 2.600118764615307|0.46683417420748186|0.47187974204864064|          85|           81|
| 148|2797|     2| 2.565378822933188| 0.3104171255403102| 0.3058497366056642|          86|           92|
| 148| 981|     4|2.5522980885954345| 0.6192495309430728| 0.6666666666666666|          87|           63|
| 148|1013|     3| 2.540723653015375| 0.4632679884033304| 0.4705853245783229|          88|           82|
| 148| 135|     3|2.4227193796557085|0.46211766586279074| 0.4693133650262147|          89|           83|
| 148| 666|     3|2.4007681011899544| 0.4609856588695927|  0.468063195066675|          90|           84|
| 148|4211|     3|2.3520939451415908|0.45987146595803424|0.46683417420748186|          91|           85|
| 148| 973|     3|2.3138587127736123|0.45877460490849636| 0.4656256883104653|          92|           86|
| 148|1038|     3| 2.120301889184338|0.45769461180347787|0.46443714820730453|          93|           87|
| 148|1773|     3|2.0110700289854986| 0.4566310401397301| 0.4632679884033304|          94|           88|
+----+----+------+------------------+-------------------+-------------------+------------+-------------+


In [29]:
# Hand-built fixture: one user, a constant prediction (3.8) and varied ratings.
# With a constant prediction the predicted ordering carries no information,
# which makes the dcg/idcg discounting in the next cell easy to verify by hand.
toy_df = spark.createDataFrame([
    (1,1,1,3.8), (1,2,3,3.8), (1,3,1,3.8), (1,4,1,3.8), (1,5,5,3.8),
    (1,6,4,3.8), (1,7,5,3.8), (1,8,5,3.8), (1,9,5,3.8), (1,10,5,3.8),
],['user','item','rating', 'prediction'])

# registerTempTable() is deprecated since Spark 2.0;
# createOrReplaceTempView() is the supported, re-run-safe equivalent.
toy_df.createOrReplaceTempView("toy_df")

In [30]:
# Same per-item dcg/idcg breakdown as the previous query, but over toy_df.
# NOTE(review): this reuses the name df3, silently shadowing the earlier
# predictions_df drill-down — consider a distinct name (e.g. toy_ndcg_df).
df3 = spark.sql(
'''
select
    user,
    item,
    rating,
    prediction,
    rating / log(2, 1 + 
        row_number() OVER (
            PARTITION BY user
            ORDER BY prediction DESC
        )
    ) as dcg,
    rating / log(2, 1 + 
        row_number() OVER (
                PARTITION BY user
                ORDER BY rating DESC
            )
    ) as idcg,
    row_number() OVER (
        PARTITION BY user
        ORDER BY prediction DESC
    ) as pred_row_num,
    row_number() OVER (
        PARTITION BY user
        ORDER BY rating DESC
    ) as ideal_row_num
from toy_df
'''
)
df3.show(100)


+----+----+------+----------+-------------------+-------------------+------------+-------------+
|user|item|rating|prediction|                dcg|               idcg|pred_row_num|ideal_row_num|
+----+----+------+----------+-------------------+-------------------+------------+-------------+
|   1|   5|     5|       3.8|                5.0|                5.0|           1|            1|
|   1|   7|     5|       3.8|  3.154648767857287|  3.154648767857287|           2|            2|
|   1|   8|     5|       3.8|                2.5|                2.5|           3|            3|
|   1|   9|     5|       3.8| 2.1533827903669653| 2.1533827903669653|           4|            4|
|   1|  10|     5|       3.8|  1.934264036172708|  1.934264036172708|           5|            5|
|   1|   6|     4|       3.8| 1.4248287484320887| 1.4248287484320887|           6|            6|
|   1|   2|     3|       3.8|                1.0|                1.0|           7|            7|
|   1|   1|     1|       3.8| 0.3154648767857287| 0.3154648767857287|           8|            8|
|   1|   3|     1|       3.8|0.30102999566398114|0.30102999566398114|           9|            9|
|   1|   4|     1|       3.8| 0.2890648263178878| 0.2890648263178878|          10|           10|
+----+----+------+----------+-------------------+-------------------+------------+-------------+


In [31]:
# Random baseline predictor over the training set: global mean rating plus
# standard-normal noise. Serves as a floor to compare real model NDCG against.
avg_rating_df = (
    train_df
    .agg(
        F.avg('rating').alias('avg_rating')
    )
)

train_predict_df = (
    train_df
    .crossJoin(avg_rating_df)
    .withColumn(
        'prediction',
        # Seed the noise so the baseline numbers reproduce across notebook
        # re-runs; the original unseeded randn() drew a new sample each run.
        F.col('avg_rating') + F.randn(seed=42)
    )
    .select(
        'user',
        'item',
        'rating',
        'prediction'
    )
)

# createOrReplaceTempView replaces registerTempTable (deprecated in Spark 2.0).
train_predict_df.createOrReplaceTempView("train_predict_df")

train_predict_df.show()


+----+----+------+------------------+
|user|item|rating|        prediction|
+----+----+------+------------------+
|   0|  22|     4| 3.438706702931956|
|   0|  34|     3| 2.472287134629834|
|   0|  43|     5| 2.800809337009305|
|   0|  62|     4|2.8652366697503315|
|   0|  74|     5| 4.203724077491838|
|   0| 106|     4|3.6165322950273247|
|   0| 134|     3|3.4806515832663814|
|   0| 146|     3|1.0257466319226944|
|   0| 149|     5|  5.40518003004515|
|   0| 188|     3| 3.016204829162767|
|   0| 190|     4|3.1782809346052545|
|   0| 222|     5|  2.96560250164553|
|   0| 230|     4| 4.012016542731463|
|   0| 350|     5| 3.745525760959535|
|   0| 399|     4| 3.892543386994308|
|   0| 403|     2|  3.68719142950705|
|   0| 434|     4|5.1836311444517555|
|   0| 457|     4| 3.593295770876345|
|   0| 464|     4| 4.961416887205116|
|   0| 533|     4|  4.65880147889484|
+----+----+------+------------------+
only showing top 20 rows


In [32]:
# Same single-user (148) dcg/idcg drill-down as before, but against the
# random-baseline predictions — pred_row_num and ideal_row_num should now be
# essentially uncorrelated, unlike the real model's output.
df4 = spark.sql(
'''
select
    user,
    item,
    rating,
    prediction,
    rating / log(2, 1 + 
        row_number() OVER (
            PARTITION BY user
            ORDER BY prediction DESC
        )
    ) as dcg,
    rating / log(2, 1 + 
        row_number() OVER (
                PARTITION BY user
                ORDER BY rating DESC
            )
    ) as idcg,
    row_number() OVER (
        PARTITION BY user
        ORDER BY prediction DESC
    ) as pred_row_num,
    row_number() OVER (
        PARTITION BY user
        ORDER BY rating DESC
    ) as ideal_row_num
from train_predict_df
where user = 148
order by pred_row_num
'''
)
df4.show(20)


+----+----+------+------------------+------------------+-------------------+------------+-------------+
|user|item|rating|        prediction|               dcg|               idcg|pred_row_num|ideal_row_num|
+----+----+------+------------------+------------------+-------------------+------------+-------------+
| 148|2304|     4|6.1706119023937624|               4.0| 0.7798360875751452|           1|           34|
| 148|1649|     4| 5.853989483210963|2.5237190142858297| 0.6918787617803084|           2|           54|
| 148| 369|     4|5.6701715093340965|               2.0|  0.884258917830015|           3|           22|
| 148| 221|     3|   5.5749159610576|1.2920296742201793|0.48163225630206424|           4|           74|
| 148| 633|     3| 5.563554499918556|1.1605584217036249|  0.480159219764464|           5|           75|
| 148|2441|     3| 5.262028353850321|1.0686215613240666|0.47187974204864064|           6|           81|
| 148| 379|     4| 5.122374288143945|1.3333333333333333| 0.8724171679421261|           7|           23|
| 148|1044|     4| 5.096395902420889|1.2618595071429148| 0.7929594526822421|           8|           32|
| 148| 155|     4| 5.092408908726938|1.2041199826559246| 0.6983337201921797|           9|           52|
| 148|1870|     4| 5.059104624332527|1.1562593052715513| 0.6887817351763924|          10|           55|
| 148|4091|     4| 5.048827400797215|1.1157717826045193|  0.701700254327818|          11|           51|
| 148|3062|     4| 4.996674799622005| 1.080952617709279| 0.7417960936614756|          12|           41|
| 148| 621|     2| 4.783922966805143|0.5252990700743871| 0.3080784439085271|          13|           89|
| 148|3447|     3| 4.774798146327548|0.7678740744294464|  0.468063195066675|          14|           84|
| 148| 829|     4| 4.774026623927967|               1.0|                0.8|          15|           31|
| 148| 805|     3| 4.769243262443236| 0.733951626354678| 0.4894536590505323|          16|           69|
| 148|4392|     4|  4.75935488148759|0.9592498662725258| 0.7371553325948247|          17|           42|
| 148|2131|     4| 4.733549388359078| 0.941635653466553| 0.7466096449557735|          18|           40|
| 148|1353|     3|  4.71437848053403|0.6941346394792774|0.49632766175717496|          19|           65|
| 148|1085|     4| 4.667151107839208| 0.910680994787812| 0.6744523947580044|          20|           60|
+----+----+------+------------------+------------------+-------------------+------------+-------------+
only showing top 20 rows


In [33]:
# Random baseline over the TEST set: mean training rating (avg_rating_df was
# computed from train_df) plus standard-normal noise.
test_predict_df = (
    test_df
    .crossJoin(avg_rating_df)
    .withColumn(
        'prediction',
        # Seeded for reproducibility across re-runs; a different seed than the
        # train baseline so the two noise columns are not the same draw.
        F.col('avg_rating') + F.randn(seed=43)
    )
    .select(
        'user',
        'item',
        'rating',
        'prediction'
    )
)

# createOrReplaceTempView replaces registerTempTable (deprecated in Spark 2.0).
test_predict_df.createOrReplaceTempView("test_predict_df")

test_predict_df.show()


+----+----+------+------------------+
|user|item|rating|        prediction|
+----+----+------+------------------+
|   0|  18|     4|   5.8382427406166|
|   0|  32|     4|4.6595378220695975|
|   0|  35|     5| 3.666596910823167|
|   0|  36|     3|3.5493592850388094|
|   0|  50|     5| 5.249635815300059|
|   0|  70|     4|3.4895007637050472|
|   0|  77|     3|  2.83960383925089|
|   0|  78|     5|2.5888604065867105|
|   0|  98|     5| 4.375817741178096|
|   0| 116|     5| 4.519717794780622|
|   0| 136|     2| 4.503841947780726|
|   0| 157|     4| 5.584036452988005|
|   0| 161|     4|2.2316745115703567|
|   0| 198|     5| 4.448128710449876|
|   0| 217|     3|3.5708312971283434|
|   0| 235|     4| 4.017185579566163|
|   0| 236|     4| 3.036628914714359|
|   0| 243|     4| 3.810116619652029|
|   0| 266|     4| 2.517439511208432|
|   0| 294|     2| 5.199446980791228|
+----+----+------+------------------+
only showing top 20 rows


In [34]:
# User-148 dcg/idcg drill-down over the random baseline's TEST predictions.
# Unlike df3/df4 there is no "order by pred_row_num", so display order is
# whatever Spark returns — presumably incidental; confirm if order matters.
df5 = spark.sql(
'''
select
    user,
    item,
    rating,
    prediction,
    rating / log(2, 1 + 
        row_number() OVER (
            PARTITION BY user
            ORDER BY prediction DESC
        )
    ) as dcg,
    rating / log(2, 1 + 
        row_number() OVER (
                PARTITION BY user
                ORDER BY rating DESC
            )
    ) as idcg,
    row_number() OVER (
        PARTITION BY user
        ORDER BY prediction DESC
    ) as pred_row_num,
    row_number() OVER (
        PARTITION BY user
        ORDER BY rating DESC
    ) as ideal_row_num
from test_predict_df
where user = 148
'''
)
df5.show(200)


+----+----+------+------------------+-------------------+-------------------+------------+-------------+
|user|item|rating|        prediction|                dcg|               idcg|pred_row_num|ideal_row_num|
+----+----+------+------------------+-------------------+-------------------+------------+-------------+
| 148|1945|     3|5.8026854996111155|                3.0|0.47187974204864064|           1|           81|
| 148|1055|     3| 5.779313255718588| 1.8927892607143721|0.49111696633564683|           2|           68|
| 148| 275|     4| 5.757909587955016|                2.0| 0.7371553325948247|           3|           42|
| 148|1571|     4| 5.629198979997861| 1.7227062322935722| 0.7201253066267705|           4|           46|
| 148|2334|     3| 5.523136737160017| 1.1605584217036249| 0.4705853245783229|           5|           82|
| 148|1183|     3| 5.470647577132097| 1.0686215613240666| 0.4759048767467881|           6|           78|
| 148|1147|     4| 5.363699977681501| 1.3333333333333333| 0.6799664651476561|           7|           58|
| 148| 511|     5| 5.304830908794267| 1.5773243839286435|                2.5|           8|            3|
| 148|   4|     5| 5.224939568931342| 1.5051499783199058|  1.934264036172708|           9|            5|
| 148| 236|     5| 5.206412377914868| 1.4453241315894392| 1.3947147282556491|          10|           11|
| 148| 196|     4| 5.146903165181599| 1.1157717826045193| 0.8073963463283993|          11|           30|
| 148| 443|     2| 5.138711800469165| 0.5404763088546395| 0.3073237725797285|          12|           90|
| 148|  62|     4| 5.128536481656779| 1.0505981401487743| 0.7417960936614756|          13|           41|
| 148|1773|     3|  5.06995011719117| 0.7678740744294464|0.48782493681490247|          14|           70|
| 148| 339|     4| 5.056671922070084|                1.0| 0.8724171679421261|          15|           23|
| 148|3364|     4| 4.982075386835361| 0.9786021684729039| 0.7516072988364303|          16|           39|
| 148| 223|     3|4.9320746601755205| 0.7194373997043944| 0.4862295731793669|          17|           71|
| 148|1382|     3| 4.896091871244972| 0.7062267400999147|0.47453877281934687|          18|           79|
| 148| 784|     4|4.7818556805712324| 0.9255128526390366|  0.728351601879753|          19|           44|
| 148|4211|     3| 4.693014966230444| 0.6830107460908589|0.46683417420748186|          20|           85|
| 148| 165|     5| 4.664625292168775| 1.1212191210878772| 1.3132476751859679|          21|           13|
| 148| 973|     3|4.6317883470584045| 0.6631941883725112| 0.4632679884033304|          22|           88|
| 148| 528|     5| 4.611235837286305| 1.0905214599276576| 1.6666666666666667|          23|            7|
| 148| 914|     5| 4.555773407800349| 1.0766913951834827| 1.3511907721365988|          24|           12|
| 148|2319|     4|4.4926779238159575| 0.8509842142134525| 0.6744523947580044|          25|           60|
| 148|1038|     3| 4.470488682157669| 0.6309297535714573|0.49281614988615063|          26|           67|
| 148| 180|     5| 4.463114743001566| 1.0400729883825475| 1.4453241315894392|          27|           10|
| 148| 229|     5| 4.390235720282174| 1.0292341623021721| 1.1770445668331913|          28|           18|
| 148|4058|     3|4.3489906639724385| 0.6113851412715186|  0.468063195066675|          29|           84|
| 148| 820|     3| 4.345700289363394| 0.6055472597462995|0.49455257016648113|          30|           66|
| 148|2797|     2|4.2981852173088315|                0.4|0.30658097730535616|          31|           91|
| 148| 389|     3| 4.286605981476037| 0.5947195895116816|0.49632766175717496|          32|           65|
| 148|2257|     4| 4.277996588236268| 0.7862465289312903|   0.69506137148576|          33|           53|
| 148|1934|     4|4.2359382314030105| 0.7798360875751452| 0.8320583907060379|          34|           27|
| 148|1138|     5| 4.179226255299683|  0.967132018086354| 1.5773243839286435|          35|            8|
| 148| 397|     5| 4.164904456224879| 0.9597936000328007| 1.7810359355401109|          36|            6|
| 148|1734|     4| 4.125306962983956| 0.7622056497070936| 0.6666666666666666|          37|           63|
| 148| 642|     4| 4.062429333529082| 0.7568014380674801| 0.7326770036545345|          38|           43|
| 148|3002|     3| 4.060650358765983| 0.5637054741273227| 0.4693133650262147|          39|           83|
| 148| 666|     3| 4.039408215048402| 0.5599572337168301|0.47729642360788244|          40|           77|
| 148| 462|     4| 4.038236930489699| 0.7417960936614756| 0.7087352805422317|          41|           49|
| 148| 137|     5|  3.99209062111073| 0.9214441657435309|   1.22325271059113|          42|           16|
| 148| 981|     4|3.9689031238155676| 0.7326770036545345|                0.8|          43|           31|
| 148|1160|     4|3.9363643192737165|  0.728351601879753| 0.8509842142134525|          44|           25|
| 148| 132|     5| 3.925045402537325| 0.9052129839002011|                5.0|          45|            1|
| 148| 517|     4|3.8791514172380364| 0.7201253066267705| 0.6887817351763924|          46|           55|
| 148|2514|     3| 3.774731722355551|  0.537156695253124|  0.480159219764464|          47|           75|
| 148| 135|     3|  3.75796116530181| 0.5343107806620333| 0.4656256883104653|          48|           86|
| 148| 321|     4| 3.736257754104039| 0.7087352805422317|  0.884258917830015|          49|           22|
| 148| 189|     5|3.7351378151205905| 0.8814571719444104|  3.154648767857287|          50|            2|
| 148| 388|     3| 3.723436849366771| 0.5262751907458635| 0.4981429386478135|          51|           64|
| 148|1342|     3|3.6913581182641226| 0.5237502901441348| 0.4894536590505323|          52|           69|
| 148|2317|     4|3.6765348589177465|   0.69506137148576| 0.7678348800262406|          53|           36|
| 148|2300|     4| 3.660545813896746| 0.6918787617803084| 0.7798360875751452|          54|           34|
| 148|  23|     1| 3.639282025264064| 0.1721954337940981|0.15256487060115928|          55|           93|
| 148|2894|     4| 3.625646741900116| 0.6857664022956538| 0.7568014380674801|          56|           38|
| 148|2421|     4|3.6208588449973083| 0.6828291186548805| 0.8233873298417377|          57|           28|
| 148|1538|     3| 3.611398012490524| 0.5099748488607421|0.47319731517859304|          58|           80|
| 148|1009|     4|3.5352916195398767| 0.6771752303951257| 0.8613531161467861|          59|           24|
| 148|  43|     5|3.5131880633848414| 0.8430654934475055| 1.2797901240490774|          60|           14|
| 148|  98|     4| 3.325088499948339| 0.6717951158281678| 0.8151801883620248|          61|           29|
| 148|2110|     4|3.3079216308713755| 0.6692007152406965| 0.6983337201921797|          62|           52|
| 148| 128|     5|3.3074576034048144| 0.8333333333333334|               1.25|          63|           15|
| 148| 368|     3|3.2636829961028924| 0.4981429386478135|0.46443714820730453|          64|           87|
| 148| 395|     5| 3.226973498828029| 0.8272127695952917| 1.1568910657987959|          65|           19|
| 148|3247|     3| 3.226548638755594|0.49455257016648113|0.47871424700981446|          66|           76|
| 148|1013|     3|3.2158263106837235|0.49281614988615063|0.48163225630206424|          67|           74|
| 148| 274|     4|3.1838342467126344| 0.6548226217808625| 0.7737056144690833|          68|           35|
| 148|1180|     5| 3.142108940523942| 0.8157560984175538| 2.1533827903669653|          69|            4|
| 148|1029|     4|3.1288184471391443| 0.6504332490865367| 0.7241703871201608|          70|           45|
| 148| 314|     3| 3.092183412647548| 0.4862295731793669| 0.4846664023289891|          71|           72|
| 148|  45|     4|3.0866673216733114| 0.6462218697719855| 0.7124143742160444|          72|           48|
| 148|3874|     4| 2.956494323118478| 0.6441790870257776| 0.6918787617803084|          73|           54|
| 148|2102|     4|  2.94527267526665| 0.6421763417360856| 0.7862465289312903|          74|           33|
| 148| 172|     5|2.9328775534542224| 0.8002653662741067| 1.1990623328406573|          75|           17|
| 148|  74|     1|2.8972746545587538|0.15957141566993815|0.15221034671324338|          76|           94|
| 148| 170|     4|2.8891622179322702| 0.6363952314771766| 0.6692007152406965|          77|           62|
| 148|1360|     4| 2.865212410739362| 0.6345398356623841| 0.7051657375555284|          78|           50|
| 148| 623|     3|2.8442482184530484|0.47453877281934687|0.48313431526933326|          79|           73|
| 148|3673|     4|2.7337889660285306| 0.6309297535714574| 0.7466096449557735|          80|           40|
| 148|2226|     5|2.6846084874271288| 0.7864662367477344| 1.5051499783199058|          81|            9|
| 148| 442|     2| 2.640855937602854|0.31372354971888194| 0.3080784439085271|          82|           89|
| 148|1345|     4| 2.635954278994843| 0.6257511533682862| 0.7929594526822421|          83|           32|
| 148|2678|     4| 2.589285654745082|    0.6240842600889| 0.7622056497070936|          84|           37|
| 148|4200|     4| 2.573221392689499| 0.6224455656099759| 0.7162089270041654|          85|           47|
| 148|1992|     2| 2.514133120958961| 0.3104171255403102| 0.3058497366056642|          86|           92|
| 148|2784|     4|2.1065622137165483| 0.6192495309430728| 0.6717951158281678|          87|           61|
| 148|  93|     5| 2.001673145135418| 0.7721133140055506| 1.1212191210878772|          88|           21|
| 148|2027|     4|1.9038114093203287| 0.6161568878170542| 0.6771752303951257|          89|           59|
| 148|1412|     4|1.8414997722293232|  0.614647545159457| 0.8412396714286098|          90|           26|
| 148|1917|     4| 1.813378827686869| 0.6131619546107123|  0.701700254327818|          91|           51|
| 148| 863|     4|1.7562415153102715| 0.6116994732113284| 0.6828291186548805|          92|           57|
| 148|1113|     5|1.5788921884304883| 0.7628243530057964|  1.138351243484765|          93|           20|
| 148| 848|     4| 1.563793367441527| 0.6088413868529735| 0.6857664022956538|          94|           56|
+----+----+------+------------------+-------------------+-------------------+------------+-------------+


In [35]:
# Scalar summary: 1 minus the mean per-user NDCG@10 (same top-10 dcg/idcg
# construction as df2b, averaged over users, then subtracted from 1).
# NOTE(review): despite the alias "ndcg" this is 1 - NDCG@10 — presumably
# framed as a loss so lower is better; confirm downstream consumers expect it.
df6 = spark.sql(
'''
select 1 - avg(p.dcg / a.idcg) as ndcg
from (
    select
        x.user,
        sum(x.rating / log(2, 1 + x.pred_row_num)) as dcg
    from (
        select
            user,
            rating,
            row_number() OVER (
                PARTITION BY user
                ORDER BY prediction DESC
            ) as pred_row_num
        from predictions_df
    ) x 
    where x.pred_row_num <= 10
    group by x.user
) p
join (
    select
        x.user,
        sum(x.rating / log(2, 1 + x.actual_row_num)) as idcg
    from (
        select
            user,
            rating,
            row_number() OVER (
                PARTITION BY user
                ORDER BY rating DESC
            ) as actual_row_num
        from predictions_df
    ) x 
    where x.actual_row_num <= 10
    group by x.user
) a on a.user = p.user
''')

# Extract the single scalar value from the one-row, one-column result.
df6.collect()[0][0]


Out[35]:
0.06786467880199432

In [36]:
# test top N ndcg implementation
def eval_ndcg(df):
    """Compute a top-10 NDCG-based score for a predictions DataFrame.

    Registers ``df`` under the temp view name ``"df"`` (replacing any
    existing view of that name) and returns ``1 - avg(DCG@10 / IDCG@10)``
    averaged across users — i.e. the complement of NDCG@10, so LOWER
    values are better.  DCG@10 is ``sum(rating / log2(1 + rank))`` over
    each user's 10 highest-*predicted* items; IDCG@10 uses the 10
    highest *actual* ratings for the same user.

    Parameters
    ----------
    df : pyspark.sql.DataFrame
        Must contain ``user``, ``rating`` and ``prediction`` columns.

    Returns
    -------
    float
        The 1 - NDCG@10 score.
    """
    # registerTempTable() is deprecated since Spark 2.0 (this notebook
    # runs on 2.1.1); createOrReplaceTempView() is the drop-in
    # replacement with identical semantics.
    df.createOrReplaceTempView("df")

    score_df = spark.sql(
    '''
    select 1 - avg(p.dcg / a.idcg) as ndcg
    from (
        select
            x.user,
            sum(x.rating / log(2, 1 + x.pred_row_num)) as dcg
        from (
            select
                user,
                rating,
                row_number() OVER (
                    PARTITION BY user
                    ORDER BY prediction DESC
                ) as pred_row_num
            from df
        ) x 
        where x.pred_row_num <= 10
        group by x.user
    ) p
    join (
        select
            x.user,
            sum(x.rating / log(2, 1 + x.actual_row_num)) as idcg
        from (
            select
                user,
                rating,
                row_number() OVER (
                    PARTITION BY user
                    ORDER BY rating DESC
                ) as actual_row_num
            from df
        ) x 
        where x.actual_row_num <= 10
        group by x.user
    ) a on a.user = p.user
    '''
    )
    
    # Single-row, single-column result — pull out the scalar.
    return score_df.collect()[0][0]

In [37]:
# Compare the full-list NDCG evaluator against the top-10 SQL
# implementation on the ALS train/test predictions.
for metric_name, score_fn in (('ndcg', ndcg_evaluator.evaluate),
                              ('ndcg_10', eval_ndcg)):
    print('train ' + metric_name + ': ', score_fn(train_predictions_df))
    print('test ' + metric_name + ': ', score_fn(predictions_df))


train ndcg:  0.040854358939851565
test ndcg:  0.043669841712116964
train ndcg_10:  0.0634047824074252
test ndcg_10:  0.06786467880199432

In [38]:
# Same comparison on the random-baseline predictions, to give the ALS
# scores above a reference point.
for metric_name, score_fn in (('ndcg', ndcg_evaluator.evaluate),
                              ('ndcg_10', eval_ndcg)):
    print('random train ' + metric_name + ': ', score_fn(train_predict_df))
    print('random test ' + metric_name + ': ', score_fn(test_predict_df))


random train ndcg:  0.06848897846773916
random test ndcg:  0.06809032973399232
random train ndcg_10:  0.10590540704842988
random test ndcg_10:  0.10565139486116693

In [41]:
reviews_df.describe().show()


+-------+-----------------+------------------+------------------+
|summary|             user|              item|            rating|
+-------+-----------------+------------------+------------------+
|  count|           698013|            698013|            698013|
|   mean|7726.854631647261| 945.5814547866587|3.8090322100018192|
| stddev|7209.145168047517|1004.7410488846924|1.0271331256148968|
|    min|                0|                 0|                 1|
|    max|            25486|              5068|                 5|
+-------+-----------------+------------------+------------------+


In [ ]: