In [1]:
# Project-local modules (not on PyPI): Recommender and the custom ranking
# evaluators — presumably ALS-based recommendation + eval code; see those files.
from recommender import Recommender
from pyspark.ml.evaluation import RegressionEvaluator
from eval_model import TopQuantileEvaluator, NDCGEvaluator, NDCG10Evaluator
from pyspark.sql import functions as F
# NOTE(review): the Py4JJavaError output attached to this cell bottoms out in
# Derby "ERROR XSDB6: Another instance of Derby may have already booted the
# database .../src/metastore_db" while Spark instantiates HiveExternalCatalog.
# The embedded Derby metastore allows only ONE JVM at a time, so another Spark
# session (a second notebook kernel or pyspark shell started from the same
# working directory) was already holding the metastore_db lock — likely
# triggered here because one of these imports builds a SparkSession at import
# time (TODO: confirm in recommender.py / eval_model.py). Remedy: stop the
# other Spark process (or remove the stale metastore_db/*.lck files if no
# process is running), restart this kernel, and re-run. The import statements
# themselves are fine.
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
/usr/local/Cellar/apache-spark/2.1.1/libexec/python/pyspark/sql/utils.py in deco(*a, **kw)
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
/usr/local/Cellar/apache-spark/2.1.1/libexec/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
318 "An error occurred while calling {0}{1}{2}.\n".
--> 319 format(target_id, ".", name), value)
320 else:
Py4JJavaError: An error occurred while calling o27.sessionState.
: java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveSessionState':
at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:981)
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:110)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:978)
... 13 more
Caused by: java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveExternalCatalog':
at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:169)
at org.apache.spark.sql.internal.SharedState.<init>(SharedState.scala:86)
at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:101)
at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:100)
at org.apache.spark.sql.internal.SessionState.<init>(SessionState.scala:157)
at org.apache.spark.sql.hive.HiveSessionState.<init>(HiveSessionState.scala:32)
... 18 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:166)
... 26 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:358)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:262)
at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:66)
... 31 more
Caused by: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:188)
... 39 more
Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
... 40 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
... 46 more
Caused by: javax.jdo.JDOFatalDataStoreException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
at java.sql.DriverManager.getConnection(DriverManager.java:664)
at java.sql.DriverManager.getConnection(DriverManager.java:208)
at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
at java.security.AccessController.doPrivileged(Native Method)
at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:188)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:358)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:262)
at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:66)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:166)
at org.apache.spark.sql.internal.SharedState.<init>(SharedState.scala:86)
at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:101)
at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:100)
at org.apache.spark.sql.internal.SessionState.<init>(SessionState.scala:157)
at org.apache.spark.sql.hive.HiveSessionState.<init>(HiveSessionState.scala:32)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:978)
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:110)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
... 108 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/metastore_db.
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.raw.RawStore$6.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.raw.RawStore.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.access.RAMAccessManager$5.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.access.RAMAccessManager.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase$5.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.db.BasicDatabase.bootServiceModule(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.jdbc.EmbedConnection.startPersistentService(Unknown Source)
... 105 more
------
NestedThrowables:
java.sql.SQLException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
at java.sql.DriverManager.getConnection(DriverManager.java:664)
at java.sql.DriverManager.getConnection(DriverManager.java:208)
at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
at java.security.AccessController.doPrivileged(Native Method)
at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:188)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:358)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:262)
at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:66)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:166)
at org.apache.spark.sql.internal.SharedState.<init>(SharedState.scala:86)
at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:101)
at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:100)
at org.apache.spark.sql.internal.SessionState.<init>(SessionState.scala:157)
at org.apache.spark.sql.hive.HiveSessionState.<init>(HiveSessionState.scala:32)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:978)
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:110)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
... 108 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/metastore_db.
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.raw.RawStore$6.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.raw.RawStore.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.access.RAMAccessManager$5.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.access.RAMAccessManager.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase$5.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.db.BasicDatabase.bootServiceModule(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.jdbc.EmbedConnection.startPersistentService(Unknown Source)
... 105 more
------
at org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:436)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:788)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
at java.security.AccessController.doPrivileged(Native Method)
at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
... 51 more
Caused by: java.sql.SQLException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
at java.sql.DriverManager.getConnection(DriverManager.java:664)
at java.sql.DriverManager.getConnection(DriverManager.java:208)
at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
at java.security.AccessController.doPrivileged(Native Method)
at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)
at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)
at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)
at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:57)
at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:66)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)
at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:199)
at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:188)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:358)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:262)
at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:66)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:166)
at org.apache.spark.sql.internal.SharedState.<init>(SharedState.scala:86)
at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:101)
at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:100)
at org.apache.spark.sql.internal.SessionState.<init>(SessionState.scala:157)
at org.apache.spark.sql.hive.HiveSessionState.<init>(HiveSessionState.scala:32)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:978)
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:110)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
... 108 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/metastore_db.
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.raw.RawStore$6.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.raw.RawStore.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.access.RAMAccessManager$5.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.access.RAMAccessManager.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase$5.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.db.BasicDatabase.bootServiceModule(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.jdbc.EmbedConnection.startPersistentService(Unknown Source)
... 105 more
------
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at com.jolbox.bonecp.PoolUtil.generateSQLException(PoolUtil.java:192)
at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:422)
at com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)
at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)
at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:298)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
... 80 more
Caused by: java.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
at org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)
at org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection.<init>(Unknown Source)
at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
at org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)
at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
at org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)
at org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)
at java.sql.DriverManager.getConnection(DriverManager.java:664)
at java.sql.DriverManager.getConnection(DriverManager.java:208)
at com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)
at com.jolbox.bonecp.BoneCP.<init>(BoneCP.java:416)
... 92 more
Caused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@318cfa6d, see the next exception for details.
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.impl.jdbc.SQLExceptionFactory.wrapArgsForTransportAcrossDRDA(Unknown Source)
... 108 more
Caused by: ERROR XSDB6: Another instance of Derby may have already booted the database /Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/metastore_db.
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.iapi.error.StandardException.newException(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.privGetJBMSLockOnDB(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.getJBMSLockOnDB(Unknown Source)
at org.apache.derby.impl.store.raw.data.BaseDataFileFactory.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.raw.RawStore$6.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.raw.RawStore.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.raw.RawStore.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.access.RAMAccessManager$5.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.store.access.RAMAccessManager.bootServiceModule(Unknown Source)
at org.apache.derby.impl.store.access.RAMAccessManager.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startModule(Unknown Source)
at org.apache.derby.impl.services.monitor.FileMonitor.startModule(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.bootServiceModule(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase$5.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.db.BasicDatabase.bootServiceModule(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase.bootStore(Unknown Source)
at org.apache.derby.impl.db.BasicDatabase.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.boot(Unknown Source)
at org.apache.derby.impl.services.monitor.TopService.bootModule(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.bootService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startProviderService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.findProviderAndStartService(Unknown Source)
at org.apache.derby.impl.services.monitor.BaseMonitor.startPersistentService(Unknown Source)
at org.apache.derby.iapi.services.monitor.Monitor.startPersistentService(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
at org.apache.derby.impl.jdbc.EmbedConnection$4.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at org.apache.derby.impl.jdbc.EmbedConnection.startPersistentService(Unknown Source)
... 105 more
During handling of the above exception, another exception occurred:
IllegalArgumentException Traceback (most recent call last)
<ipython-input-1-396c23e42bfc> in <module>()
1 from recommender import Recommender
2 from pyspark.ml.evaluation import RegressionEvaluator
----> 3 from eval_model import TopQuantileEvaluator, NDCGEvaluator, NDCG10Evaluator
4 from pyspark.sql import functions as F
/Users/samuellee/Galvanize/DSI/capstone/restaurantrecs/src/eval_model.py in <module>()
13 ps.sql.SparkSession.builder
14 # .master("local[8]")
---> 15 .appName("eval_model")
16 .getOrCreate()
17 )
/usr/local/Cellar/apache-spark/2.1.1/libexec/python/pyspark/sql/session.py in getOrCreate(self)
177 session = SparkSession(sc)
178 for key, value in self._options.items():
--> 179 session._jsparkSession.sessionState().conf().setConfString(key, value)
180 for key, value in self._options.items():
181 session.sparkContext._conf.set(key, value)
/usr/local/Cellar/apache-spark/2.1.1/libexec/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
/usr/local/Cellar/apache-spark/2.1.1/libexec/python/pyspark/sql/utils.py in deco(*a, **kw)
77 raise QueryExecutionException(s.split(': ', 1)[1], stackTrace)
78 if s.startswith('java.lang.IllegalArgumentException: '):
---> 79 raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
80 raise
81 return deco
IllegalArgumentException: "Error while instantiating 'org.apache.spark.sql.hive.HiveSessionState':"
In [ ]:
# Build the evaluators used by the metric cells below.
# RMSE: standard regression metric on predicted vs. actual ratings.
rmse_evaluator = RegressionEvaluator(
metricName="rmse", labelCol="rating", predictionCol="prediction")
# Custom ranking evaluators imported from eval_model in the first cell:
# full-list NDCG and NDCG restricted to the top-10 predicted items.
quant_evaluator = TopQuantileEvaluator()
ndcg_evaluator = NDCGEvaluator()
ndcg10_evaluator = NDCG10Evaluator()
In [22]:
# Load restaurant reviews (parquet; name suggests users/items with >10
# ratings each — TODO confirm against the data-prep step).
reviews_df = spark.read.parquet('../data/ratings_ugt10_igt10')

# Randomly split data into train and test datasets.
# A fixed seed makes the split — and every downstream metric — reproducible
# across notebook re-runs.
train_df, test_df = reviews_df.randomSplit(weights=[0.5, 0.5], seed=42)

# printSchema() prints the tree itself and returns None, so calling it bare
# avoids the stray "None" the original print(...) wrapper emitted.
train_df.printSchema()
root
|-- user: integer (nullable = true)
|-- item: integer (nullable = true)
|-- rating: byte (nullable = true)
None
In [23]:
# Fit the recommender on the training split.
# Hyperparameter values (rank, regParam, lambda_1/lambda_2, maxIter) are
# hard-coded here — presumably from earlier tuning; TODO confirm.
estimator = Recommender(
    useALS=True,
    useBias=True,
    lambda_1=7,
    lambda_2=12,
    userCol='user',
    itemCol='item',
    ratingCol='rating',
    rank=76,
    regParam=0.7,
    maxIter=15,
    nonnegative=True
)
model = estimator.fit(train_df)

# Score both splits so train vs. test metrics can be compared below.
train_predictions_df = model.transform(train_df)
predictions_df = model.transform(test_df)

# printSchema() prints and returns None — don't wrap it in print()
# (the original emitted a stray "None").
predictions_df.printSchema()

# registerTempTable is deprecated since Spark 2.0; createOrReplaceTempView
# is the supported way to expose these frames to spark.sql below.
train_predictions_df.createOrReplaceTempView("train_predictions_df")
predictions_df.createOrReplaceTempView("predictions_df")
root
|-- user: integer (nullable = true)
|-- item: integer (nullable = true)
|-- rating: byte (nullable = true)
|-- prediction: double (nullable = true)
None
In [24]:
# Score train vs. test with the NDCG evaluators (full list and top-10).
# The rmse / quantile metrics are intentionally skipped in this run:
# print('rmse: ', rmse_evaluator.evaluate(predictions_df))
# print('quant: ', quant_evaluator.evaluate(predictions_df))
for label, evaluator, frame in (
    ('train ndcg: ', ndcg_evaluator, train_predictions_df),
    ('test ndcg: ', ndcg_evaluator, predictions_df),
    ('train ndcg10: ', ndcg10_evaluator, train_predictions_df),
    ('test ndcg10: ', ndcg10_evaluator, predictions_df),
):
    print(label, evaluator.evaluate(frame))
train ndcg: 0.040854358939851565
test ndcg: 0.043669841712116964
train ndcg10: 0.0634047824074252
test ndcg10: 0.06786467880199432
In [25]:
# Inspect the first 40 test-set predictions as Row objects
# (user/item/rating plus the model's predicted rating).
predictions_df.head(40)
Out[25]:
[Row(user=148, item=1342, rating=3, prediction=3.540774848167022),
Row(user=148, item=623, rating=3, prediction=3.513993506770806),
Row(user=148, item=137, rating=5, prediction=3.905822759236992),
Row(user=148, item=2027, rating=4, prediction=3.5372629508440667),
Row(user=148, item=321, rating=4, prediction=3.2196287628480587),
Row(user=148, item=1160, rating=4, prediction=3.2945274173632004),
Row(user=148, item=2797, rating=2, prediction=2.565378822933188),
Row(user=148, item=368, rating=3, prediction=3.591911341813706),
Row(user=148, item=642, rating=4, prediction=3.1399276932696516),
Row(user=148, item=1183, rating=3, prediction=2.9878156642558267),
Row(user=148, item=784, rating=4, prediction=3.3050335809953797),
Row(user=148, item=914, rating=5, prediction=3.3181439113424034),
Row(user=148, item=2678, rating=4, prediction=3.1182221027665733),
Row(user=148, item=236, rating=5, prediction=3.8167642708677896),
Row(user=148, item=4200, rating=4, prediction=3.389878863929102),
Row(user=148, item=973, rating=3, prediction=2.3138587127736123),
Row(user=148, item=223, rating=3, prediction=3.3759119114750824),
Row(user=148, item=388, rating=3, prediction=2.9705258815644555),
Row(user=148, item=3673, rating=4, prediction=3.1193572669211633),
Row(user=148, item=128, rating=5, prediction=3.705003941581893),
Row(user=148, item=93, rating=5, prediction=3.466300782149153),
Row(user=148, item=1055, rating=3, prediction=2.629369416920797),
Row(user=148, item=132, rating=5, prediction=3.374166837614215),
Row(user=148, item=1382, rating=3, prediction=3.089001365720218),
Row(user=148, item=1773, rating=3, prediction=2.0110700289854986),
Row(user=148, item=274, rating=4, prediction=3.2506467186920585),
Row(user=148, item=442, rating=2, prediction=3.279309994864235),
Row(user=148, item=2102, rating=4, prediction=2.6781772210276547),
Row(user=148, item=1934, rating=4, prediction=2.729912896551716),
Row(user=148, item=1538, rating=3, prediction=2.7073698000326445),
Row(user=148, item=1113, rating=5, prediction=3.345735948219378),
Row(user=148, item=339, rating=4, prediction=2.7733823147047785),
Row(user=148, item=1917, rating=4, prediction=3.168207802425375),
Row(user=148, item=1029, rating=4, prediction=3.3789341665147496),
Row(user=148, item=3364, rating=4, prediction=3.042050416433085),
Row(user=148, item=1360, rating=4, prediction=3.4688810846159903),
Row(user=148, item=397, rating=5, prediction=3.333671831768095),
Row(user=148, item=43, rating=5, prediction=3.6721713139637284),
Row(user=148, item=511, rating=5, prediction=3.2148373995970427),
Row(user=148, item=2257, rating=4, prediction=3.611771349530647)]
In [26]:
# Test-split review count per user, heaviest reviewers first (top 10).
predictions_df.groupBy('user').count().orderBy('count', ascending=False).head(10)
Out[26]:
[Row(user=0, count=456),
Row(user=2, count=347),
Row(user=1, count=336),
Row(user=3, count=304),
Row(user=4, count=295),
Row(user=6, count=242),
Row(user=5, count=240),
Row(user=7, count=216),
Row(user=9, count=212),
Row(user=8, count=209)]
In [27]:
# Per-user NDCG computed directly in SQL as a cross-check of the
# NDCGEvaluator: dcg ranks items by predicted score, idcg by true rating.
df2a = spark.sql(
'''
select
user,
sum(dcg) / sum(idcg) as ndcg
from (
select
user,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
)
) as dcg,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
)
) as idcg
from predictions_df
) x
group by user
'''
)
# Per-user NDCG@10: dcg over the top-10 predicted items joined against
# idcg over the top-10 truly-rated items for the same user.
df2b = spark.sql(
'''
select
p.user,
p.dcg / a.idcg as ndcg10
from (
select
x.user,
sum(x.rating / log(2, 1 + x.pred_row_num)) as dcg
from (
select
user,
rating,
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
) as pred_row_num
from predictions_df
) x
where x.pred_row_num <= 10
group by x.user
) p
join (
select
x.user,
sum(x.rating / log(2, 1 + x.actual_row_num)) as idcg
from (
select
user,
rating,
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
) as actual_row_num
from predictions_df
) x
where x.actual_row_num <= 10
group by x.user
) a on a.user = p.user
'''
)
# show() prints the table itself and returns None, so the original
# print(df2a.show(10)) also emitted a stray "None"; call show() directly.
df2a.show(10)
df2b.show(10)
+----+------------------+
|user| ndcg|
+----+------------------+
| 148|0.9646962597956135|
| 463|0.9596764020655281|
| 471|0.9678051020325722|
| 496|0.9621525950828473|
| 833|0.9631342487318908|
|1088|0.9739746963777636|
|1238|0.9768922971648027|
|1342| 0.886232397232901|
|1580|0.9561892451394816|
|1591|0.9786135657109796|
+----+------------------+
only showing top 10 rows
None
+----+------------------+
|user| ndcg10|
+----+------------------+
| 148|0.8345057637681018|
| 463|0.8605202897368526|
| 471|0.8717908420343268|
| 496|0.8608970003302908|
| 833|0.8511244491581771|
|1088|0.8932880058765676|
|1238|0.9311331006419016|
|1342|0.6935498527651609|
|1580|0.8704978084093802|
|1591|0.9162164076223135|
+----+------------------+
only showing top 10 rows
None
In [28]:
# Drill down on a single user (user = 148): for each item, show its
# dcg/idcg contribution and its rank under predicted vs. actual ordering,
# to see where the predicted ranking diverges from the ideal one.
df3 = spark.sql(
'''
select
user,
item,
rating,
prediction,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
)
) as dcg,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
)
) as idcg,
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
) as pred_row_num,
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
) as ideal_row_num
from predictions_df
where user = 148
order by pred_row_num
'''
)
# show() renders up to 100 rows directly to stdout (returns None).
df3.show(100)
+----+----+------+------------------+-------------------+-------------------+------------+-------------+
|user|item|rating| prediction| dcg| idcg|pred_row_num|ideal_row_num|
+----+----+------+------------------+-------------------+-------------------+------------+-------------+
| 148| 137| 5| 3.905822759236992| 5.0| 5.0| 1| 1|
| 148| 236| 5|3.8167642708677896| 3.154648767857287| 3.154648767857287| 2| 2|
| 148| 462| 4| 3.795090406609635| 2.0| 0.884258917830015| 3| 22|
| 148| 45| 4| 3.725317086765978| 1.7227062322935722| 0.8724171679421261| 4| 23|
| 148| 128| 5| 3.705003941581893| 1.934264036172708| 2.5| 5| 3|
| 148| 43| 5|3.6721713139637284| 1.7810359355401109| 2.1533827903669653| 6| 4|
| 148| 74| 1| 3.626755793383918| 0.3333333333333333|0.15256487060115928| 7| 93|
| 148|2257| 4| 3.611771349530647| 1.2618595071429148| 0.8613531161467861| 8| 24|
| 148| 368| 3| 3.591911341813706| 0.9030899869919435| 0.4981429386478135| 9| 64|
| 148|1342| 3| 3.540774848167022| 0.8671944789536634|0.49632766175717496| 10| 65|
| 148|2027| 4|3.5372629508440667| 1.1157717826045193| 0.8509842142134525| 11| 25|
| 148| 4| 5| 3.532812136431078| 1.3511907721365988| 1.934264036172708| 12| 5|
| 148| 189| 5|3.5256628637261187| 1.3132476751859679| 1.7810359355401109| 13| 6|
| 148| 623| 3| 3.513993506770806| 0.7678740744294464|0.49455257016648113| 14| 66|
| 148| 98| 4|3.5001110174023573| 1.0| 0.8412396714286098| 15| 26|
| 148| 395| 5|3.4714944426156187| 1.22325271059113| 1.6666666666666667| 16| 7|
| 148|1360| 4|3.4688810846159903| 0.9592498662725258| 0.8320583907060379| 17| 27|
| 148| 93| 5| 3.466300782149153| 1.1770445668331913| 1.5773243839286435| 18| 8|
| 148| 180| 5|3.4570656306828695| 1.1568910657987959| 1.5051499783199058| 19| 9|
| 148|2226| 5|3.4388334685260684| 1.138351243484765| 1.4453241315894392| 20| 10|
| 148|1992| 2| 3.436241094296184| 0.4484876484351509| 0.3080784439085271| 21| 89|
| 148| 23| 1| 3.414907381837528|0.22106472945750374|0.15221034671324338| 22| 94|
| 148| 443| 2| 3.393853194277691|0.43620858397106305| 0.3073237725797285| 23| 90|
| 148|4200| 4| 3.389878863929102| 0.8613531161467861| 0.8233873298417377| 24| 28|
| 148|1029| 4|3.3789341665147496| 0.8509842142134525| 0.8151801883620248| 25| 29|
| 148| 196| 4| 3.378441645000521| 0.8412396714286098| 0.8073963463283993| 26| 30|
| 148| 223| 3|3.3759119114750824| 0.6240437930295284|0.49281614988615063| 27| 67|
| 148| 132| 5| 3.374166837614215| 1.0292341623021721| 1.3947147282556491| 28| 11|
| 148| 165| 5|3.3717244361143717| 1.018975235452531| 1.3511907721365988| 29| 12|
| 148| 528| 5| 3.366175946011351| 1.0092454329104992| 1.3132476751859679| 30| 13|
| 148|2317| 4| 3.35733122851261| 0.8| 0.8| 31| 31|
| 148|1113| 5| 3.345735948219378| 0.9911993158528026| 1.2797901240490774| 32| 14|
| 148| 397| 5| 3.333671831768095| 0.982808161164113| 1.25| 33| 15|
| 148| 229| 5|3.3319567172973965| 0.9747951094689316| 1.22325271059113| 34| 16|
| 148|1345| 4| 3.326688645739784| 0.7737056144690833| 0.7929594526822421| 35| 32|
| 148| 172| 5|3.3209650222906433| 0.9597936000328007| 1.1990623328406573| 36| 17|
| 148| 914| 5|3.3181439113424034| 0.952757062133867| 1.1770445668331913| 37| 18|
| 148| 784| 4|3.3050335809953797| 0.7568014380674801| 0.7862465289312903| 38| 33|
| 148| 389| 3| 3.299809858598893| 0.5637054741273227|0.49111696633564683| 39| 68|
| 148| 517| 4| 3.299049784109487| 0.7466096449557735| 0.7798360875751452| 40| 34|
| 148|1160| 4|3.2945274173632004| 0.7417960936614756| 0.7737056144690833| 41| 35|
| 148| 314| 3|3.2871677378502007| 0.5528664994461185| 0.4894536590505323| 42| 69|
| 148| 442| 2| 3.279309994864235|0.36633850182726724|0.30658097730535616| 43| 91|
| 148| 863| 4| 3.278311544910183| 0.728351601879753| 0.7678348800262406| 44| 36|
| 148| 274| 4|3.2506467186920585| 0.7241703871201608| 0.7622056497070936| 45| 37|
| 148|1009| 4|3.2494359988765122| 0.7201253066267705| 0.7568014380674801| 46| 38|
| 148|3002| 3|3.2459966358484564| 0.537156695253124|0.48782493681490247| 47| 70|
| 148| 820| 3| 3.225233068686384| 0.5343107806620333| 0.4862295731793669| 48| 71|
| 148| 321| 4|3.2196287628480587| 0.7087352805422317| 0.7516072988364303| 49| 39|
| 148| 511| 5|3.2148373995970427| 0.8814571719444104| 1.1568910657987959| 50| 19|
| 148|1917| 4| 3.168207802425375| 0.701700254327818| 0.7466096449557735| 51| 40|
| 148| 848| 4|3.1482207596188427| 0.6983337201921797| 0.7417960936614756| 52| 41|
| 148|1138| 5| 3.140906853881514| 0.8688267143571999| 1.138351243484765| 53| 20|
| 148| 642| 4|3.1399276932696516| 0.6918787617803084| 0.7371553325948247| 54| 42|
| 148|3673| 4|3.1193572669211633| 0.6887817351763924| 0.7326770036545345| 55| 43|
| 148|2300| 4| 3.118889106150478| 0.6857664022956538| 0.728351601879753| 56| 44|
| 148|2678| 4|3.1182221027665733| 0.6828291186548805| 0.7241703871201608| 57| 45|
| 148|2110| 4| 3.109505748018714| 0.6799664651476561| 0.7201253066267705| 58| 46|
| 148|1734| 4|3.1040262783642074| 0.6771752303951257| 0.7162089270041654| 59| 47|
| 148|2894| 4| 3.100391811868997| 0.6744523947580044| 0.7124143742160444| 60| 48|
| 148|1382| 3| 3.089001365720218| 0.5038463368711258| 0.4846664023289891| 61| 72|
| 148|1571| 4|3.0831510948380583| 0.6692007152406965| 0.7087352805422317| 62| 49|
| 148| 275| 4| 3.047165413715316| 0.6666666666666666| 0.7051657375555284| 63| 50|
| 148|3364| 4| 3.042050416433085| 0.6641905848637513| 0.701700254327818| 64| 51|
| 148|2784| 4| 3.026545764095488| 0.6617702156762333| 0.6983337201921797| 65| 52|
| 148| 62| 4| 3.018308581467604| 0.6594034268886415| 0.69506137148576| 66| 53|
| 148|2334| 3| 3.003152147583453|0.49281614988615063|0.48313431526933326| 67| 73|
| 148|1183| 3|2.9878156642558267|0.49111696633564683|0.48163225630206424| 68| 74|
| 148|2421| 4| 2.983440995763007| 0.652604878734043| 0.6918787617803084| 69| 54|
| 148|1180| 5| 2.981050530003105| 0.8130415613581707| 1.1212191210878772| 70| 21|
| 148| 388| 3|2.9705258815644555| 0.4862295731793669| 0.480159219764464| 71| 75|
| 148| 170| 4|2.9275853066395667| 0.6462218697719855| 0.6887817351763924| 72| 55|
| 148|1147| 4| 2.926051762497025| 0.6441790870257776| 0.6857664022956538| 73| 56|
| 148|1412| 4|2.8716548482976005| 0.6421763417360856| 0.6828291186548805| 74| 57|
| 148|2319| 4| 2.782274886844114| 0.6402122930192854| 0.6799664651476561| 75| 58|
| 148| 339| 4|2.7733823147047785| 0.6382856626797526| 0.6771752303951257| 76| 59|
| 148|3247| 3|2.7647109598136588|0.47729642360788244|0.47871424700981446| 77| 76|
| 148|4058| 3| 2.758133042851231| 0.4759048767467881|0.47729642360788244| 78| 77|
| 148|1934| 4| 2.729912896551716| 0.6327183637591292| 0.6744523947580044| 79| 60|
| 148|1538| 3|2.7073698000326445|0.47319731517859304| 0.4759048767467881| 80| 78|
| 148|2514| 3| 2.696926642142448|0.47187974204864064|0.47453877281934687| 81| 79|
| 148|2102| 4|2.6781772210276547| 0.6274470994377639| 0.6717951158281678| 82| 61|
| 148|1055| 3| 2.629369416920797| 0.4693133650262147|0.47319731517859304| 83| 80|
| 148|3874| 4| 2.602936560464239| 0.6240842600889| 0.6692007152406965| 84| 62|
| 148|1945| 3| 2.600118764615307|0.46683417420748186|0.47187974204864064| 85| 81|
| 148|2797| 2| 2.565378822933188| 0.3104171255403102| 0.3058497366056642| 86| 92|
| 148| 981| 4|2.5522980885954345| 0.6192495309430728| 0.6666666666666666| 87| 63|
| 148|1013| 3| 2.540723653015375| 0.4632679884033304| 0.4705853245783229| 88| 82|
| 148| 135| 3|2.4227193796557085|0.46211766586279074| 0.4693133650262147| 89| 83|
| 148| 666| 3|2.4007681011899544| 0.4609856588695927| 0.468063195066675| 90| 84|
| 148|4211| 3|2.3520939451415908|0.45987146595803424|0.46683417420748186| 91| 85|
| 148| 973| 3|2.3138587127736123|0.45877460490849636| 0.4656256883104653| 92| 86|
| 148|1038| 3| 2.120301889184338|0.45769461180347787|0.46443714820730453| 93| 87|
| 148|1773| 3|2.0110700289854986| 0.4566310401397301| 0.4632679884033304| 94| 88|
+----+----+------+------------------+-------------------+-------------------+------------+-------------+
In [29]:
# Hand-built single-user table where every prediction is the constant 3.8,
# for eyeballing the DCG formulas on a tiny input.
# NOTE(review): with fully tied predictions, the order row_number() assigns
# under "ORDER BY prediction DESC" is not guaranteed by Spark — the toy
# DCG column may not be stable across runs/partitionings; confirm before
# relying on the exact values shown below.
toy_df = spark.createDataFrame([
    (1,1,1,3.8), (1,2,3,3.8), (1,3,1,3.8), (1,4,1,3.8), (1,5,5,3.8),
    (1,6,4,3.8), (1,7,5,3.8), (1,8,5,3.8), (1,9,5,3.8), (1,10,5,3.8),
], ['user','item','rating', 'prediction'])
# createOrReplaceTempView replaces registerTempTable, deprecated since
# Spark 2.0; same behavior, same view name.
toy_df.createOrReplaceTempView("toy_df")
In [30]:
# Same per-item DCG/IDCG breakdown as the previous query, run on the toy
# table (all users, no filter, no explicit ordering).
# NOTE(review): the name `df3` is reused here — the earlier df3 (user-148
# breakdown of predictions_df) is shadowed from this cell on.
df3 = spark.sql(
'''
select
user,
item,
rating,
prediction,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
)
) as dcg,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
)
) as idcg,
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
) as pred_row_num,
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
) as ideal_row_num
from toy_df
'''
)
df3.show(100)
+----+----+------+----------+-------------------+-------------------+------------+-------------+
|user|item|rating|prediction| dcg| idcg|pred_row_num|ideal_row_num|
+----+----+------+----------+-------------------+-------------------+------------+-------------+
| 1| 5| 5| 3.8| 5.0| 5.0| 1| 1|
| 1| 7| 5| 3.8| 3.154648767857287| 3.154648767857287| 2| 2|
| 1| 8| 5| 3.8| 2.5| 2.5| 3| 3|
| 1| 9| 5| 3.8| 2.1533827903669653| 2.1533827903669653| 4| 4|
| 1| 10| 5| 3.8| 1.934264036172708| 1.934264036172708| 5| 5|
| 1| 6| 4| 3.8| 1.4248287484320887| 1.4248287484320887| 6| 6|
| 1| 2| 3| 3.8| 1.0| 1.0| 7| 7|
| 1| 1| 1| 3.8| 0.3154648767857287| 0.3154648767857287| 8| 8|
| 1| 3| 1| 3.8|0.30102999566398114|0.30102999566398114| 9| 9|
| 1| 4| 1| 3.8| 0.2890648263178878| 0.2890648263178878| 10| 10|
+----+----+------+----------+-------------------+-------------------+------------+-------------+
In [31]:
# Global mean training rating — the base of a "mean + noise" random baseline
# predictor used to put the model's NDCG numbers in context.
avg_rating_df = (
    train_df
    .agg(F.avg('rating').alias('avg_rating'))
)
# Random baseline on the TRAIN split: predict the global mean plus
# standard-normal noise. Seeding randn() makes the baseline numbers
# reproducible on Restart-&-Run-All (the original used an unseeded randn).
train_predict_df = (
    train_df
    .crossJoin(avg_rating_df)
    .withColumn(
        'prediction',
        F.col('avg_rating') + F.randn(seed=42)
    )
    .select('user', 'item', 'rating', 'prediction')
)
# createOrReplaceTempView replaces registerTempTable (deprecated since Spark 2.0)
train_predict_df.createOrReplaceTempView("train_predict_df")
train_predict_df.show()
+----+----+------+------------------+
|user|item|rating| prediction|
+----+----+------+------------------+
| 0| 22| 4| 3.438706702931956|
| 0| 34| 3| 2.472287134629834|
| 0| 43| 5| 2.800809337009305|
| 0| 62| 4|2.8652366697503315|
| 0| 74| 5| 4.203724077491838|
| 0| 106| 4|3.6165322950273247|
| 0| 134| 3|3.4806515832663814|
| 0| 146| 3|1.0257466319226944|
| 0| 149| 5| 5.40518003004515|
| 0| 188| 3| 3.016204829162767|
| 0| 190| 4|3.1782809346052545|
| 0| 222| 5| 2.96560250164553|
| 0| 230| 4| 4.012016542731463|
| 0| 350| 5| 3.745525760959535|
| 0| 399| 4| 3.892543386994308|
| 0| 403| 2| 3.68719142950705|
| 0| 434| 4|5.1836311444517555|
| 0| 457| 4| 3.593295770876345|
| 0| 464| 4| 4.961416887205116|
| 0| 533| 4| 4.65880147889484|
+----+----+------+------------------+
only showing top 20 rows
In [32]:
# Same user-148 DCG/IDCG breakdown, but over the RANDOM baseline predictions
# on the training split — predicted order should now be unrelated to the
# ideal order, so dcg and idcg diverge.
df4 = spark.sql(
'''
select
user,
item,
rating,
prediction,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
)
) as dcg,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
)
) as idcg,
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
) as pred_row_num,
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
) as ideal_row_num
from train_predict_df
where user = 148
order by pred_row_num
'''
)
df4.show(20)
+----+----+------+------------------+------------------+-------------------+------------+-------------+
|user|item|rating| prediction| dcg| idcg|pred_row_num|ideal_row_num|
+----+----+------+------------------+------------------+-------------------+------------+-------------+
| 148|2304| 4|6.1706119023937624| 4.0| 0.7798360875751452| 1| 34|
| 148|1649| 4| 5.853989483210963|2.5237190142858297| 0.6918787617803084| 2| 54|
| 148| 369| 4|5.6701715093340965| 2.0| 0.884258917830015| 3| 22|
| 148| 221| 3| 5.5749159610576|1.2920296742201793|0.48163225630206424| 4| 74|
| 148| 633| 3| 5.563554499918556|1.1605584217036249| 0.480159219764464| 5| 75|
| 148|2441| 3| 5.262028353850321|1.0686215613240666|0.47187974204864064| 6| 81|
| 148| 379| 4| 5.122374288143945|1.3333333333333333| 0.8724171679421261| 7| 23|
| 148|1044| 4| 5.096395902420889|1.2618595071429148| 0.7929594526822421| 8| 32|
| 148| 155| 4| 5.092408908726938|1.2041199826559246| 0.6983337201921797| 9| 52|
| 148|1870| 4| 5.059104624332527|1.1562593052715513| 0.6887817351763924| 10| 55|
| 148|4091| 4| 5.048827400797215|1.1157717826045193| 0.701700254327818| 11| 51|
| 148|3062| 4| 4.996674799622005| 1.080952617709279| 0.7417960936614756| 12| 41|
| 148| 621| 2| 4.783922966805143|0.5252990700743871| 0.3080784439085271| 13| 89|
| 148|3447| 3| 4.774798146327548|0.7678740744294464| 0.468063195066675| 14| 84|
| 148| 829| 4| 4.774026623927967| 1.0| 0.8| 15| 31|
| 148| 805| 3| 4.769243262443236| 0.733951626354678| 0.4894536590505323| 16| 69|
| 148|4392| 4| 4.75935488148759|0.9592498662725258| 0.7371553325948247| 17| 42|
| 148|2131| 4| 4.733549388359078| 0.941635653466553| 0.7466096449557735| 18| 40|
| 148|1353| 3| 4.71437848053403|0.6941346394792774|0.49632766175717496| 19| 65|
| 148|1085| 4| 4.667151107839208| 0.910680994787812| 0.6744523947580044| 20| 60|
+----+----+------+------------------+------------------+-------------------+------------+-------------+
only showing top 20 rows
In [33]:
# Random baseline on the TEST split: global train-set mean rating plus
# standard-normal noise as the prediction. Seeding randn() makes the
# baseline numbers reproducible on Restart-&-Run-All (the original used an
# unseeded randn); a different seed than the train baseline keeps the two
# noise draws independent.
test_predict_df = (
    test_df
    .crossJoin(avg_rating_df)
    .withColumn(
        'prediction',
        F.col('avg_rating') + F.randn(seed=43)
    )
    .select('user', 'item', 'rating', 'prediction')
)
# createOrReplaceTempView replaces registerTempTable (deprecated since Spark 2.0)
test_predict_df.createOrReplaceTempView("test_predict_df")
test_predict_df.show()
+----+----+------+------------------+
|user|item|rating| prediction|
+----+----+------+------------------+
| 0| 18| 4| 5.8382427406166|
| 0| 32| 4|4.6595378220695975|
| 0| 35| 5| 3.666596910823167|
| 0| 36| 3|3.5493592850388094|
| 0| 50| 5| 5.249635815300059|
| 0| 70| 4|3.4895007637050472|
| 0| 77| 3| 2.83960383925089|
| 0| 78| 5|2.5888604065867105|
| 0| 98| 5| 4.375817741178096|
| 0| 116| 5| 4.519717794780622|
| 0| 136| 2| 4.503841947780726|
| 0| 157| 4| 5.584036452988005|
| 0| 161| 4|2.2316745115703567|
| 0| 198| 5| 4.448128710449876|
| 0| 217| 3|3.5708312971283434|
| 0| 235| 4| 4.017185579566163|
| 0| 236| 4| 3.036628914714359|
| 0| 243| 4| 3.810116619652029|
| 0| 266| 4| 2.517439511208432|
| 0| 294| 2| 5.199446980791228|
+----+----+------+------------------+
only showing top 20 rows
In [34]:
# User-148 DCG/IDCG breakdown over the random baseline on the test split
# (no ORDER BY here, unlike df4 — rows come back in arbitrary order).
df5 = spark.sql(
'''
select
user,
item,
rating,
prediction,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
)
) as dcg,
rating / log(2, 1 +
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
)
) as idcg,
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
) as pred_row_num,
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
) as ideal_row_num
from test_predict_df
where user = 148
'''
)
df5.show(200)
+----+----+------+------------------+-------------------+-------------------+------------+-------------+
|user|item|rating| prediction| dcg| idcg|pred_row_num|ideal_row_num|
+----+----+------+------------------+-------------------+-------------------+------------+-------------+
| 148|1945| 3|5.8026854996111155| 3.0|0.47187974204864064| 1| 81|
| 148|1055| 3| 5.779313255718588| 1.8927892607143721|0.49111696633564683| 2| 68|
| 148| 275| 4| 5.757909587955016| 2.0| 0.7371553325948247| 3| 42|
| 148|1571| 4| 5.629198979997861| 1.7227062322935722| 0.7201253066267705| 4| 46|
| 148|2334| 3| 5.523136737160017| 1.1605584217036249| 0.4705853245783229| 5| 82|
| 148|1183| 3| 5.470647577132097| 1.0686215613240666| 0.4759048767467881| 6| 78|
| 148|1147| 4| 5.363699977681501| 1.3333333333333333| 0.6799664651476561| 7| 58|
| 148| 511| 5| 5.304830908794267| 1.5773243839286435| 2.5| 8| 3|
| 148| 4| 5| 5.224939568931342| 1.5051499783199058| 1.934264036172708| 9| 5|
| 148| 236| 5| 5.206412377914868| 1.4453241315894392| 1.3947147282556491| 10| 11|
| 148| 196| 4| 5.146903165181599| 1.1157717826045193| 0.8073963463283993| 11| 30|
| 148| 443| 2| 5.138711800469165| 0.5404763088546395| 0.3073237725797285| 12| 90|
| 148| 62| 4| 5.128536481656779| 1.0505981401487743| 0.7417960936614756| 13| 41|
| 148|1773| 3| 5.06995011719117| 0.7678740744294464|0.48782493681490247| 14| 70|
| 148| 339| 4| 5.056671922070084| 1.0| 0.8724171679421261| 15| 23|
| 148|3364| 4| 4.982075386835361| 0.9786021684729039| 0.7516072988364303| 16| 39|
| 148| 223| 3|4.9320746601755205| 0.7194373997043944| 0.4862295731793669| 17| 71|
| 148|1382| 3| 4.896091871244972| 0.7062267400999147|0.47453877281934687| 18| 79|
| 148| 784| 4|4.7818556805712324| 0.9255128526390366| 0.728351601879753| 19| 44|
| 148|4211| 3| 4.693014966230444| 0.6830107460908589|0.46683417420748186| 20| 85|
| 148| 165| 5| 4.664625292168775| 1.1212191210878772| 1.3132476751859679| 21| 13|
| 148| 973| 3|4.6317883470584045| 0.6631941883725112| 0.4632679884033304| 22| 88|
| 148| 528| 5| 4.611235837286305| 1.0905214599276576| 1.6666666666666667| 23| 7|
| 148| 914| 5| 4.555773407800349| 1.0766913951834827| 1.3511907721365988| 24| 12|
| 148|2319| 4|4.4926779238159575| 0.8509842142134525| 0.6744523947580044| 25| 60|
| 148|1038| 3| 4.470488682157669| 0.6309297535714573|0.49281614988615063| 26| 67|
| 148| 180| 5| 4.463114743001566| 1.0400729883825475| 1.4453241315894392| 27| 10|
| 148| 229| 5| 4.390235720282174| 1.0292341623021721| 1.1770445668331913| 28| 18|
| 148|4058| 3|4.3489906639724385| 0.6113851412715186| 0.468063195066675| 29| 84|
| 148| 820| 3| 4.345700289363394| 0.6055472597462995|0.49455257016648113| 30| 66|
| 148|2797| 2|4.2981852173088315| 0.4|0.30658097730535616| 31| 91|
| 148| 389| 3| 4.286605981476037| 0.5947195895116816|0.49632766175717496| 32| 65|
| 148|2257| 4| 4.277996588236268| 0.7862465289312903| 0.69506137148576| 33| 53|
| 148|1934| 4|4.2359382314030105| 0.7798360875751452| 0.8320583907060379| 34| 27|
| 148|1138| 5| 4.179226255299683| 0.967132018086354| 1.5773243839286435| 35| 8|
| 148| 397| 5| 4.164904456224879| 0.9597936000328007| 1.7810359355401109| 36| 6|
| 148|1734| 4| 4.125306962983956| 0.7622056497070936| 0.6666666666666666| 37| 63|
| 148| 642| 4| 4.062429333529082| 0.7568014380674801| 0.7326770036545345| 38| 43|
| 148|3002| 3| 4.060650358765983| 0.5637054741273227| 0.4693133650262147| 39| 83|
| 148| 666| 3| 4.039408215048402| 0.5599572337168301|0.47729642360788244| 40| 77|
| 148| 462| 4| 4.038236930489699| 0.7417960936614756| 0.7087352805422317| 41| 49|
| 148| 137| 5| 3.99209062111073| 0.9214441657435309| 1.22325271059113| 42| 16|
| 148| 981| 4|3.9689031238155676| 0.7326770036545345| 0.8| 43| 31|
| 148|1160| 4|3.9363643192737165| 0.728351601879753| 0.8509842142134525| 44| 25|
| 148| 132| 5| 3.925045402537325| 0.9052129839002011| 5.0| 45| 1|
| 148| 517| 4|3.8791514172380364| 0.7201253066267705| 0.6887817351763924| 46| 55|
| 148|2514| 3| 3.774731722355551| 0.537156695253124| 0.480159219764464| 47| 75|
| 148| 135| 3| 3.75796116530181| 0.5343107806620333| 0.4656256883104653| 48| 86|
| 148| 321| 4| 3.736257754104039| 0.7087352805422317| 0.884258917830015| 49| 22|
| 148| 189| 5|3.7351378151205905| 0.8814571719444104| 3.154648767857287| 50| 2|
| 148| 388| 3| 3.723436849366771| 0.5262751907458635| 0.4981429386478135| 51| 64|
| 148|1342| 3|3.6913581182641226| 0.5237502901441348| 0.4894536590505323| 52| 69|
| 148|2317| 4|3.6765348589177465| 0.69506137148576| 0.7678348800262406| 53| 36|
| 148|2300| 4| 3.660545813896746| 0.6918787617803084| 0.7798360875751452| 54| 34|
| 148| 23| 1| 3.639282025264064| 0.1721954337940981|0.15256487060115928| 55| 93|
| 148|2894| 4| 3.625646741900116| 0.6857664022956538| 0.7568014380674801| 56| 38|
| 148|2421| 4|3.6208588449973083| 0.6828291186548805| 0.8233873298417377| 57| 28|
| 148|1538| 3| 3.611398012490524| 0.5099748488607421|0.47319731517859304| 58| 80|
| 148|1009| 4|3.5352916195398767| 0.6771752303951257| 0.8613531161467861| 59| 24|
| 148| 43| 5|3.5131880633848414| 0.8430654934475055| 1.2797901240490774| 60| 14|
| 148| 98| 4| 3.325088499948339| 0.6717951158281678| 0.8151801883620248| 61| 29|
| 148|2110| 4|3.3079216308713755| 0.6692007152406965| 0.6983337201921797| 62| 52|
| 148| 128| 5|3.3074576034048144| 0.8333333333333334| 1.25| 63| 15|
| 148| 368| 3|3.2636829961028924| 0.4981429386478135|0.46443714820730453| 64| 87|
| 148| 395| 5| 3.226973498828029| 0.8272127695952917| 1.1568910657987959| 65| 19|
| 148|3247| 3| 3.226548638755594|0.49455257016648113|0.47871424700981446| 66| 76|
| 148|1013| 3|3.2158263106837235|0.49281614988615063|0.48163225630206424| 67| 74|
| 148| 274| 4|3.1838342467126344| 0.6548226217808625| 0.7737056144690833| 68| 35|
| 148|1180| 5| 3.142108940523942| 0.8157560984175538| 2.1533827903669653| 69| 4|
| 148|1029| 4|3.1288184471391443| 0.6504332490865367| 0.7241703871201608| 70| 45|
| 148| 314| 3| 3.092183412647548| 0.4862295731793669| 0.4846664023289891| 71| 72|
| 148| 45| 4|3.0866673216733114| 0.6462218697719855| 0.7124143742160444| 72| 48|
| 148|3874| 4| 2.956494323118478| 0.6441790870257776| 0.6918787617803084| 73| 54|
| 148|2102| 4| 2.94527267526665| 0.6421763417360856| 0.7862465289312903| 74| 33|
| 148| 172| 5|2.9328775534542224| 0.8002653662741067| 1.1990623328406573| 75| 17|
| 148| 74| 1|2.8972746545587538|0.15957141566993815|0.15221034671324338| 76| 94|
| 148| 170| 4|2.8891622179322702| 0.6363952314771766| 0.6692007152406965| 77| 62|
| 148|1360| 4| 2.865212410739362| 0.6345398356623841| 0.7051657375555284| 78| 50|
| 148| 623| 3|2.8442482184530484|0.47453877281934687|0.48313431526933326| 79| 73|
| 148|3673| 4|2.7337889660285306| 0.6309297535714574| 0.7466096449557735| 80| 40|
| 148|2226| 5|2.6846084874271288| 0.7864662367477344| 1.5051499783199058| 81| 9|
| 148| 442| 2| 2.640855937602854|0.31372354971888194| 0.3080784439085271| 82| 89|
| 148|1345| 4| 2.635954278994843| 0.6257511533682862| 0.7929594526822421| 83| 32|
| 148|2678| 4| 2.589285654745082| 0.6240842600889| 0.7622056497070936| 84| 37|
| 148|4200| 4| 2.573221392689499| 0.6224455656099759| 0.7162089270041654| 85| 47|
| 148|1992| 2| 2.514133120958961| 0.3104171255403102| 0.3058497366056642| 86| 92|
| 148|2784| 4|2.1065622137165483| 0.6192495309430728| 0.6717951158281678| 87| 61|
| 148| 93| 5| 2.001673145135418| 0.7721133140055506| 1.1212191210878772| 88| 21|
| 148|2027| 4|1.9038114093203287| 0.6161568878170542| 0.6771752303951257| 89| 59|
| 148|1412| 4|1.8414997722293232| 0.614647545159457| 0.8412396714286098| 90| 26|
| 148|1917| 4| 1.813378827686869| 0.6131619546107123| 0.701700254327818| 91| 51|
| 148| 863| 4|1.7562415153102715| 0.6116994732113284| 0.6828291186548805| 92| 57|
| 148|1113| 5|1.5788921884304883| 0.7628243530057964| 1.138351243484765| 93| 20|
| 148| 848| 4| 1.563793367441527| 0.6088413868529735| 0.6857664022956538| 94| 56|
+----+----+------+------------------+-------------------+-------------------+------------+-------------+
In [35]:
# Mean NDCG@10 across users, reported in 1-minus form (1 - avg NDCG) —
# presumably so that lower is better, matching a loss-style evaluator;
# TODO confirm the 1-minus convention against eval_model's evaluators.
df6 = spark.sql(
'''
select 1 - avg(p.dcg / a.idcg) as ndcg
from (
select
x.user,
sum(x.rating / log(2, 1 + x.pred_row_num)) as dcg
from (
select
user,
rating,
row_number() OVER (
PARTITION BY user
ORDER BY prediction DESC
) as pred_row_num
from predictions_df
) x
where x.pred_row_num <= 10
group by x.user
) p
join (
select
x.user,
sum(x.rating / log(2, 1 + x.actual_row_num)) as idcg
from (
select
user,
rating,
row_number() OVER (
PARTITION BY user
ORDER BY rating DESC
) as actual_row_num
from predictions_df
) x
where x.actual_row_num <= 10
group by x.user
) a on a.user = p.user
''')
# single-row, single-column aggregate -> pull the scalar back to the driver
df6.collect()[0][0]
Out[35]:
0.06786467880199432
In [36]:
# test top N ndcg implementation
def eval_ndcg(df):
    """Return 1 - mean(NDCG@10) over the users of `df`.

    `df` must have columns user, item, rating, prediction. For each user,
    DCG@10 is summed over the 10 highest-predicted items and IDCG@10 over
    the 10 highest-rated items, both with the log2(1 + rank) discount.
    The result is returned in 1-minus ("lower is better") form, matching
    the df6 cell above.
    """
    # Register under a function-specific view name: the original registered
    # the generic global name "df" on every call, silently clobbering any
    # other temp view called "df". createOrReplaceTempView replaces the
    # registerTempTable API deprecated since Spark 2.0.
    df.createOrReplaceTempView("eval_ndcg_df")
    score_df = spark.sql(
        '''
        select 1 - avg(p.dcg / a.idcg) as ndcg
        from (
            select
                x.user,
                sum(x.rating / log(2, 1 + x.pred_row_num)) as dcg
            from (
                select
                    user,
                    rating,
                    row_number() OVER (
                        PARTITION BY user
                        ORDER BY prediction DESC
                    ) as pred_row_num
                from eval_ndcg_df
            ) x
            where x.pred_row_num <= 10
            group by x.user
        ) p
        join (
            select
                x.user,
                sum(x.rating / log(2, 1 + x.actual_row_num)) as idcg
            from (
                select
                    user,
                    rating,
                    row_number() OVER (
                        PARTITION BY user
                        ORDER BY rating DESC
                    ) as actual_row_num
                from eval_ndcg_df
            ) x
            where x.actual_row_num <= 10
            group by x.user
        ) a on a.user = p.user
        '''
    )
    # single-row, single-column aggregate -> plain Python scalar
    return score_df.collect()[0][0]
In [37]:
# Compare the full-list NDCG loss (ndcg_evaluator) with the top-10 variant
# (eval_ndcg) on the model's train/test predictions.
# NOTE(review): ndcg_evaluator, train_predictions_df and predictions_df are
# defined in cells above this chunk — confirm they exist on a fresh run.
print('train ndcg: ', ndcg_evaluator.evaluate(train_predictions_df))
print('test ndcg: ', ndcg_evaluator.evaluate(predictions_df))
print('train ndcg_10: ', eval_ndcg(train_predictions_df))
print('test ndcg_10: ', eval_ndcg(predictions_df))
train ndcg: 0.040854358939851565
test ndcg: 0.043669841712116964
train ndcg_10: 0.0634047824074252
test ndcg_10: 0.06786467880199432
In [38]:
# Same metrics on the random mean+noise baseline. Both losses come out
# higher than the model's (see previous cell's output), confirming the
# metrics are sensitive to prediction quality.
print('random train ndcg: ', ndcg_evaluator.evaluate(train_predict_df))
print('random test ndcg: ', ndcg_evaluator.evaluate(test_predict_df))
print('random train ndcg_10: ', eval_ndcg(train_predict_df))
print('random test ndcg_10: ', eval_ndcg(test_predict_df))
random train ndcg: 0.06848897846773916
random test ndcg: 0.06809032973399232
random train ndcg_10: 0.10590540704842988
random test ndcg_10: 0.10565139486116693
In [41]:
# Summary statistics of the full reviews dataset: row counts, mean/stddev,
# and min/max for user, item, and rating.
reviews_df.describe().show()
+-------+-----------------+------------------+------------------+
|summary| user| item| rating|
+-------+-----------------+------------------+------------------+
| count| 698013| 698013| 698013|
| mean|7726.854631647261| 945.5814547866587|3.8090322100018192|
| stddev|7209.145168047517|1004.7410488846924|1.0271331256148968|
| min| 0| 0| 1|
| max| 25486| 5068| 5|
+-------+-----------------+------------------+------------------+
In [ ]:
Content source: samleegithub/RestaurantRecs
Similar notebooks: