In [3]:
from eval_model import NDCGEvaluator, NDCG10Evaluator
import numpy as np

In [56]:
# Build a synthetic ratings table: N_USERS users, each contributing
# ITEMS_PER_USER rows of (user, rating, prediction). Ratings and predictions
# are drawn independently and uniformly from the integers 1..5.
N_USERS = 1000
ITEMS_PER_USER = 50
SEED = 42

# Dedicated generator: makes the cell reproducible on re-run without touching
# NumPy's global random state.
rng = np.random.RandomState(SEED)

all_rows1 = []
for i in range(N_USERS):
    # A plain Python list of ints replaces the old
    # `i * np.ones(50, dtype=np.int)`; the `np.int` alias was removed from
    # NumPy and would raise AttributeError on current versions.
    user = [i] * ITEMS_PER_USER
    # randint's upper bound is exclusive, so values fall in 1..5.
    # .tolist() turns the NumPy array into a list of built-in Python ints.
    rating = rng.randint(1, 6, ITEMS_PER_USER).tolist()
    prediction = rng.randint(1, 6, ITEMS_PER_USER).tolist()
    # extend() appends in place; `all_rows1 = all_rows1 + ...` copied the
    # whole accumulated list on every iteration.
    all_rows1.extend(zip(user, rating, prediction))

# `spark` is the session object provided by the surrounding notebook/kernel.
rand_df1 = spark.createDataFrame(all_rows1, ['user', 'rating', 'prediction'])
rand_df1.printSchema()
rand_df1.show()


root
 |-- user: long (nullable = true)
 |-- rating: long (nullable = true)
 |-- prediction: long (nullable = true)

+----+------+----------+
|user|rating|prediction|
+----+------+----------+
|   0|     1|         1|
|   0|     4|         3|
|   0|     3|         1|
|   0|     4|         3|
|   0|     5|         5|
|   0|     4|         2|
|   0|     1|         4|
|   0|     5|         5|
|   0|     2|         5|
|   0|     4|         4|
|   0|     1|         2|
|   0|     1|         4|
|   0|     5|         4|
|   0|     3|         2|
|   0|     1|         1|
|   0|     2|         1|
|   0|     1|         1|
|   0|     3|         5|
|   0|     1|         1|
|   0|     1|         4|
+----+------+----------+
only showing top 20 rows


In [57]:
# Score the random (rating, prediction) table with the evaluator imported
# from eval_model.
evaluator = NDCG10Evaluator()
raw_score = evaluator.evaluate(rand_df1)
# NOTE(review): presumably evaluate() returns a loss-style value (1 - NDCG@10),
# so subtracting from 1 recovers the NDCG itself — confirm against eval_model.
1 - raw_score


Out[57]:
0.6033739317615976

In [ ]: