In [1]:
from collections import Counter


# One list of interest strings per user; a user is identified by position
# in the outer list (e.g. users_interests[1] is the second user).
users_interests = [
    ["Hadoop", "Big Data", "HBase", "Java", "Spark", "Storm", "Cassandra"],
    ["NoSQL", "MongoDB", "Cassandra", "HBase", "Postgres"],
    ["Python", "scikit-learn", "scipy", "numpy", "statsmodels", "pandas"],
    ["R", "Python", "statistics", "regression", "probability"],
    ["machine learning", "regression", "decision trees", "libsvm"],
    ["Python", "R", "Java", "C++", "Haskell", "programming languages"],
    ["statistics", "probability", "mathematics", "theory"],
    ["machine learning", "scikit-learn", "Mahout", "neural networks"],
    ["neural networks", "deep learning", "Big Data", "artificial intelligence"],
    ["Hadoop", "Java", "MapReduce", "Big Data"],
    ["statistics", "R", "statsmodels"],
    ["C++", "deep learning", "artificial intelligence", "probability"],
    ["pandas", "R", "Python"],
    ["databases", "HBase", "Postgres", "MySQL", "MongoDB"],
    ["libsvm", "regression", "support vector machines"]
]


# (interest, count) pairs, ordered from the most to the least frequently
# listed interest across all users.
popular_interests = Counter(
    topic
    for interests_of_one_user in users_interests
    for topic in interests_of_one_user
).most_common()


def most_popular_new_interests(user_interests, max_results=5):
    """Suggest popular interests that the given user does not already have.

    Scans the module-level ``popular_interests`` list of
    ``(interest, frequency)`` pairs in its existing order and keeps every
    pair whose interest is absent from ``user_interests``.

    Args:
        user_interests: Collection of interests the user already has;
            only membership tests (``in``) are performed on it.
        max_results: Upper bound applied as a slice (``[:max_results]``)
            to the list of candidates. Defaults to 5.

    Returns:
        A list of ``(interest, frequency)`` tuples, in the same relative
        order as they appear in ``popular_interests``.
    """
    candidates = []
    for name, count in popular_interests:
        # Skip anything the user is already interested in.
        if name in user_interests:
            continue
        candidates.append((name, count))

    return candidates[:max_results]

In [2]:
# Top five suggestions for the user at index 1.
most_popular_new_interests(users_interests[1], max_results=5)


Out[2]:
[('R', 4), ('Python', 4), ('statistics', 3), ('Java', 3), ('probability', 3)]

In [ ]: