In [ ]:
import pandas as pd
from IPython.core.display import HTML
# Directory holding the Meta Kaggle CSV files used below.
path = "../input/"

# Load the three tables the glossary is built from, in a fixed order.
versions, kernels, users = (
    pd.read_csv(path + csv_name)
    for csv_name in ("KernelVersions.csv", "Kernels.csv", "Users.csv")
)

# KernelLanguageId (as a string) -> language label shown in the tables.
# Ids that are not listed here are rendered with an empty language.
language_map = {
    **dict.fromkeys(('1', '5', '12', '13', '15', '16'), 'R'),
    **dict.fromkeys(('2', '8', '9', '14'), 'Python'),
}
def pressence_check(title, tokens, ignore=()):
    """Return True when *title* matches any search token and no ignore term.

    Matching is case-insensitive and substring based: a token matches when
    every whitespace-separated word of the token occurs somewhere in the
    title (not necessarily adjacent or in order).

    Args:
        title: Kernel title to test. Non-string values (for example the
            float NaN pandas produces for a missing title) never match.
        tokens: Iterable of search phrases; one matching phrase is enough.
        ignore: Iterable of substrings that veto a match when present in
            the title. Compared case-insensitively.

    Returns:
        bool: True if the title is relevant, False otherwise.
    """
    if not isinstance(title, str):
        return False

    lowered_title = title.lower()

    # An ignore term overrides any positive match, so test it first.
    if any(term.lower() in lowered_title for term in ignore):
        return False

    for token in tokens:
        words = token.lower().split()
        # Skip empty tokens: all() over no words would match every title.
        if words and all(word in lowered_title for word in words):
            return True
    return False
def get_latest(idd):
    """Return the highest VersionNumber recorded for kernel *idd*.

    Callers compare this value with the version whose title matched, to
    check that the latest version of the kernel is still about the same
    topic.

    Args:
        idd: Value to look up in the ``KernelId`` column of ``versions``.

    Returns:
        The maximum ``VersionNumber`` among the rows of that kernel.

    Raises:
        IndexError: If ``versions`` has no row for *idd*.
    """
    version_numbers = versions.loc[versions['KernelId'] == idd, 'VersionNumber']
    if version_numbers.empty:
        # Same exception type as positional indexing into an empty frame,
        # but with a message that names the missing kernel.
        raise IndexError("no versions found for kernel id {!r}".format(idd))
    # A plain maximum avoids sorting every version row of the kernel.
    return version_numbers.max()
def get_kernels(tokens, n, ignore=()):
    """Find the most-voted kernels whose titles match *tokens*.

    Steps:
      1. Keep the kernel versions whose title passes ``pressence_check``.
      2. Aggregate them per kernel and keep the ``n`` kernels with the
         largest summed version votes.
      3. Drop kernels whose newest matching version is not their latest
         version overall (the kernel has since been retitled).
      4. Join kernel and author details and order by the kernel's
         ``TotalVotes``.

    Because step 3 runs after the top-``n`` cut, the result may contain
    fewer than ``n`` rows.

    Args:
        tokens: Search phrases forwarded to ``pressence_check``.
        n: Number of top-voted candidate kernels to consider.
        ignore: Substrings that exclude a title, forwarded to
            ``pressence_check``.

    Returns:
        pandas.DataFrame with columns Title, CurrentUrlSlug, Language,
        TotalViews, TotalComments, TotalVotes, DisplayName and UserName,
        sorted by TotalVotes in descending order with a fresh 0..k-1 index.
    """
    def language_of(language_id):
        # pandas stores an integer column containing missing values as
        # float, so 2 may arrive as 2.0; normalise before the lookup.
        try:
            key = str(int(language_id))
        except (TypeError, ValueError, OverflowError):
            key = str(language_id)
        return language_map.get(key, "")

    # Use a local mask so the shared `versions` frame is left untouched.
    is_relevant = versions['Title'].apply(
        lambda title: pressence_check(title, tokens, ignore))
    relevant = versions[is_relevant.astype(bool)]

    results = relevant.groupby('KernelId').agg({
        'TotalVotes': 'sum',
        'KernelLanguageId': 'max',
        # Last matching title of the kernel, kept intact even if it has '#'.
        'Title': 'last',
        'VersionNumber': 'max',
    })
    results = (results.reset_index()
               .sort_values('TotalVotes', ascending=False)
               .head(n)
               # Rename so the summed version votes do not collide with the
               # kernel-level TotalVotes column merged in below.
               .rename(columns={'KernelId': 'Id', 'TotalVotes': 'Votes'}))

    # Keep only kernels whose latest version still matches the topic.
    results['latest_version'] = results['Id'].apply(get_latest)
    results = results[results['VersionNumber'] == results['latest_version']]

    results = results.merge(kernels, on="Id")
    results = results.merge(users.rename(columns={'Id': "AuthorUserId"}),
                            on='AuthorUserId')
    results['Language'] = results['KernelLanguageId'].apply(language_of)

    # Reset the index so row labels follow the final ranking order.
    results = (results.sort_values("TotalVotes", ascending=False)
               .reset_index(drop=True))
    return results[['Title', 'CurrentUrlSlug', 'Language', 'TotalViews',
                    'TotalComments', 'TotalVotes', "DisplayName", "UserName"]]
def _render_kernels_table(heading, response):
    """Return the HTML (style, heading and table) for one glossary section."""
    from html import escape

    parts = [
        '<style>\n'
        '.rendered_html tr {font-size: 12px; text-align: left}\n'
        '</style>\n'
        '<h3><font color="#1768ea">' + escape(heading) + '</font></h3>\n'
        '<table>\n'
        # A real header row; the empty first cell sits above the rank column.
        '<tr>\n'
        '<td></td>\n'
        '<td><b>Kernel</b></td>\n'
        '<td><b>Author</b></td>\n'
        '<td><b>Language</b></td>\n'
        '<td><b>Views</b></td>\n'
        '<td><b>Comments</b></td>\n'
        '<td><b>Votes</b></td>\n'
        '</tr>'
    ]
    row_template = (
        '<tr>\n'
        '<td>{rank}</td>\n'
        '<td><a href="{url}" target="_blank"><b>{title}</b></a></td>\n'
        '<td><a href="{author_url}" target="_blank">{author}</a></td>\n'
        '<td>{language}</td>\n'
        '<td>{views}</td>\n'
        '<td>{comments}</td>\n'
        '<td>{votes}</td>\n'
        '</tr>'
    )

    # Rank by position in the result, not by the DataFrame's index labels.
    for rank, (_, row) in enumerate(response.iterrows(), start=1):
        author_url = "https://www.kaggle.com/" + str(row['UserName'])
        kernel_url = author_url + "/" + str(row['CurrentUrlSlug'])
        # Titles and display names are user-supplied text: escape them so
        # they cannot inject markup into the notebook output.
        parts.append(row_template.format(
            rank=rank,
            url=escape(kernel_url, quote=True),
            title=escape(str(row['Title'])),
            author_url=escape(author_url, quote=True),
            author=escape(str(row['DisplayName'])),
            language=escape(str(row['Language'])),
            views=str(row['TotalViews']),
            comments=str(row['TotalComments']),
            votes=str(row['TotalVotes']),
        ))

    parts.append("</table>")
    return "".join(parts)


def best_kernels(tokens, n=10, ignore=()):
    """Display an HTML table of the top kernels matching *tokens*.

    Args:
        tokens: Search phrases passed to ``get_kernels``; the first one,
            title-cased, is used as the section heading.
        n: Number of top-voted candidate kernels to consider.
        ignore: Substrings that exclude a kernel title from the results.

    Returns:
        None. The table is rendered through IPython's ``display``.
    """
    # Imported here so the function does not rely on the notebook having
    # injected `display` into the global namespace.
    from IPython.display import display

    response = get_kernels(tokens, n, ignore)
    heading = tokens[0].title() if tokens else ""
    display(HTML(_render_kernels_table(heading, response)))
Kaggle is the place to do data science projects. There are so many algorithms and concepts to learn, and Kaggle Kernels are one of the best resources on the internet for understanding the practical implementation of algorithms. There are almost 200,000 kernels published on Kaggle, and it is sometimes difficult to search for the right implementation. I have used the Meta Kaggle database to create a glossary of data science models, techniques and tools shared in Kaggle kernels. One can use this kernel as a single place to find other great kernels shared by great authors. I hope you like this kernel.
In [ ]:
# Glossary cells. Each cell sets the search phrases and renders one table:
#   best_kernels(tokens, n, ignore)
# tokens[0] (title-cased) becomes the table heading, n is the number of
# top-voted candidate kernels considered, and matching against kernel titles
# is case-insensitive and substring based.

# --- Linear models ---
tokens = ["linear regression"]
best_kernels(tokens, 10)
In [ ]:
tokens = ['logistic regression', "logistic"]
best_kernels(tokens, 10)
In [ ]:
tokens = ['Ridge']
best_kernels(tokens, 10)
In [ ]:
tokens = ['Lasso']
best_kernels(tokens, 10)
In [ ]:
tokens = ['ElasticNet']
best_kernels(tokens, 4)
In [ ]:
# --- Tree-based models and gradient boosting libraries ---
tokens = ['Decision Tree']
best_kernels(tokens, 10)
In [ ]:
tokens = ['random forest']
best_kernels(tokens, 10)
In [ ]:
tokens = ['lightgbm', 'light gbm', 'lgb']
best_kernels(tokens, 10)
In [ ]:
tokens = ['xgboost', 'xgb']
best_kernels(tokens, 10)
In [ ]:
tokens = ['catboost']
best_kernels(tokens, 10)
In [ ]:
# --- Neural networks and deep learning ---
tokens = ['neural network']
best_kernels(tokens, 10)
In [ ]:
tokens = ['autoencoder']
best_kernels(tokens, 10)
In [ ]:
tokens = ['deep learning']
best_kernels(tokens, 10)
In [ ]:
tokens = ['convolutional neural networks', 'cnn']
best_kernels(tokens, 10)
In [ ]:
tokens = ['lstm']
best_kernels(tokens, 10)
In [ ]:
# 'gru' is also a substring of 'grupo', so titles containing 'grupo' are
# excluded explicitly through the ignore list.
tokens = ['gru']
ignore = ['grupo']
best_kernels(tokens, 10, ignore)
In [ ]:
# --- Specific frameworks, architectures and computer vision ---
tokens = ['mxnet']
best_kernels(tokens, 10)
In [ ]:
tokens = ['resnet']
best_kernels(tokens, 10)
In [ ]:
tokens = ['Capsule network', 'capsulenet']
best_kernels(tokens, 5)
In [ ]:
tokens = ['vgg']
best_kernels(tokens, 5)
In [ ]:
tokens = ['inception']
best_kernels(tokens, 5)
In [ ]:
tokens = ['computer vision']
best_kernels(tokens, 5)
In [ ]:
tokens = ['transfer learning']
best_kernels(tokens, 5)
In [ ]:
tokens = ['yolo']
best_kernels(tokens, 5)
In [ ]:
# --- Clustering and unsupervised learning ---
tokens = ['kmeans', 'k means']
best_kernels(tokens, 10)
In [ ]:
tokens = ['hierarchical clustering']
best_kernels(tokens, 3)
In [ ]:
tokens = ['dbscan']
best_kernels(tokens, 10)
In [ ]:
tokens = ['unsupervised']
best_kernels(tokens, 10)
In [ ]:
# --- Other classical algorithms ---
tokens = ['naive bayes']
best_kernels(tokens, 10)
In [ ]:
tokens = ['svm']
best_kernels(tokens, 10)
In [ ]:
tokens = ['knn']
best_kernels(tokens, 10)
In [ ]:
tokens = ['recommendation engine']
best_kernels(tokens, 5)
In [ ]:
# --- Data exploration and preparation ---
tokens = ['EDA', 'exploration', 'exploratory']
best_kernels(tokens, 10)
In [ ]:
tokens = ['feature engineering']
best_kernels(tokens, 10)
In [ ]:
tokens = ['feature selection']
best_kernels(tokens, 10)
In [ ]:
tokens = ['outlier treatment', 'outlier']
best_kernels(tokens, 10)
In [ ]:
tokens = ['anomaly detection', 'anomaly']
best_kernels(tokens, 8)
In [ ]:
tokens = ['smote']
best_kernels(tokens, 5)
In [ ]:
tokens = ['pipeline']
best_kernels(tokens, 10)
In [ ]:
tokens = ['missing value']
best_kernels(tokens, 10)
In [ ]:
# Search the correct spelling as well; the misspelled variant is kept so
# titles that contain the same typo are still found.
tokens = ['dataset decomposition', 'dimensionality reduction', 'dimentionality reduction']
best_kernels(tokens, 2)
In [ ]:
# --- Dimensionality reduction techniques ---
tokens = ['PCA']
best_kernels(tokens, 10)
In [ ]:
tokens = ['Tsne', 't-sne']
best_kernels(tokens, 10)
In [ ]:
# --- Model validation, tuning and ensembling ---
tokens = ['cross validation']
best_kernels(tokens, 10)
In [ ]:
tokens = ['model selection']
best_kernels(tokens, 10)
In [ ]:
tokens = ['model tuning', 'tuning']
best_kernels(tokens, 10)
In [ ]:
tokens = ['gridsearch', 'grid search']
best_kernels(tokens, 10)
In [ ]:
tokens = ['ensemble']
best_kernels(tokens, 10)
In [ ]:
tokens = ['stacking', 'stack']
best_kernels(tokens, 10)
In [ ]:
tokens = ['bagging']
best_kernels(tokens, 10)
In [ ]:
# --- Text and natural language processing ---
tokens = ['NLP', 'Natural Language Processing', 'text mining']
best_kernels(tokens, 10)
In [ ]:
# Cover both spellings, as the visualization cell does for its variants.
tokens = ['topic modelling', 'topic modeling']
best_kernels(tokens, 8)
In [ ]:
# Word-embedding techniques and libraries.
tokens = ['word embedding','fasttext', 'glove', 'word2vec']
best_kernels(tokens, 8)
In [ ]:
# --- Libraries and tools ---
tokens = ['scikit']
best_kernels(tokens, 10)
In [ ]:
tokens = ['tensorflow', 'tensor flow']
best_kernels(tokens, 10)
In [ ]:
tokens = ['theano']
best_kernels(tokens, 10)
In [ ]:
tokens = ['keras']
best_kernels(tokens, 10)
In [ ]:
tokens = ['pytorch']
best_kernels(tokens, 10)
In [ ]:
tokens = ['vowpal wabbit','vowpalwabbit']
best_kernels(tokens, 10)
In [ ]:
tokens = ['eli5']
best_kernels(tokens, 10)
In [ ]:
tokens = ['hyperopt']
best_kernels(tokens, 5)
In [ ]:
tokens = ['pandas']
best_kernels(tokens, 10)
In [ ]:
tokens = ['SQL']
best_kernels(tokens, 10)
In [ ]:
tokens = ['bigquery', 'big query']
best_kernels(tokens, 10)
In [ ]:
tokens = ['gpu']
best_kernels(tokens, 10)
In [ ]:
# --- Visualization ---
tokens = ['visualization', 'visualisation']
best_kernels(tokens, 10)
In [ ]:
tokens = ['plotly', 'plot.ly']
best_kernels(tokens, 10)
In [ ]:
tokens = ['seaborn']
best_kernels(tokens, 10)
In [ ]:
tokens = ['d3.js']
best_kernels(tokens, 4)
In [ ]:
tokens = ['bokeh']
best_kernels(tokens, 10)
In [ ]:
# --- Time series ---
tokens = ['time series']
best_kernels(tokens, 10)
In [ ]:
tokens = ['arima']
best_kernels(tokens, 10)
In [ ]:
# --- General tutorials ---
tokens = ['tutorial']
best_kernels(tokens, 10)
Thanks for viewing. Please suggest any items that could be added to this glossary. If you liked this kernel, please upvote.