In [1]:
import os
# When the notebook is started from inside the `dswont` directory, move the working
# directory up one level so that the notebook imports the modules correctly.
_parent_dir, _dir_name = os.path.split(os.getcwd())
if _dir_name == 'dswont':
    os.chdir(_parent_dir)

In [74]:
import logging
logging.basicConfig(level=logging.ERROR)

import collections
from contextlib import closing
import copy
import itertools
from matplotlib import pyplot as plt
import nltk
import numpy as np
import pandas as pd
import random
import semidbm

from sklearn.svm import SVC
from sklearn.base import BaseEstimator
from sklearn.grid_search import GridSearchCV
from sklearn.linear_model.base import LinearModel
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn import cross_validation

from dswont import dbpedia
from dswont.dbpedia import uri_to_title, title_to_uri, to_title, to_category_uri
from dswont import util
from dswont import wikiapi

from dswont.topics import CategorySelectionClassifier
from dswont.topics import default_classifier
from dswont.topics import default_classifier_params
from dswont.topics import default_cross_validation
from dswont.topics import default_data
from dswont.topics import default_instance_weight_fn
from dswont.topics import default_features
from dswont.topics import depth_based_selection
from dswont.topics import evaluate_classifier
from dswont.topics import new_training_params
from dswont.topics import precompute_full_selection
from dswont.topics import read_ground_truth_data
from dswont.topics import train_topic_classifier
from dswont.topics import TrainingDataSelection

Analyzing the performance of the classifiers

Building the DataFrame of topics


In [11]:
def make_topic_df(full_selection):
    """Build a per-topic DataFrame joined with the ground-truth relevance labels.

    Parameters
    ----------
    full_selection
        Iterable of topics that also provides ``get_depth(topic)``.

    Returns
    -------
    pandas.DataFrame
        Columns ``topic``, ``title``, ``depth`` and ``is_relevant``, in that order.
        The join is an outer merge on ``topic``, so topics without a ground-truth
        label (and labelled topics missing from the selection) are kept with NaN
        in the columns they lack.
    """
    topic_df = pd.DataFrame({'topic': list(full_selection)})
    topic_df['depth'] = topic_df['topic'].apply(full_selection.get_depth)

    # Read the ground truth once and materialise the pairs; building the two columns
    # from the pair list also works when the ground truth is empty.
    ground_truth_items = list(read_ground_truth_data().items())
    ground_truth_df = pd.DataFrame(
        {'topic': [topic for topic, _ in ground_truth_items],
         'is_relevant': [is_relevant for _, is_relevant in ground_truth_items]})

    # 'topic' is the only column the two frames share, so this is the same join the
    # implicit merge would pick; it is spelled out for readability.
    topic_df = topic_df.merge(ground_truth_df, how='outer', on='topic')
    topic_df['title'] = topic_df['topic'].apply(to_title)
    return topic_df.reindex(columns=['topic', 'title', 'depth', 'is_relevant'])

In [12]:
def make_feature_df(selection, features, topics):
    """Return a DataFrame with one row of computed features per topic.

    ``features.compute(selection, topics)`` supplies one mapping per topic; each
    mapping is tagged in place with its topic under the ``'topic'`` key before the
    mappings are turned into rows.
    """
    rows = features.compute(selection, topics)
    for topic, row in zip(topics, rows):
        row['topic'] = topic
    frame = pd.DataFrame(rows)
    return frame

Analyzing the classifier mistakes


In [13]:
def make_mistake_df(clf, feature_df):
    """Return the rows of ``feature_df`` on which ``clf`` disagrees with the label.

    A copy of ``feature_df`` gets a ``prediction`` column from
    ``clf.predict(<topic column>)``; rows where the prediction differs from
    ``is_relevant`` are kept, and any of those containing NaN are dropped.
    The input frame is not modified.
    """
    annotated = feature_df.copy()
    annotated['prediction'] = clf.predict(annotated['topic'])
    is_mistake = annotated['prediction'] != annotated['is_relevant']
    mistakes = annotated[is_mistake]
    return mistakes.dropna()

Experiments

Pre-computed data for faster execution


In [14]:
def precompute_topic_data_frame(precomputed_data={}):
    """Return the merged topic/feature DataFrame, computing it at most once.

    The mutable default argument is intentional: it serves as the cache shared by
    all calls that do not pass their own dict. On a cache miss the dict is filled
    with the keys ``'full_selection'``, ``'topics'`` and ``'topic_df'``.
    """
    if 'topic_df' in precomputed_data:
        return precomputed_data['topic_df']

    selection = precompute_full_selection()
    # The first topic yielded by the selection is skipped
    # (presumably the root category -- TODO confirm).
    selection_topics = list(selection)[1:]
    features_frame = make_feature_df(selection, default_features, selection_topics)
    merged = make_topic_df(selection).merge(features_frame)

    precomputed_data['full_selection'] = selection
    precomputed_data['topics'] = selection_topics
    precomputed_data['topic_df'] = merged
    return precomputed_data['topic_df']

The distribution of relevant and irrelevant topics in the ground truth, depending on the depth


In [15]:
def stats_on_relevant_topics_in_ground_truth_vs_depth():
    """Count relevant vs. irrelevant ground-truth topics per depth, and plot them.

    Draws a stacked horizontal bar chart as a side effect.

    Returns
    -------
    pandas.DataFrame
        Indexed by depth (ascending) with columns ``irrelevant``, ``relevant``,
        ``total`` and ``percent_relevant``. The last one holds the fraction
        ``relevant / total`` rounded to three decimals, despite its name.
    """
    topic_df = precompute_topic_data_frame()
    # `sort_index` replaces the argument-less `DataFrame.sort`, which sorted by index
    # label and has since been removed from pandas.
    stats_df = topic_df.dropna().groupby(['depth', 'is_relevant'])\
        ['title']\
        .count()\
        .unstack(['is_relevant'])\
        .fillna(0)\
        .sort_index(ascending=False)
    stats_df = stats_df.rename(columns={False: 'irrelevant', True: 'relevant'})
    # Descending order for the plot: barh draws the first row at the bottom, so the
    # shallowest depth ends up at the top of the chart.
    stats_df.plot(kind='barh', stacked=True)
    plt.xlabel('The number of topics')
    stats_df['total'] = stats_df['relevant'] + stats_df['irrelevant']
    stats_df['percent_relevant'] = (stats_df['relevant'] / stats_df['total']).round(3)
    return stats_df.sort_index(ascending=True)

# Draw the stacked bar chart and show the per-depth summary table as the cell's output.
stats_on_relevant_topics_in_ground_truth_vs_depth()


Out[15]:
is_relevant relevant irrelevant total percent_relevant
depth
1 20 0 20 1.000
2 69 1 70 0.986
3 92 5 97 0.948
4 55 25 80 0.688
5 25 85 110 0.227
6 3 163 166 0.018
7 0 99 99 0.000

Training the topic selection classifier


In [16]:
def default_trained_topic_selection_classifier(precomputed_data={}):
    """Return the topic classifier trained with the default data and parameters.

    The classifier is trained on the first call and stored under ``'clf'`` in
    ``precomputed_data``; the mutable default argument is the intentional cache
    shared between calls that do not supply their own dict.
    """
    if 'clf' in precomputed_data:
        return precomputed_data['clf']

    selection = precompute_full_selection()
    labelled = default_data()
    params = new_training_params()
    precomputed_data['clf'] = train_topic_classifier(
        labelled.keys(), labelled.values(), selection, **params)
    return precomputed_data['clf']

In [25]:
# Score the default classifier on the default labelled data, printing one number per
# measure: accuracy, F1 of the positive class, F1 of the negative class, weighted F1.
topics, classes = zip(*default_data().items())
clf = default_trained_topic_selection_classifier()
for measure in (util.accuracy_score, util.f1_pos_class,
                util.f1_neg_class, util.weighted_f1):
    print(evaluate_classifier(clf, topics, classes, measure))


0.920560747664
0.902857142857
0.932806324111
0.920490772941

Examining the test performance of the learning-based classifier on the whole data


In [30]:
def report_test_performance_of_the_learning_based_classifier():
    """Train the learning-based classifier on all labelled topics and print its scores.

    The classifier is fitted on every labelled topic and then scored on those same
    topics, so the printed numbers are training-set scores rather than held-out
    estimates. Prints the classifier followed by accuracy, F1 of the positive class,
    F1 of the negative class and weighted F1. Returns nothing.
    """
    # Preparing the data. The explicit copy makes the column assignment below act on
    # an independent frame instead of a possible view of the cached one.
    topic_df = precompute_topic_data_frame().dropna().copy()
    topic_df['is_relevant'] = topic_df['is_relevant'].astype(bool)
    topics_and_classes = topic_df[['topic', 'is_relevant']].values
    topics = topics_and_classes[:, 0]
    classes = topics_and_classes[:, 1]
    # Preparing the params. The kernel is set once: an earlier 'linear' assignment was
    # immediately overwritten by 'rbf' and therefore never took effect.
    classifier_params = default_classifier_params.copy()
    classifier_params['C'] = 3
    classifier_params['class_weight'] = {1: 2, 0: 1}
    classifier_params['kernel'] = 'rbf'
    classifier_fn = default_classifier
    full_selection = precompute_full_selection()
    clf = train_topic_classifier(topics, classes,
                                 full_selection, full_selection._max_depth,
                                 default_features, classifier_fn,
                                 (lambda x: 1),  # every instance gets weight 1
                                 classifier_params)
    print(clf)
    for measure in [util.accuracy_score, util.f1_pos_class, util.f1_neg_class, util.weighted_f1]:
        print(evaluate_classifier(clf, topics, classes, measure))

# Train on every labelled topic and print the classifier followed by its four scores.
report_test_performance_of_the_learning_based_classifier()


WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
WARNING:root:Loop 'Musician video games'<->'Band-centric video games'
CategorySelectionClassifier(C=3, class_weight={0: 1, 1: 2},
              classifier_fn=<function default_classifier at 0x10ba5a840>,
              classifier_params={'C': 3, 'class_weight': {0: 1, 1: 2}, 'kernel': 'rbf'},
              features=<dswont.topics.TopicFeatures object at 0x10ba547b8>,
              full_selection=<dswont.topics.CategorySelection object at 0x10bbea588>,
              instance_weight=<function report_test_performance_of_the_learning_based_classifier.<locals>.<lambda> at 0x1125e1bf8>,
              kernel='rbf', max_depth=7)
0.915887850467
0.900735294118
0.927027027027
0.916215473307

Reporting the cross-validated performance of the learning-based classifier


In [51]:
def default_classifier_evaluation_params():
    """Assemble the shared inputs of the cross-validated evaluations.

    Returns a 6-tuple ``(topics, classes, inner_cross_validation,
    outer_cross_validation, model_selection_measure, evaluation_measures)``.
    ``topics`` and ``classes`` are the two columns of the labelled topics after a
    row shuffle drawn from NumPy's global random state; callers that need a
    reproducible order must seed it beforehand.
    """
    labelled_df = precompute_topic_data_frame().dropna()
    labelled_df['is_relevant'] = labelled_df['is_relevant'].astype(bool)
    topic_class_pairs = labelled_df[['topic', 'is_relevant']].values

    # Shuffle whole rows in place so each topic stays paired with its class; the two
    # column slices below are views onto the shuffled array.
    np.random.shuffle(topic_class_pairs)
    topics = topic_class_pairs[:, 0]
    classes = topic_class_pairs[:, 1]

    def model_selection_measure(*args, **params):
        return util.weighted_f1(*args, **params)

    inner_cross_validation = None  # No inner cross-validation.
    outer_cross_validation = default_cross_validation
    evaluation_measures = [util.accuracy_score, util.f1_pos_class,
                           util.f1_neg_class, util.weighted_f1]
    return (topics, classes, inner_cross_validation,
            outer_cross_validation, model_selection_measure,
            evaluation_measures)

        
def evaluate_learning_based_classifier_cross_validated(training_size=None):
    """Cross-validate the learning-based topic classifier (linear kernel, C=1).

    NumPy's global random state is re-seeded with 0 first, so the shuffle done by
    ``default_classifier_evaluation_params`` is the same on every call.
    ``training_size`` is passed through unchanged to
    ``train_evaluate_topic_classifier_cv``, whose result is returned as is.
    """
    np.random.seed(0)
    (topics, classes, inner_cv, outer_cv,
     selection_measure, measures) = default_classifier_evaluation_params()

    selection = precompute_full_selection()
    feature_set = default_features.copy()
    svm_params = default_classifier_params.copy()
    svm_params['C'] = 1
    svm_params['kernel'] = 'linear'

    classifier = CategorySelectionClassifier(
        full_selection=selection,
        features=feature_set,
        classifier_fn=default_classifier,
        max_depth=selection._max_depth,
        instance_weight=default_instance_weight_fn,
        **svm_params)

    return train_evaluate_topic_classifier_cv(
        classifier, topics, classes,
        inner_cv, outer_cv,
        selection_measure, measures,
        param_grid=None,
        learning=True,
        training_size=training_size)


def evaluate_depth_based_classifier_cross_validated(depth):
    """Cross-validate the depth-based selection baseline for one cut-off depth.

    Parameters
    ----------
    depth
        Depth handed to ``depth_based_selection`` together with the root of the
        full selection.

    Returns
    -------
    Whatever ``train_evaluate_topic_classifier_cv`` returns. The callers in this
    notebook iterate over it and call ``.mean()`` / ``.std()`` on each item.

    NumPy's global random state is re-seeded with 0 first, so the data shuffle is
    identical for every depth.
    """
    np.random.seed(0)
    topics, classes, inner_cross_validation,\
        outer_cross_validation, model_selection_measure,\
        evaluation_measures = default_classifier_evaluation_params()
    full_selection = precompute_full_selection()
    tuned_clf = depth_based_selection(full_selection._root, depth)
    # NOTE(review): param_grid is False here but None in the other evaluators in this
    # notebook -- kept as is; confirm the callee treats both the same way.
    return train_evaluate_topic_classifier_cv(tuned_clf, topics, classes,
                                              inner_cross_validation,
                                              outer_cross_validation,
                                              model_selection_measure,
                                              evaluation_measures,
                                              param_grid=False,
                                              learning=False)


def plot_performance_depth_based_classifier(depth_range=range(1, 8)):
    """Evaluate the depth-based baseline at several depths and plot every metric.

    Parameters
    ----------
    depth_range
        Iterable of depths to evaluate. Defaults to ``range(1, 8)``, the range that
        was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2, n_metrics, n_depths)``: index 0 of the first axis holds
        the means and index 1 the standard deviations.

    Raises
    ------
    ValueError
        If ``depth_range`` is empty.

    One figure with error bars is created per metric as a side effect.
    """
    metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
    depths = list(depth_range)
    if not depths:
        raise ValueError('depth_range must contain at least one depth')

    per_depth = []
    for depth in depths:
        # Named `scores_per_metric` rather than `metrics` so the `sklearn.metrics`
        # module imported by the notebook is not shadowed inside this function.
        scores_per_metric = evaluate_depth_based_classifier_cross_validated(depth)
        # zip() caps the result at len(metric_names) entries, as before.
        per_depth.append([(scores.mean(), scores.std())
                          for _, scores in zip(metric_names, scores_per_metric)])

    # (n_depths, n_metrics, 2) -> (2, n_metrics, n_depths)
    performance = np.array(per_depth).T
    for idx, metric_name in enumerate(metric_names):
        plt.figure()
        plt.errorbar(depths, performance[0, idx, :], yerr=performance[1, idx, :])
        plt.xlabel('depth')
        plt.ylabel(metric_name)
    return performance

        
def evaluate_majority_classifier_cross_validated(training_size=None):
    """Cross-validate a ``MajorityClassClassifier`` on the default evaluation data.

    Re-seeds NumPy's global random state with 0, fetches the shared evaluation
    inputs and hands everything to ``train_evaluate_topic_classifier_cv``, passing
    ``training_size`` through unchanged. The result of that call is returned as is.
    """
    np.random.seed(0)
    (topics, classes, inner_cv, outer_cv,
     selection_measure, measures) = default_classifier_evaluation_params()
    baseline = MajorityClassClassifier()
    return train_evaluate_topic_classifier_cv(
        baseline, topics, classes,
        inner_cv, outer_cv,
        selection_measure, measures,
        param_grid=None,
        learning=True,
        training_size=training_size)


def evaluate_random_classifier_cross_validated(training_size=None):
    """Cross-validate a ``StratifiedRandomClassifier`` on the default evaluation data.

    Re-seeds NumPy's global random state with 0, fetches the shared evaluation
    inputs and hands everything to ``train_evaluate_topic_classifier_cv``, passing
    ``training_size`` through unchanged. The result of that call is returned as is.
    """
    np.random.seed(0)
    (topics, classes, inner_cv, outer_cv,
     selection_measure, measures) = default_classifier_evaluation_params()
    baseline = StratifiedRandomClassifier()
    return train_evaluate_topic_classifier_cv(
        baseline, topics, classes,
        inner_cv, outer_cv,
        selection_measure, measures,
        param_grid=None,
        learning=True,
        training_size=training_size)


def produce_learning_curve(evaluation_fn_of_training_size, training_sizes):
    """Evaluate a classifier at each training size and stack the results.

    ``evaluation_fn_of_training_size`` is called once per entry of
    ``training_sizes``, in order, with the keyword argument ``training_size``.
    The collected results are returned as a single NumPy array.
    """
    return np.array([evaluation_fn_of_training_size(training_size=size)
                     for size in training_sizes])

In [41]:
# Evaluate the depth-based baseline at every depth (one figure per metric) and keep the
# returned mean/std array for the accuracy comparison plot.
depth_based_perf = plot_performance_depth_based_classifier()


WARNING:root:Loop 'Computer data'<->'Data processing'
Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Internet search'<->'Real-time web'
Fold 2
Fold 3
Fold 4
Fold 5
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
Fold 2
Fold 3
Fold 4
Fold 5
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
Fold 2
Fold 3
Fold 4
Fold 5
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Espionage'<->'Information sensitivity'
WARNING:root:Loop 'Audio engineering'<->'Audio electronics'
WARNING:root:Loop 'NBCUniversal'<->'National Broadcasting Company'
WARNING:root:Loop 'Massively multiplayer online role-playing games'<->'MMORPGs by topic'
WARNING:root:Loop 'Musician video games'<->'Band-centric video games'
WARNING:root:Loop 'Latin alphabets'<->'Latin alphabet'
Fold 2
Fold 3
Fold 4
Fold 5
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Espionage'<->'Information sensitivity'
WARNING:root:Loop 'Audio engineering'<->'Audio electronics'
WARNING:root:Loop 'NBCUniversal'<->'National Broadcasting Company'
WARNING:root:Loop 'Massively multiplayer online role-playing games'<->'MMORPGs by topic'
WARNING:root:Loop 'Musician video games'<->'Band-centric video games'
WARNING:root:Loop 'Probability and statistics'<->'Statistics'
WARNING:root:Loop 'Film and video technology'<->'Multimedia'
WARNING:root:Loop 'Euthenics'<->'Critical thinking'
WARNING:root:Loop 'Euthenics'<->'Life skills'
WARNING:root:Loop 'Euthenics'<->'Personal life'
WARNING:root:Loop 'Euthenics'<->'Socioeconomics'
WARNING:root:Loop 'NBCUniversal networks'<->'National Broadcasting Company'
WARNING:root:Loop 'Logic'<->'Mathematical logic'
WARNING:root:Loop 'Logic'<->'Philosophical logic'
WARNING:root:Loop 'Logic'<->'Philosophy of logic'
WARNING:root:Loop 'Mathematical objects'<->'Mathematical structures'
WARNING:root:Loop 'Mathematics'<->'Fields of mathematics'
WARNING:root:Loop 'Mathematics'<->'Mathematical concepts'
WARNING:root:Loop 'Mathematics'<->'Philosophy of mathematics'
WARNING:root:Loop 'Philosophy'<->'Branches of philosophy'
WARNING:root:Loop 'Philosophy'<->'Philosophy by field'
WARNING:root:Loop 'Applied mathematics'<->'Cybernetics'
WARNING:root:Loop 'Social sciences methodology'<->'Evaluation methods'
WARNING:root:Loop 'Social sciences methodology'<->'Methods in sociology'
WARNING:root:Loop 'Economic planning'<->'Socialism'
WARNING:root:Loop 'Quantitative research'<->'Mathematical and quantitative methods (economics)'
WARNING:root:Loop 'Interactive art'<->'New media art'
WARNING:root:Loop 'Hellenistic philosophy and religion'<->'Hellenistic religion'
WARNING:root:Loop 'Social programs'<->'Health, education, and welfare economics'
WARNING:root:Loop 'Free will'<->'Personhood'
WARNING:root:Loop 'Product management'<->'Product development'
WARNING:root:Loop 'Latin alphabets'<->'Latin alphabet'
Fold 2
Fold 3
Fold 4
Fold 5
Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
/usr/local/lib/python3.4/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)

In [43]:
%matplotlib inline
plt.errorbar(range(1,8), depth_based_perf.T[:,0,0], yerr=depth_based_perf.T[:,0,1], linewidth=2, elinewidth=1)
plt.plot(range(1, 8), [0.922] * 7, linewidth=2)
plt.xlabel('Depth of selection')
plt.ylabel('Accuracy')
plt.fill_between(range(1, 8), [0.922-0.011] * 7, [0.922+0.011] * 7, alpha=0.25, color='green')
plt.legend(['depth-based selection', 'our method'], loc='lower left')
plt.tight_layout()
plt.savefig(util.resource('accuracy-depth-based.pdf'))


Cross-validated performance of the classifiers in terms of 4 metrics with error bars


In [46]:
# Scores of the stratified-random baseline, one entry per metric name below.
# NOTE(review): the name `metrics` rebinds the `sklearn.metrics` module imported at the
# top of the notebook; a later cell that needs the module has to import it again.
metrics = evaluate_random_classifier_cross_validated()
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
for metric_name, metric in zip(metric_names, metrics):
    # Printed as "mean +- std" (presumably taken over the cross-validation folds).
    print("{:<11s} : {:.2f} +- {:.3f}".format(metric_name, metric.mean(), metric.std()))


Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
accuracy    : 0.54 +- 0.054
f1_pos      : 0.38 +- 0.082
f1_neg      : 0.58 +- 0.030
weighted_f1 : 0.50 +- 0.052

Plotting the learning curve of a classifier, cross-validated with error bars


In [119]:
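# NOTE: this cell is currently disabled. The output stored below it appears to come from
# an earlier run of these two lines; un-comment them to regenerate `metrics_ours_lc`.
# training_sizes = [10, 15, 20, 25, 30, 35]
# metrics_ours_lc = produce_learning_curve(evaluate_learning_based_classifier_cross_validated, training_sizes)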


Fold 1
Fold 2
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
Fold 3
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Espionage'<->'Information sensitivity'
WARNING:root:Loop 'Audio engineering'<->'Audio electronics'
WARNING:root:Loop 'NBCUniversal'<->'National Broadcasting Company'
WARNING:root:Loop 'Real Robots'<->'Super Robot Wars'
WARNING:root:Loop 'Real Robots'<->'Super Robots'
WARNING:root:Loop 'Super Robots'<->'Real Robots'
WARNING:root:Loop 'Super Robots'<->'Super Robot Wars'
WARNING:root:Loop 'Massively multiplayer online role-playing games'<->'MMORPGs by topic'
WARNING:root:Loop 'Super Robot Wars'<->'Real Robots'
WARNING:root:Loop 'Super Robot Wars'<->'Super Robots'
WARNING:root:Loop 'Unreal'<->'Unreal Engine'
WARNING:root:Loop 'Alphabets'<->'Collation'
WARNING:root:Loop 'Latin alphabets'<->'Latin alphabet'
WARNING:root:Loop 'Latin script'<->'Latin alphabets'
Fold 4
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
Fold 5
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unreal Engine'<->'Unreal Engine games'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
Fold 2
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
Fold 3
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Unreal Engine'<->'Unreal Engine games'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
Fold 4
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
Fold 5
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Unreal Engine'<->'Unreal Engine games'
WARNING:root:Loop 'Audio engineering'<->'Audio electronics'
Fold 2
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
Fold 3
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 4
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
Fold 5
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Unreal Engine'<->'Unreal Engine games'
WARNING:root:Loop 'Audio engineering'<->'Audio electronics'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
Fold 2
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
Fold 3
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 4
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 5
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unreal Engine'<->'Unreal Engine games'
Fold 2
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 3
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 4
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 5
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 1
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unreal Engine'<->'Unreal Engine games'
Fold 2
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
Fold 3
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 4
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
Fold 5
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'


In [137]:
# accuracies = metrics_ours_lc[:,0]
# f1s = metrics_ours_lc[:,3]
# plt.errorbar(training_sizes, accuracies.mean(axis=1), accuracies.std(axis=1))
# plt.errorbar(training_sizes, f1s.mean(axis=1), f1s.std(axis=1))
# plt.ylim(0.4, 1)
# plt.xlim(9, 30.2)
# plt.legend(['Accuracy', 'Weighted F1 score'], loc='lower right')
# plt.xlabel('Training data size (# points)')
# plt.tight_layout()
# plt.savefig(util.resource('node-selection-learning-curve.pdf'))

# plt.figure()
# plt.errorbar(training_sizes, f1s.mean(axis=1), f1s.std(axis=1))
# plt.plot(training_sizes, np.full_like(training_sizes, 0.908, dtype=float))
# plt.ylim(0.7, 1)
# (f1s[-1].mean(), f1s[-1].std())


Performance of the learning-based classifier


In [59]:
# Report the scores returned by the cross-validated evaluation of the
# learning-based classifier as "mean +- std", one line per metric. The returned
# sequence is paired positionally with `metric_names`.
# NOTE(review): assigning to `metrics` rebinds the name that the import cell
# binds to `sklearn.metrics` -- confirm nothing executed later needs the module.
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
metrics = evaluate_learning_based_classifier_cross_validated()
for metric_name, metric in zip(metric_names, metrics):
    print("{:<11s} : {:.3f} +- {:.3f}".format(
        metric_name, metric.mean(), metric.std()))


WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Real Robots'<->'Super Robot Wars'
WARNING:root:Loop 'Real Robots'<->'Super Robots'
WARNING:root:Loop 'Super Robots'<->'Real Robots'
WARNING:root:Loop 'Super Robots'<->'Super Robot Wars'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
WARNING:root:Loop 'Musician video games'<->'Band-centric video games'
Fold 1
Fold 2
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
WARNING:root:Loop 'Musician video games'<->'Band-centric video games'
WARNING:root:Loop 'Massively multiplayer online role-playing games'<->'MMORPGs by topic'
Fold 3
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
WARNING:root:Loop 'Musician video games'<->'Band-centric video games'
WARNING:root:Loop 'Massively multiplayer online role-playing games'<->'MMORPGs by topic'
Fold 4
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
WARNING:root:Loop 'Musician video games'<->'Band-centric video games'
WARNING:root:Loop 'Massively multiplayer online role-playing games'<->'MMORPGs by topic'
Fold 5
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
WARNING:root:Loop 'Musician video games'<->'Band-centric video games'
accuracy    : 0.924 +- 0.006
f1_pos      : 0.907 +- 0.008
f1_neg      : 0.935 +- 0.005
weighted_f1 : 0.924 +- 0.006

Performance of the depth-based baseline


In [61]:
# Report the scores returned by the cross-validated evaluation of the
# depth-based classifier (depth=4) as "mean +- std", one line per metric. The
# returned sequence is paired positionally with `metric_names`.
# NOTE(review): assigning to `metrics` rebinds the name that the import cell
# binds to `sklearn.metrics` -- confirm nothing executed later needs the module.
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
metrics = evaluate_depth_based_classifier_cross_validated(depth=4)
for metric_name, metric in zip(metric_names, metrics):
    print("{:<11s} : {:.3f} +- {:.3f}".format(
        metric_name, metric.mean(), metric.std()))


WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
accuracy    : 0.908 +- 0.009
f1_pos      : 0.889 +- 0.013
f1_neg      : 0.922 +- 0.007
weighted_f1 : 0.908 +- 0.009

Performance of the majority rule baseline


In [63]:
# Report the scores returned by the cross-validated evaluation of the
# majority-rule classifier as "mean +- std", one line per metric. The returned
# sequence is paired positionally with `metric_names`.
# NOTE(review): assigning to `metrics` rebinds the name that the import cell
# binds to `sklearn.metrics` -- confirm nothing executed later needs the module.
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
metrics = evaluate_majority_classifier_cross_validated()
for metric_name, metric in zip(metric_names, metrics):
    print("{:<11s} : {:.3f} +- {:.3f}".format(
        metric_name, metric.mean(), metric.std()))


Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
accuracy    : 0.589 +- 0.002
f1_pos      : 0.000 +- 0.000
f1_neg      : 0.741 +- 0.001
weighted_f1 : 0.436 +- 0.002

Performance of the stratified random baseline


In [64]:
# Report the scores returned by the cross-validated evaluation of the random
# classifier as "mean +- std", one line per metric. The returned sequence is
# paired positionally with `metric_names`.
# NOTE(review): assigning to `metrics` rebinds the name that the import cell
# binds to `sklearn.metrics` -- confirm nothing executed later needs the module.
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
metrics = evaluate_random_classifier_cross_validated()
for metric_name, metric in zip(metric_names, metrics):
    print("{:<11s} : {:.3f} +- {:.3f}".format(
        metric_name, metric.mean(), metric.std()))


Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
accuracy    : 0.539 +- 0.054
f1_pos      : 0.381 +- 0.082
f1_neg      : 0.582 +- 0.030
weighted_f1 : 0.503 +- 0.052

Plot the distribution of relevant and irrelevant topics in the whole selection, as predicted by the learning-based classifier, broken down by depth


In [65]:
def stats_on_relevant_topics_according_to_classifier(clf, topic_df):
    """Count the topics predicted relevant/irrelevant by ``clf`` at each depth.

    As a side effect, draws a stacked horizontal bar chart of the counts
    (one bar per depth) on a new pandas/matplotlib plot.

    Parameters
    ----------
    clf : object
        Anything with a ``predict`` method. It is called once with the
        ``'topic'`` column and must return one prediction per row; ``False``
        and ``True`` predictions are reported as 'irrelevant' and 'relevant'.
    topic_df : pandas.DataFrame
        Must contain the columns ``'topic'``, ``'depth'`` and
        ``'is_relevant'``. The frame is copied, never modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by depth (ascending) with the columns ``'irrelevant'``,
        ``'relevant'``, ``'total'`` and ``'percent_relevant'`` (the fraction
        of relevant topics, rounded to 3 decimals).
    """
    predicted_df = topic_df.copy()
    predicted_df['prediction'] = clf.predict(predicted_df['topic'])
    # 'is_relevant' is removed before dropna(); presumably so that topics
    # without a ground-truth label are still counted -- TODO confirm.
    predicted_df = predicted_df.drop('is_relevant', axis=1)

    # sort_index() replaces the index-sorting DataFrame.sort(), which newer
    # pandas versions no longer provide. Depths are put in descending order
    # for the plot and back in ascending order for the returned table.
    stats_df = (predicted_df.dropna()
                .groupby(['depth', 'prediction'])['topic']
                .count()
                .unstack('prediction')
                .fillna(0)
                .sort_index(ascending=False))
    stats_df = stats_df.rename(columns={False: 'irrelevant', True: 'relevant'})

    # If the classifier predicted a single class only, unstack() produced just
    # one column; add the missing one so the totals below do not raise KeyError.
    for label in ('irrelevant', 'relevant'):
        if label not in stats_df.columns:
            stats_df[label] = 0

    stats_df.plot(kind='barh', stacked=True)
    plt.xlabel('The number of topics')

    stats_df['total'] = stats_df['relevant'] + stats_df['irrelevant']
    stats_df['percent_relevant'] = (stats_df['relevant'] / stats_df['total']).round(3)
    return stats_df.sort_index(ascending=True)

# Compute the per-depth statistics for the classifier returned by
# default_trained_topic_selection_classifier() over the frame returned by
# precompute_topic_data_frame(). Being the cell's last expression, the
# resulting table is what the notebook displays as this cell's output.
stats_on_relevant_topics_according_to_classifier(
    default_trained_topic_selection_classifier(),
    precompute_topic_data_frame())


Out[65]:
prediction irrelevant relevant total percent_relevant
depth
1 0 38 38 1.000
2 1 421 422 0.998
3 0 1938 1938 1.000
4 122 3630 3752 0.967
5 3154 1131 4285 0.264
6 5453 0 5453 0.000
7 12396 0 12396 0.000

Checking the performance of the depth-4-based selection, SVM, decision tree and 1-nearest-neighbor classifiers


In [66]:
def compare_and_plot_the_performances_of_3_classifiers(topic_df):
    """Print the per-class F1 scores of several classifiers on the labelled topics.

    Despite its name, the function compares four classifiers and draws no
    plot; it only prints. Rows of ``topic_df`` with any missing value are
    ignored. The trained classifiers are fitted on the same rows they are
    then evaluated on.

    The classifiers, in the order they are built and reported:
      * the depth-based selection with ``max_depth=4``;
      * the default training parameters with ``C`` overridden to 1000
        (labelled "SVM" in the output);
      * a ``DecisionTreeClassifier`` with no extra parameters;
      * a ``KNeighborsClassifier`` with ``n_neighbors=1``.

    Each printed line holds ``[F1 of the positive class, F1 of the negative
    class]`` as computed by ``util.f1_pos_class`` / ``util.f1_neg_class``.
    """

    def labelled_columns():
        # Topics and labels of the rows that have no missing value.
        complete_rows = topic_df.dropna()
        return complete_rows['topic'], complete_rows['is_relevant']

    topics, classes = labelled_columns()

    def f1_scores(clf):
        eval_topics, eval_classes = labelled_columns()
        return [evaluate_classifier(clf, eval_topics, eval_classes, scorer)
                for scorer in (util.f1_pos_class, util.f1_neg_class)]

    def train_variant(customize):
        # Fresh default parameters, adjusted by `customize`, then trained on
        # the labelled topics against the full selection.
        training_params = new_training_params()
        customize(training_params)
        full_selection = precompute_full_selection()
        return train_topic_classifier(topics, classes, full_selection, **training_params)

    def use_large_c(params):
        params['classifier_params']['C'] = 1000

    def use_decision_tree(params):
        params['classifier_fn'] = DecisionTreeClassifier
        params['classifier_params'] = {}

    def use_one_nearest_neighbor(params):
        params['classifier_fn'] = KNeighborsClassifier
        params['classifier_params'] = {'n_neighbors' : 1}

    # Factories rather than instances: each classifier is built right before
    # it is evaluated and reported.
    contenders = [
        ("Depth-4 classifier:", lambda: depth_based_selection(max_depth=4)),
        ("SVM, with C=1000:", lambda: train_variant(use_large_c)),
        ("Decision tree:", lambda: train_variant(use_decision_tree)),
        ("1-nearest neighbor:", lambda: train_variant(use_one_nearest_neighbor)),
    ]

    print("F1 measures, classes 'relevant' and 'irrelevant' for different classifiers:")
    for label, build_clf in contenders:
        print(label, f1_scores(build_clf()))
    
# Run the comparison on the frame returned by precompute_topic_data_frame();
# the scores are printed by the function itself.
compare_and_plot_the_performances_of_3_classifiers(precompute_topic_data_frame())


WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
F1 measures, classes 'relevant' and 'irrelevant' for different classifiers:
Depth-4 classifier:
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Social information processing'<->'Collective intelligence'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Distributed data storage systems'<->'File sharing networks'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'File sharing networks'<->'Distributed data storage'
WARNING:root:Loop 'Artificial intelligence in fiction'<->'Computing in fiction'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Real Robots'<->'Super Robot Wars'
WARNING:root:Loop 'Real Robots'<->'Super Robots'
WARNING:root:Loop 'Super Robots'<->'Real Robots'
WARNING:root:Loop 'Super Robots'<->'Super Robot Wars'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
WARNING:root:Loop 'Musician video games'<->'Band-centric video games'
WARNING:root:Loop 'Massively multiplayer online role-playing games'<->'MMORPGs by topic'
 [0.88888888888888884, 0.92164674634794164]
SVM, with C=1000:
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Trees (set theory)'<->'Trees (graph theory)'
WARNING:root:Loop 'Cloud storage'<->'File hosting'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Cyberpunk themes'<->'Cyberspace'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Unauthorized video games'<->'Video game clones'
 [0.90737240075614378, 0.93509933774834442]
Decision tree:
WARNING:root:Loop 'Computer data'<->'Data processing'
WARNING:root:Loop 'Data'<->'Computer data'
WARNING:root:Loop 'Internet search'<->'Real-time web'
WARNING:root:Loop 'Web 2.0'<->'Web services'
WARNING:root:Loop 'Collation'<->'Alphabets'
WARNING:root:Loop 'Data-centric programming languages'<->'Persistent programming languages'
WARNING:root:Loop 'Distributed data storage'<->'Distributed data storage systems'
WARNING:root:Loop 'Distributed data storage'<->'File sharing networks'
WARNING:root:Loop 'Web services'<->'Internet search'
WARNING:root:Loop 'Computing in fiction'<->'Artificial intelligence in fiction'
WARNING:root:Loop 'Internet activism'<->'Internet-based activism'
WARNING:root:Loop 'Music software'<->'Audio software'
WARNING:root:Loop 'Persistent programming languages'<->'Data-centric programming languages'
WARNING:root:Loop 'Video game clones'<->'Unauthorized video games'
WARNING:root:Loop 'Collective intelligence'<->'Social information processing'
WARNING:root:Loop 'Government services web portals in the United States'<->'Website article topics with .gov domain names'
WARNING:root:Loop 'Image processing'<->'Computer vision'
WARNING:root:Loop 'Computer vision'<->'Image processing'
WARNING:root:Loop 'Mobile phones'<->'Videotelephony'
WARNING:root:Loop 'Real-time web'<->'Web 2.0'
WARNING:root:Loop 'Videotelephony'<->'Mobile phones'
WARNING:root:Loop 'Free healthcare software'<->'Free medical software'
WARNING:root:Loop 'Audio software'<->'Music software'
WARNING:root:Loop 'File hosting'<->'Cloud storage'
WARNING:root:Loop 'Free medical software'<->'Free healthcare software'
WARNING:root:Loop 'Website article topics with .gov domain names'<->'Government services web portals in the United States'
WARNING:root:Loop 'Unreal Engine games'<->'Unreal'
WARNING:root:Loop 'Massively multiplayer online role-playing games'<->'MMORPGs by topic'
WARNING:root:Loop 'Unreal'<->'Unreal Engine'
 [0.92307692307692302, 0.9498069498069498]
1-nearest neighbor: [0.92366412213740445, 0.94736842105263164]

Plotting the data (with some noise) against a pair of dimensions


In [69]:
def plot_points_2d(topic_df, var1, var2, response):
    """Scatter-plot two columns of ``topic_df``, colored by ``response``.

    Rows with a missing value in any column are dropped first. Gaussian
    noise (std 0.02) is added to both coordinates so that rows sharing the
    same pair of values do not hide each other completely.

    Parameters
    ----------
    topic_df : pandas.DataFrame
        Frame holding at least the columns ``var1``, ``var2`` and
        ``response``.
    var1, var2 : str
        Names of the columns drawn on the x and y axes.
    response : str
        Name of the column used for coloring; its values are cast to ``int``
        and mapped through a two-color (red/green) colormap.

    Returns
    -------
    None
        A new 10x8 inch figure is created as a side effect.
    """
    from matplotlib.colors import ListedColormap

    step = .02        # spacing of the ranges the axis limits are read from
    margin = .1       # padding added around the observed value range
    jitter_std = .02  # std of the noise added to every point
    cm_bright = ListedColormap(['#FF0000', '#00FF00'])

    # Drop incomplete rows once instead of once per extracted column.
    complete_rows = topic_df.dropna()
    X1 = complete_rows[var1].values
    X2 = complete_rows[var2].values
    y = complete_rows[response]

    # Only the extent of each range is used for the axis limits, so 1-D
    # ranges are enough; a 2-D meshgrid would hold exactly the same values.
    x1_range = np.arange(X1.min() - margin, X1.max() + margin, step)
    x2_range = np.arange(X2.min() - margin, X2.max() + margin, step)

    _, ax = plt.subplots(figsize=(10, 8))
    # x-noise is drawn before y-noise, as in the argument order below.
    ax.scatter(X1 + np.random.normal(0, jitter_std, len(X1)),
               X2 + np.random.normal(0, jitter_std, len(X2)),
               c=y.astype(int), cmap=cm_bright,
               marker='+', alpha=1)

    ax.set_xlim(x1_range.min(), x1_range.max())
    ax.set_ylim(x2_range.min(), x2_range.max())
    ax.set_xlabel(var1)
    ax.set_ylabel(var2)
    # Tick labels are hidden on purpose: the jittered positions are not exact.
    ax.set_xticks(())
    ax.set_yticks(())
    
def plot_topics_against_couple_of_dimensions(topic_df):
    """Draw one 'is_relevant' scatter plot per hand-picked pair of features.

    Each pair is passed to ``plot_points_2d`` as (x column, y column), in the
    order listed below.
    """
    feature_pairs = [
        ('frac_parents_in_graph', 'avg_normalized_parent_depth'),
        ('normalized_depth', 'avg_normalized_parent_depth'),
        ('normalized_depth', 'frac_parents_in_graph'),
        ('median_normalized_parent_depth', 'avg_normalized_parent_depth'),
    ]
    for x_feature, y_feature in feature_pairs:
        plot_points_2d(topic_df, x_feature, y_feature, 'is_relevant')
    
# Render the pairwise feature scatter plots.
# NOTE(review): precompute_topic_data_frame() is defined outside this section;
# presumably it returns the topic DataFrame with the feature columns and
# 'is_relevant' -- confirm.
plot_topics_against_couple_of_dimensions(precompute_topic_data_frame())


Displaying the cases when both relevant and irrelevant topics map to the same point in the feature space


In [70]:
def list_topics_that_map_to_same_points(topic_df):
    """Print the groups of topics that collide in feature space.

    Rows with missing values are dropped, the rest are grouped by the columns
    named by ``default_features.feature_names()``, and only groups whose
    ``'is_relevant'`` column holds more than one distinct value are kept.
    For each such group the ``'depth'``, ``'title'`` and ``'is_relevant'``
    columns are printed, sorted by ``'is_relevant'``.

    Parameters
    ----------
    topic_df : pandas.DataFrame
        Frame containing the feature columns plus ``'depth'``, ``'title'``
        and ``'is_relevant'``.

    Returns
    -------
    None
        The groups are written to stdout, in sorted order of their feature
        values.
    """
    feature_cols = default_features.feature_names()
    report_cols = ['depth', 'title', 'is_relevant']

    def has_both_relevant_and_irrelevant_topics(group):
        return group['is_relevant'].nunique() > 1

    ambiguous = (topic_df.dropna()
                 .groupby(feature_cols)
                 .filter(has_both_relevant_and_irrelevant_topics))

    # Iterate over the groups themselves rather than looking the row labels
    # up in topic_df again. DataFrame.ix and DataFrame.sort no longer exist
    # in pandas, hence sort_values (available since pandas 0.17).
    for _, rows in ambiguous.groupby(feature_cols):
        print(rows[report_cols].sort_values('is_relevant'))

# Print every group of topics that share identical feature values but carry
# conflicting relevance labels.
# NOTE(review): precompute_topic_data_frame() is defined outside this section.
list_topics_that_map_to_same_points(precompute_topic_data_frame())


      depth                                title is_relevant
8250      5              Abstract strategy games       False
7609      5                       Anime industry       False
7610      5                      Comics industry       False
8234      5  Hertzsprung–Russell classifications       False
8237      5              Language classification       False
6914      5                        Mixed reality        True
6944      5                       Graph coloring        True
7081      5                        Linux viruses        True
6833      5          Learning in computer vision        True
      depth                            title is_relevant
2538      4                  Sound recording       False
2713      4  Free software operating systems        True
3503      4                    Linux malware        True
3739      4           Robotics organizations        True
      depth                   title is_relevant
4344      4               Reference       False
3987      4          Media industry       False
4301      4             Cartography       False
4308      4                Notation       False
2517      4  Scientific observation       False
4314      4    Electronic documents        True
3029      4                Spamming        True
3305      4         Computer vision        True
3365      4            Graph theory        True
4326      4  Classification systems        True
       depth                     title is_relevant
14393      6            Encyclopedists       False
10570      6       Physical quantities       False
11094      6                  Lawsuits       False
11146      6       Science experiments       False
11986      6  Dennis the Menace (U.S.)       False
13092      6                   Amulets       False
13182      6       Radio-related lists       False
14261      6  Organizational structure       False
14339      6                  Case law       False
11034      6            Remote sensing        True
      depth                                     title is_relevant
2512      4                            Interrogations       False
4336      4                                Catalogues       False
4740      4                               Whole Earth       False
2879      4                              Type systems        True
3310      4                          Machine learning        True
3664      4  Dedicated application electronic devices        True
      depth                 title is_relevant
7619      5            Publishing       False
8273      5               Sources       False
6810      5  Notepad replacements        True
8864      5      SVGAlib programs        True
     depth                     title is_relevant
428      2      Internet-based works       False
87       2                Data types        True
148      2  Software design patterns        True
212      2              Data centers        True
377      2                 Microsoft        True
405      2           User interfaces        True
      depth                title is_relevant
1359      3  Information economy       False
1141      3   Internet protocols        True
      depth            title is_relevant
4321      4          Museums       False
4345      4  Reference works       False
3311      4  Problem solving        True
      depth              title is_relevant
9851      5  Capcom characters       False
6395      5       SFTP clients        True
      depth                             title is_relevant
4335      4                      Bibliography       False
3247      4  Theorem proving software systems        True
3657      4                       ZX Spectrum        True
2523      4           Visualization (graphic)        True
4794      4                         Typefaces        True
4803      4           Vector graphics editors        True
5182      4                       PLATO games        True
      depth                            title is_relevant
8104      5           Perspective projection       False
6827      5  Applications of computer vision        True
      depth                 title is_relevant
6254      5            Psephology       False
6577      5  Stock market indices       False
6855      5    Dispute resolution       False
8133      5              Heraldry       False
8573      5              Currency       False
7171      5           LibreOffice        True
       depth               title is_relevant
11920      6      Manga industry       False
11250      6  4-chromatic graphs        True
      depth               title is_relevant
6858      5         Imagination       False
7351      5              Gundam       False
8173      5               Radio       False
8291      5  Travel guide books       False
6856      5         Game theory        True
      depth                                     title is_relevant
8245      5           Systems of taxonomy by organism       False
9340      5                              Konami media       False
6941      5  Extensions and generalizations of graphs        True
      depth                  title is_relevant
1628      3        Library science       False
1441      3       Windows software        True
1585      3  Mathematical software        True
      depth            title is_relevant
4280      4    Sign language       False
3349      4  Virtual reality        True
      depth                        title is_relevant
2297      3  Online publishing companies       False
689       3       Statistical data types        True
      depth               title is_relevant
9160      5      No More Heroes       False
8724      5    Wired (magazine)       False
6282      5    Music production       False
7359      5       Radio control       False
7712      5  Total Annihilation       False
8082      5   Geographic images       False
8085      5         Hydrography       False
8145      5   Military insignia       False
8178      5          Archivists       False
8274      5           Standards       False
8281      5        Dictionaries       False
8494      5         Svyazinvest       False
6870      5      Bioinformatics        True
8540      5  Oracle Corporation        True
      depth                      title is_relevant
1614      3        Academic literature       False
1009      3    Artificial intelligence        True
1232      3                   Robotics        True
1233      3                     Robots        True
1620      3  Human–machine interaction        True
      depth                 title is_relevant
8140      5         Lucky symbols       False
8209      5         Museum people       False
8219      5       Types of museum       False
8252      5     Military strategy       False
8771      5  Government typefaces        True
      depth                                             title is_relevant
2840      4                                      Epidemiology       False
4260      4  Nomenclature of Territorial Units for Statistics       False
4312      4                                           Symbols       False
5871      4                                             Linux        True

Examine the topics with level <= 4 that our classifier predicted as irrelevant


In [71]:
def report_topics_level_up_to_4_predicted_as_irrelevant(clf, topic_df):
    """Return the topics of depth <= 4 that ``clf`` predicts as irrelevant.

    Parameters
    ----------
    clf : object
        Anything with a ``predict`` method that accepts the ``'topic'``
        column and returns one prediction per row.
    topic_df : pandas.DataFrame
        Frame with at least the columns ``'topic'``, ``'title'`` and
        ``'depth'``. It is not modified; the prediction column is added to a
        copy.

    Returns
    -------
    pandas.DataFrame
        The ``'title'`` and ``'depth'`` columns of the matching rows.
    """
    topic_df = topic_df.copy()
    topic_df['predicted_relevant'] = clf.predict(topic_df['topic'])

    is_shallow = topic_df['depth'] <= 4
    is_predicted_irrelevant = topic_df['predicted_relevant'] == False
    # Combine both masks and index once. Chaining df[mask1][mask2] applies a
    # full-length mask to an already filtered frame, which makes pandas warn
    # "Boolean Series key will be reindexed to match DataFrame index".
    return topic_df.loc[is_shallow & is_predicted_irrelevant,
                        ['title', 'depth']]

# Show the shallow (depth <= 4) topics rejected by the default classifier.
# NOTE(review): default_trained_topic_selection_classifier is defined in a
# later cell (In [78]); unless an earlier definition exists outside this
# section, this cell raises NameError on a fresh kernel run top to bottom.
report_topics_level_up_to_4_predicted_as_irrelevant(
    default_trained_topic_selection_classifier(),
    precompute_topic_data_frame())


/usr/local/lib/python3.4/site-packages/pandas/core/frame.py:1808: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
Out[71]:
title depth
65 Data processing 2
2419 Morse code 4
2420 Alphabets 4
2438 Spintronics 4
2464 Compact disc 4
2467 Holographic memory 4
2508 Time series analysis 4
2509 Censuses 4
2510 Design of experiments 4
2514 Observational study 4
2516 Questionnaire construction 4
2518 Survey methodology 4
2527 E-democracy 4
2558 Substring indices 4
2568 Demodulation 4
2570 Software-defined radio 4
2697 Novell NetWare 4
2769 Soviet computer systems 4
2837 Cohort study methods 4
2838 Cross-sectional analysis 4
2840 Epidemiology 4
2843 Index numbers 4
2867 Abstract machines 4
2870 Combinatory logic 4
2872 Lambda calculus 4
2890 Substitution (logic) 4
2964 Temporal logic 4
2993 Cryptocurrencies 4
3049 Comcast Corporation 4
3073 Submarine communications cables 4
... ... ...
4345 Reference works 4
4427 VimpelCom Ltd. 4
4437 LG Electronics 4
4440 Samsung Electronics 4
4453 Digital gold currencies 4
4463 Payment systems 4
4485 Visitor attractions in Silicon Valley 4
4524 Telecom Italia 4
4714 Internet-based activism 4
4751 CJK typefaces 4
4793 Type foundries 4
4805 Anatomical simulation 4
4894 Synthesizer manufacturing companies 4
5015 Code Lyoko 4
5016 Digimon 4
5059 MSNBC 4
5193 MeeGo 4
5651 Firefox 4
5656 Copyleft 4
5664 Open access (publishing) 4
5753 Fangames 4
5764 Ren'Py games and visual novels 4
5818 Speech synthesis 4
5871 Linux 4
5875 Unification (computer science) 4
5947 Stack Exchange network 4
5988 Alumni associations 4
6019 DARPA Grand Challenge 4
6025 Red vs. Blue 4
6139 Web documentaries 4

123 rows × 2 columns


In [72]:
def report_shortest_path(selection, topic):
    """Return the shortest path from the root to ``topic`` within ``selection``.

    ``topic`` is converted with ``to_category_uri`` and the lookup is
    delegated to ``TrainingDataSelection._shortest_path_from_root``, a
    private method of the wrapper built around ``selection``.
    """
    topic_uri = to_category_uri(topic)
    training_selection = TrainingDataSelection(selection)
    return training_selection._shortest_path_from_root(topic_uri)

In [75]:
# Inspect how "Library science" is reached from the root category.
report_shortest_path(precompute_full_selection(), "Library science")


Out[75]:
['http://dbpedia.org/resource/Category:Computing',
 'http://dbpedia.org/resource/Category:Information_technology',
 'http://dbpedia.org/resource/Category:Information_science',
 'http://dbpedia.org/resource/Category:Library_science']

In [76]:
def get_subcats(topic):
    """Return ``get_subcats(topic)`` from a freshly opened WikipediaGraphIndex.

    The index is opened as a context manager for the duration of the call.
    NOTE(review): WikipediaGraphIndex is not among the names imported in the
    import cell at the top of the notebook -- confirm where it comes from.
    """
    with WikipediaGraphIndex() as graph_index:
        return graph_index.get_subcats(topic)
    
def get_supercats(topic):
    """Return ``get_supercats(topic)`` from a freshly opened WikipediaGraphIndex.

    The index is opened as a context manager for the duration of the call.
    NOTE(review): WikipediaGraphIndex is not among the names imported in the
    import cell at the top of the notebook -- confirm where it comes from.
    """
    with WikipediaGraphIndex() as graph_index:
        return graph_index.get_supercats(topic)
    
def childrens_parents_with_duplicates(topic):
    """List the other parents of every child of ``topic``.

    For each entry returned by ``get_subcats(topic)``, every entry of
    ``get_supercats(child)`` other than ``topic`` itself is collected. A
    parent shared by several children appears once per child.
    """
    other_parents = []
    for child in get_subcats(topic):
        for parent in get_supercats(child):
            if parent == topic:
                continue
            other_parents.append(parent)
    return other_parents

In [78]:
def default_trained_topic_selection_classifier(precomputed_data={}):
    """Return the default topic classifier, training it on first use.

    The mutable default argument is intentional: it is the cache that keeps
    the trained classifier (under the key 'default_clf') between calls, so
    training happens only once per kernel session. Pass a dict of your own
    to use a separate cache.

    On a cache miss the classifier is trained on the ground-truth data over
    the full selection, using fresh training params with classifier param
    C set to 1 and a constant instance weight of 1.
    """
    cache_key = 'default_clf'
    if cache_key in precomputed_data:
        return precomputed_data[cache_key]

    full_selection = precompute_full_selection()
    training_data = read_ground_truth_data()

    training_params = new_training_params()
    training_params['classifier_params']['C'] = 1
    training_params['instance_weight_fn'] = lambda x: 1

    topics, labels = training_data.keys(), training_data.values()
    precomputed_data[cache_key] = train_topic_classifier(
        topics, labels, full_selection, **training_params)
    return precomputed_data[cache_key]