In [1]:
import os
# Notebook working-directory hack: if the kernel was started inside the
# package directory 'dswont', move one level up so that `import dswont.*`
# resolves against the project root.
if (os.path.basename(os.getcwd()) == 'dswont'):
    os.chdir(os.path.dirname(os.getcwd()))
In [74]:
import logging
logging.basicConfig(level=logging.ERROR)
import collections
from contextlib import closing
import copy
import itertools
from matplotlib import pyplot as plt
import nltk
import numpy as np
import pandas as pd
import random
import semidbm
from sklearn.svm import SVC
from sklearn.base import BaseEstimator
from sklearn.grid_search import GridSearchCV
from sklearn.linear_model.base import LinearModel
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn import cross_validation
from dswont import dbpedia
from dswont.dbpedia import uri_to_title, title_to_uri, to_title, to_category_uri
from dswont import util
from dswont import wikiapi
from dswont.topics import CategorySelectionClassifier
from dswont.topics import default_classifier
from dswont.topics import default_classifier_params
from dswont.topics import default_cross_validation
from dswont.topics import default_data
from dswont.topics import default_instance_weight_fn
from dswont.topics import default_features
from dswont.topics import depth_based_selection
from dswont.topics import evaluate_classifier
from dswont.topics import new_training_params
from dswont.topics import precompute_full_selection
from dswont.topics import read_ground_truth_data
from dswont.topics import train_topic_classifier
from dswont.topics import TrainingDataSelection
In [11]:
def make_topic_df(full_selection):
    """Builds a DataFrame of topics with their depth, title and ground-truth label.

    One row per topic in `full_selection`, outer-merged with the ground-truth
    relevance labels, so topics without a label get NaN in 'is_relevant'.

    :param full_selection: iterable of topic URIs exposing get_depth(topic).
    :return: DataFrame with columns ['topic', 'title', 'depth', 'is_relevant'].
    """
    full_selection_df = pd.DataFrame({'topic': list(full_selection)})
    full_selection_df['depth'] = full_selection_df['topic'].apply(full_selection.get_depth)
    # Read the ground truth once and reuse it (the original code called
    # read_ground_truth_data() twice and discarded the first result).
    ground_truth_items = read_ground_truth_data().items()
    ground_truth_topics, ground_truth_relevance = zip(*ground_truth_items)
    ground_truth_df = pd.DataFrame(
        {'topic': ground_truth_topics,
         'is_relevant': ground_truth_relevance})
    # Outer merge keeps topics that have no ground-truth label (is_relevant = NaN).
    full_selection_df = full_selection_df.merge(ground_truth_df, how='outer')
    full_selection_df['title'] = full_selection_df['topic'].apply(to_title)
    full_selection_df = \
        full_selection_df.reindex(columns=['topic', 'title', 'depth', 'is_relevant'])
    return full_selection_df
In [12]:
def make_feature_df(selection, features, topics):
    """Computes the feature maps of `topics` and returns them as a DataFrame.

    Each row holds the feature values of one topic plus a 'topic' column
    identifying which topic the row belongs to.
    """
    rows = features.compute(selection, topics)
    # Tag every feature map with the topic it was computed for.
    for idx, topic in enumerate(topics):
        rows[idx]['topic'] = topic
    return pd.DataFrame(rows)
In [13]:
def make_mistake_df(clf, feature_df):
    """Returns the rows of `feature_df` on which `clf` predicts the wrong class.

    Rows containing NaN anywhere (e.g. topics without a ground-truth label)
    are dropped from the result.
    """
    predicted = feature_df.copy()
    predicted['prediction'] = clf.predict(predicted['topic'])
    is_mistake = predicted['prediction'] != predicted['is_relevant']
    return predicted[is_mistake].dropna()
In [14]:
def precompute_topic_data_frame(precomputed_data={}):
    """Returns the full topic DataFrame, computing it only on the first call.

    The mutable default argument is used deliberately as a per-process cache,
    so the expensive selection/feature computation runs at most once.
    """
    # Idiomatic membership test (the original used `if not 'x' in d`,
    # inconsistent with the `'clf' not in` style used elsewhere in this file).
    if 'topic_df' not in precomputed_data:
        full_selection = precompute_full_selection()
        # Skip the first element — presumably the root of the selection;
        # TODO confirm against precompute_full_selection().
        topics = [topic for topic in full_selection][1:]
        feature_df = make_feature_df(full_selection, default_features, topics)
        topic_only_df = make_topic_df(full_selection)
        topic_df = topic_only_df.merge(feature_df)
        precomputed_data['full_selection'] = full_selection
        precomputed_data['topics'] = topics
        precomputed_data['topic_df'] = topic_df
    return precomputed_data['topic_df']
In [15]:
def stats_on_relevant_topics_in_ground_truth_vs_depth():
    """Tabulates and plots relevant vs. irrelevant ground-truth topic counts per depth.

    :return: DataFrame indexed by depth with columns
        'irrelevant', 'relevant', 'total' and 'percent_relevant'.
    """
    # Dead locals removed: the original computed `feature_columns` and
    # `feature_and_prediction_columns` and never used them.
    topic_df = precompute_topic_data_frame()
    # Count labelled topics per (depth, is_relevant) pair, then unstack so the
    # relevance values become columns. NOTE: DataFrame.sort() is the old
    # (pre-0.20) pandas API for sorting by index, used throughout this notebook.
    stats_df = topic_df.dropna().groupby(['depth', 'is_relevant'])\
        ['title']\
        .count()\
        .unstack(['is_relevant'])\
        .fillna(0)\
        .sort(ascending=False)
    stats_df = stats_df.rename(columns={False: 'irrelevant', True: 'relevant'})
    stats_df.plot(kind='barh', stacked=True)
    plt.xlabel('The number of topics')
    stats_df['total'] = stats_df['relevant'] + stats_df['irrelevant']
    stats_df['percent_relevant'] = (stats_df['relevant'] / stats_df['total']).round(3)
    return stats_df.sort(ascending=True)
stats_on_relevant_topics_in_ground_truth_vs_depth()
Out[15]:
In [16]:
def default_trained_topic_selection_classifier(precomputed_data={}):
    """Trains (once) and returns the default topic-selection classifier.

    The mutable default argument serves as a per-process cache so the
    training only happens on the first call.

    NOTE(review): a function with this same name is redefined near the end of
    the notebook with different training settings; on a top-to-bottom run the
    later definition shadows this one.
    """
    if 'clf' not in precomputed_data:
        full_selection = precompute_full_selection()
        training_data = default_data()
        training_params = new_training_params()
        clf = train_topic_classifier(training_data.keys(), training_data.values(),
                                     full_selection,
                                     **training_params)
        precomputed_data['clf'] = clf
    return precomputed_data['clf']
In [25]:
# Evaluate the default classifier on the labelled data with four measures,
# printing one result per measure (accuracy, F1 of each class, weighted F1).
topics, classes = list(zip(*default_data().items()))
clf = default_trained_topic_selection_classifier()
for measure in (util.accuracy_score, util.f1_pos_class,
                util.f1_neg_class, util.weighted_f1):
    print(evaluate_classifier(clf, topics, classes, measure))
In [30]:
def report_test_performance_of_the_learning_based_classifier():
    """Trains the learning-based topic classifier on all labelled data and
    prints its performance measured on that same data (training-set
    performance, not a held-out estimate)."""
    # Preparing the data.
    topic_df = precompute_topic_data_frame().dropna()
    topic_df['is_relevant'] = topic_df['is_relevant'].astype(bool)
    topics_and_classes = topic_df[['topic', 'is_relevant']].values
    topics = topics_and_classes[:, 0]
    classes = topics_and_classes[:, 1]
    # Preparing the params.
    classifier_params = default_classifier_params.copy()
    classifier_params['C'] = 3
    classifier_params['class_weight'] = {1: 2, 0: 1}
    # Dead store removed: the original set kernel='linear' and immediately
    # overwrote it with 'rbf'; only the effective value is kept.
    classifier_params['kernel'] = 'rbf'
    classifier_fn = default_classifier
    full_selection = precompute_full_selection()
    clf = train_topic_classifier(topics, classes,
                                 full_selection, full_selection._max_depth,
                                 default_features, classifier_fn,
                                 (lambda x: 1),  # uniform instance weights
                                 classifier_params)
    print(clf)
    for measure in [util.accuracy_score, util.f1_pos_class, util.f1_neg_class, util.weighted_f1]:
        print(evaluate_classifier(clf, topics, classes, measure))
report_test_performance_of_the_learning_based_classifier()
In [51]:
def default_classifier_evaluation_params():
    """Builds the shared evaluation setup used by all cross-validated runs below.

    :return: 6-tuple (topics, classes, inner_cross_validation,
        outer_cross_validation, model_selection_measure, evaluation_measures).
    """
    topic_df = precompute_topic_data_frame().dropna()
    topic_df['is_relevant'] = topic_df['is_relevant'].astype(bool)
    topics_and_classes = topic_df[['topic', 'is_relevant']].values
    # Shuffle the (topic, class) rows together *before* splitting the array
    # into columns. The original shuffled after taking the column slices,
    # which only worked because numpy slices are views on the shuffled array.
    np.random.shuffle(topics_and_classes)
    topics = topics_and_classes[:, 0]
    classes = topics_and_classes[:, 1]
    inner_cross_validation = None  # No inner cross-validation.
    outer_cross_validation = default_cross_validation
    # outer_cross_validation = lambda outputs : cross_validation.KFold(len(outputs), n_folds=2)
    def model_selection_measure(*args, **params):
        # Model selection optimizes the weighted F1 score.
        return util.weighted_f1(*args, **params)
    evaluation_measures = [util.accuracy_score, util.f1_pos_class, util.f1_neg_class, util.weighted_f1]
    return topics, classes, inner_cross_validation,\
        outer_cross_validation, model_selection_measure,\
        evaluation_measures
def evaluate_learning_based_classifier_cross_validated(training_size=None):
    """Cross-validated evaluation of the learning-based (linear-kernel SVM)
    topic-selection classifier.

    :param training_size: optional cap on the training-set size, forwarded to
        train_evaluate_topic_classifier_cv (used for learning curves).
    :return: whatever train_evaluate_topic_classifier_cv returns — presumably
        one array of per-fold scores per evaluation measure; confirm against
        its definition (not visible / not imported in this notebook, so this
        cell relies on it being defined elsewhere in the session).
    """
    np.random.seed(0)  # fixed seed: reproducible shuffling and CV splits
    topics, classes, inner_cross_validation,\
    outer_cross_validation, model_selection_measure,\
    evaluation_measures = default_classifier_evaluation_params()
    # Leftovers from parameter-tuning experiments, kept for reference:
    # def smaller_cross_validation(outputs):
    #     return cross_validation.StratifiedKFold(outputs, n_folds=2)
    # inner_cross_validation = lambda outputs: cross_validation.StratifiedKFold(outputs, n_folds=3)
    # param_grid = new_training_params_cv()['param_grid']
    # param_grid[0]['C'] = [0.25, 0.5, 1, 3, 7, 15]
    # param_grid = None
    full_selection = precompute_full_selection()
    features = default_features.copy()
    classifier_params = default_classifier_params.copy()
    classifier_params['C'] = 1
    classifier_params['kernel'] = 'linear'
    # classifier_params['kernel'] = 'rbf'
    # classifier_params['gamma'] = 1
    tuned_clf = CategorySelectionClassifier(full_selection=full_selection,
                                            features=features,
                                            classifier_fn=default_classifier,
                                            max_depth=full_selection._max_depth,
                                            instance_weight=default_instance_weight_fn,
                                            **classifier_params)
    return train_evaluate_topic_classifier_cv(tuned_clf, topics, classes,
                                              inner_cross_validation,
                                              outer_cross_validation,
                                              # smaller_cross_validation,
                                              model_selection_measure,
                                              evaluation_measures,
                                              param_grid=None,
                                              learning=True,
                                              training_size=training_size)
def evaluate_depth_based_classifier_cross_validated(depth):
    """Cross-validated evaluation of the depth-based baseline that selects all
    topics up to the given depth.

    Uses the same data and cross-validation setup as the learning-based
    evaluation, but with learning disabled.

    NOTE(review): train_evaluate_topic_classifier_cv is not imported in this
    notebook — it must be defined elsewhere in the session.
    """
    np.random.seed(0)  # reproducible CV splits
    topics, classes, inner_cross_validation,\
    outer_cross_validation, model_selection_measure,\
    evaluation_measures = default_classifier_evaluation_params()
    # Dead code removed: the original defined an unused local helper
    # 'smaller_cross_validation' here.
    full_selection = precompute_full_selection()
    tuned_clf = depth_based_selection(full_selection._root, depth)
    return train_evaluate_topic_classifier_cv(tuned_clf, topics, classes,
                                              inner_cross_validation,
                                              outer_cross_validation,
                                              model_selection_measure,
                                              evaluation_measures,
                                              # param_grid=False (not None) kept
                                              # as-is — presumably treated the
                                              # same by the callee; confirm.
                                              param_grid=False,
                                              learning=False)
def plot_performance_depth_based_classifier():
    """Evaluates the depth-based baseline for depths 1..7 and plots each
    metric's mean with std error bars as a function of depth.

    :return: numpy array of shape (2, n_metrics, n_depths) after the
        transpose — index [0] holds the means, [1] the standard deviations.
    """
    metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
    performance = []
    depth_range = range(1, 8)
    for depth in depth_range:
        performance.append([])
        # NOTE(review): the local name 'metrics' shadows the sklearn.metrics
        # module imported at the top of the notebook.
        metrics = evaluate_depth_based_classifier_cross_validated(depth)
        for metric_name, metric in zip(metric_names, metrics):
            # One (mean, std) pair per metric, per depth.
            performance[-1].append((metric.mean(), metric.std()))
    # (depths, metrics, 2) -> (2, metrics, depths) via the transpose.
    performance = np.array(performance).T
    for idx in range(len(metric_names)):
        plt.figure()
        plt.errorbar(depth_range, performance[0, idx, :], yerr=performance[1, idx, :])
        plt.xlabel('depth')
        plt.ylabel(metric_names[idx])
    return performance
def evaluate_majority_classifier_cross_validated(training_size=None):
    """Cross-validated evaluation of a majority-class baseline.

    NOTE(review): MajorityClassClassifier is neither imported nor defined in
    this notebook — running this cell as-is raises NameError; the class is
    presumably defined elsewhere in the original session. Same caveat for
    train_evaluate_topic_classifier_cv.
    """
    np.random.seed(0)  # reproducible CV splits
    topics, classes, inner_cross_validation,\
    outer_cross_validation, model_selection_measure,\
    evaluation_measures = default_classifier_evaluation_params()
    tuned_clf = MajorityClassClassifier()
    return train_evaluate_topic_classifier_cv(tuned_clf, topics, classes,
                                              inner_cross_validation,
                                              outer_cross_validation,
                                              model_selection_measure,
                                              evaluation_measures,
                                              param_grid=None,
                                              learning=True,
                                              training_size=training_size)
def evaluate_random_classifier_cross_validated(training_size=None):
    """Cross-validated evaluation of a stratified-random baseline.

    NOTE(review): StratifiedRandomClassifier is neither imported nor defined
    in this notebook — running this cell as-is raises NameError; the class is
    presumably defined elsewhere in the original session. Same caveat for
    train_evaluate_topic_classifier_cv.
    """
    np.random.seed(0)  # reproducible CV splits
    topics, classes, inner_cross_validation,\
    outer_cross_validation, model_selection_measure,\
    evaluation_measures = default_classifier_evaluation_params()
    tuned_clf = StratifiedRandomClassifier()
    return train_evaluate_topic_classifier_cv(tuned_clf, topics, classes,
                                              inner_cross_validation,
                                              outer_cross_validation,
                                              model_selection_measure,
                                              evaluation_measures,
                                              param_grid=None,
                                              learning=True,
                                              training_size=training_size)
def produce_learning_curve(evaluation_fn_of_training_size, training_sizes):
    """Runs the given evaluation function once per training-set size.

    :param evaluation_fn_of_training_size: callable accepting a
        `training_size` keyword argument and returning the metric values.
    :param training_sizes: iterable of training-set sizes to evaluate at.
    :return: numpy array stacking one result row per training size.
    """
    return np.array([evaluation_fn_of_training_size(training_size=size)
                     for size in training_sizes])
In [41]:
# Evaluate the depth-based baseline for depths 1..7 (slow: one full CV run per
# depth) and keep the (mean, std) results around for re-plotting below.
depth_based_perf = plot_performance_depth_based_classifier()
In [43]:
%matplotlib inline
# Plot the depth-based baseline's accuracy (with error bars) against our
# learned classifier's accuracy. The constants 0.922 +- 0.011 look like the
# mean/std accuracy of the learned classifier taken from a CV run below —
# TODO confirm; they are hard-coded here.
plt.errorbar(range(1,8), depth_based_perf.T[:,0,0], yerr=depth_based_perf.T[:,0,1], linewidth=2, elinewidth=1)
plt.plot(range(1, 8), [0.922] * 7, linewidth=2)
plt.xlabel('Depth of selection')
plt.ylabel('Accuracy')
# Shaded band: +- one std around our method's accuracy.
plt.fill_between(range(1, 8), [0.922-0.011] * 7, [0.922+0.011] * 7, alpha=0.25, color='green')
plt.legend(['depth-based selection', 'our method'], loc='lower left')
plt.tight_layout()
plt.savefig(util.resource('accuracy-depth-based.pdf'))
In [46]:
# Report the cross-validated performance of the stratified-random baseline.
# (The local 'metrics' shadows the sklearn.metrics module imported above.)
metrics = evaluate_random_classifier_cross_validated()
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
for metric_name, metric in zip(metric_names, metrics):
    print("{:<11s} : {:.2f} +- {:.3f}".format(metric_name, metric.mean(), metric.std()))
In [119]:
# training_sizes = [10, 15, 20, 25, 30, 35]
# metrics_ours_lc = produce_learning_curve(evaluate_learning_based_classifier_cross_validated, training_sizes)
In [137]:
# accuracies = metrics_ours_lc[:,0]
# f1s = metrics_ours_lc[:,3]
# plt.errorbar(training_sizes, accuracies.mean(axis=1), accuracies.std(axis=1))
# plt.errorbar(training_sizes, f1s.mean(axis=1), f1s.std(axis=1))
# plt.ylim(0.4, 1)
# plt.xlim(9, 30.2)
# plt.legend(['Accuracy', 'Weighted F1 score'], loc='lower right')
# plt.xlabel('Training data size (# points)')
# plt.tight_layout()
# plt.savefig(util.resource('node-selection-learning-curve.pdf'))
# plt.figure()
# plt.errorbar(training_sizes, f1s.mean(axis=1), f1s.std(axis=1))
# plt.plot(training_sizes, np.full_like(training_sizes, 0.908, dtype=float))
# plt.ylim(0.7, 1)
# (f1s[-1].mean(), f1s[-1].std())
In [59]:
# Cross-validated performance of the learning-based classifier.
metrics = evaluate_learning_based_classifier_cross_validated()
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
for metric_name, metric in zip(metric_names, metrics):
    print("{:<11s} : {:.3f} +- {:.3f}".format(metric_name, metric.mean(), metric.std()))
In [61]:
# Cross-validated performance of the depth-4 baseline.
metrics = evaluate_depth_based_classifier_cross_validated(depth=4)
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
for metric_name, metric in zip(metric_names, metrics):
    print("{:<11s} : {:.3f} +- {:.3f}".format(metric_name, metric.mean(), metric.std()))
In [63]:
# Cross-validated performance of the majority-class baseline.
metrics = evaluate_majority_classifier_cross_validated()
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
for metric_name, metric in zip(metric_names, metrics):
    print("{:<11s} : {:.3f} +- {:.3f}".format(metric_name, metric.mean(), metric.std()))
In [64]:
# Cross-validated performance of the stratified-random baseline (3-decimal
# formatting; this repeats the earlier random-baseline cell).
metrics = evaluate_random_classifier_cross_validated()
metric_names = ['accuracy', 'f1_pos', 'f1_neg', 'weighted_f1']
for metric_name, metric in zip(metric_names, metrics):
    print("{:<11s} : {:.3f} +- {:.3f}".format(metric_name, metric.mean(), metric.std()))
In [65]:
def stats_on_relevant_topics_according_to_classifier(clf, topic_df):
    """Tabulates and plots, per depth, how many topics `clf` predicts as
    relevant vs. irrelevant (the ground-truth column is dropped first).

    :return: DataFrame indexed by depth with columns
        'irrelevant', 'relevant', 'total' and 'percent_relevant'.
    """
    # Dead locals removed: the original computed `feature_columns` and
    # `feature_and_prediction_columns` and never used them.
    topic_df = topic_df.copy()
    topic_df['prediction'] = clf.predict(topic_df['topic'])
    topic_df = topic_df.drop('is_relevant', axis=1)
    # Count topics per (depth, prediction) pair, then unstack the predictions
    # into columns. NOTE: DataFrame.sort() is the old (pre-0.20) pandas API.
    stats_df = topic_df.dropna().groupby(['depth', 'prediction'])\
        ['topic']\
        .count()\
        .unstack('prediction')\
        .fillna(0)\
        .sort(ascending=False)
    stats_df = stats_df.rename(columns={False: 'irrelevant', True: 'relevant'})
    stats_df.plot(kind='barh', stacked=True)
    plt.xlabel('The number of topics')
    stats_df['total'] = stats_df['relevant'] + stats_df['irrelevant']
    stats_df['percent_relevant'] = (stats_df['relevant'] / stats_df['total']).round(3)
    return stats_df.sort(ascending=True)
stats_on_relevant_topics_according_to_classifier(
    default_trained_topic_selection_classifier(),
    precompute_topic_data_frame())
Out[65]:
In [66]:
def compare_and_plot_the_performances_of_3_classifiers(topic_df):
    """Prints F1 scores (positive and negative class) of several baseline
    classifiers on the labelled topics.

    NOTE(review): despite the name, four classifiers are compared (depth-4,
    overfitting SVM, decision tree, 1-NN). Also, the three *_clf helper
    builders ignore their `topic_df` parameter and train on the enclosing
    `topics` / `classes` closure variables.
    """
    topics = topic_df.dropna()['topic']
    classes = topic_df.dropna()['is_relevant']
    def eval_clf(clf, topic_df):
        # F1 for the 'relevant' (positive) and 'irrelevant' (negative) class.
        topics = topic_df.dropna()['topic']
        classes = topic_df.dropna()['is_relevant']
        return [evaluate_classifier(clf, topics, classes, util.f1_pos_class),
                evaluate_classifier(clf, topics, classes, util.f1_neg_class)]
    def overfitting_svm_clf(topic_df):
        # SVM with a very large C — deliberately prone to overfitting.
        training_params = new_training_params()
        training_params['classifier_params']['C'] = 1000
        full_selection = precompute_full_selection()
        return train_topic_classifier(topics, classes, full_selection, **training_params)
    def default_tree_clf(topic_df):
        # Decision tree with default parameters.
        training_params = new_training_params()
        training_params['classifier_fn'] = DecisionTreeClassifier
        training_params['classifier_params'] = {}
        full_selection = precompute_full_selection()
        return train_topic_classifier(topics, classes, full_selection, **training_params)
    def default_k1n_clf(topic_df):
        # 1-nearest-neighbour classifier.
        training_params = new_training_params()
        training_params['classifier_fn'] = KNeighborsClassifier
        training_params['classifier_params'] = {'n_neighbors' : 1}
        full_selection = precompute_full_selection()
        return train_topic_classifier(topics, classes, full_selection, **training_params)
    print("F1 measures, classes 'relevant' and 'irrelevant' for different classifiers:")
    print("Depth-4 classifier:", eval_clf(depth_based_selection(max_depth=4), topic_df))
    print("SVM, with C=1000:", eval_clf(overfitting_svm_clf(topic_df), topic_df))
    print("Decision tree:", eval_clf(default_tree_clf(topic_df), topic_df))
    print("1-nearest neighbor:", eval_clf(default_k1n_clf(topic_df), topic_df))
compare_and_plot_the_performances_of_3_classifiers(precompute_topic_data_frame())
In [69]:
def plot_points_2d(topic_df, var1, var2, response):
    """Scatter-plots the topics in the (var1, var2) feature plane, coloured by
    the boolean `response` column (red = False/irrelevant, green = True).

    A small Gaussian jitter is added to both coordinates so that topics that
    map to exactly the same feature values remain visible as separate marks.
    """
    from matplotlib.pylab import figure, subplot
    from matplotlib.colors import ListedColormap
    h = .02  # grid step; only used to build the meshgrid below
    cm_bright = ListedColormap(['#FF0000', '#00FF00'])
    X1 = topic_df.dropna()[var1].values
    X2 = topic_df.dropna()[var2].values
    y = topic_df.dropna()[response]
    x1_min, x1_max = X1.min() - .1, X1.max() + .1
    x2_min, x2_max = X2.min() - .1, X2.max() + .1
    # NOTE(review): the meshgrid is only used for its min/max when setting the
    # axis limits — building the full grid is unnecessary work (kept as-is
    # because arange excludes its endpoint, so xx1.max() != x1_max exactly).
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, h),
                           np.arange(x2_min, x2_max, h))
    figure(figsize=(10, 8))
    ax = subplot(111)
    # Plot the testing points, jittered.
    plt.scatter(X1 + np.random.normal(0, 0.02, len(X1)),
                X2 + np.random.normal(0, 0.02, len(X2)),
                c=y.astype(int), cmap=cm_bright,
                marker='+', alpha=1)
    ax.set_xlim(xx1.min(), xx1.max())
    ax.set_ylim(xx2.min(), xx2.max())
    ax.set_xlabel(var1)
    ax.set_ylabel(var2)
    ax.set_xticks(())
    ax.set_yticks(())
def plot_topics_against_couple_of_dimensions(topic_df):
    """Draws the 2-D relevance scatter plot for several feature pairs."""
    feature_pairs = [
        ('frac_parents_in_graph', 'avg_normalized_parent_depth'),
        ('normalized_depth', 'avg_normalized_parent_depth'),
        ('normalized_depth', 'frac_parents_in_graph'),
        ('median_normalized_parent_depth', 'avg_normalized_parent_depth'),
    ]
    # One figure per pair, in the same order as the original cell.
    for x_feature, y_feature in feature_pairs:
        plot_points_2d(topic_df, x_feature, y_feature, 'is_relevant')
plot_topics_against_couple_of_dimensions(precompute_topic_data_frame())
In [70]:
def list_topics_that_map_to_same_points(topic_df):
    """Prints the groups of labelled topics that share an identical feature
    vector yet carry conflicting relevance labels — points that no classifier
    over these features can separate."""
    feature_cols = default_features.feature_names()
    labelled = topic_df.dropna()
    def has_conflicting_labels(group):
        # True when the group contains both relevant and irrelevant topics.
        return group['is_relevant'].nunique() > 1
    conflicting = (labelled.groupby(feature_cols)
                   .filter(has_conflicting_labels)
                   .groupby(feature_cols))
    for group_idx in conflicting.groups.values():
        print(topic_df.ix[group_idx]
              [['depth', 'title', 'is_relevant']]
              .sort('is_relevant'))
list_topics_that_map_to_same_points(precompute_topic_data_frame())
In [71]:
def report_topics_level_up_to_4_predicted_as_irrelevant(clf, topic_df):
    """Lists the topics at depth <= 4 that `clf` predicts as irrelevant.

    :param clf: classifier exposing predict(topics) -> boolean predictions.
    :param topic_df: DataFrame with at least 'topic', 'title', 'depth' columns.
    :return: DataFrame with the 'title' and 'depth' of those topics.
    """
    topic_df = topic_df.copy()
    topic_df['predicted_relevant'] = clf.predict(topic_df['topic'])
    # Combine both conditions into a single boolean mask; the original's
    # chained indexing (df[a][b]) relied on index alignment and triggers
    # pandas indexing warnings.
    shallow_and_rejected = (topic_df['depth'] <= 4) & (topic_df['predicted_relevant'] == False)
    return topic_df[shallow_and_rejected][['title', 'depth']]
# Sanity check: shallow topics (depth <= 4) that the trained classifier
# rejects — likely misclassifications worth inspecting by hand.
report_topics_level_up_to_4_predicted_as_irrelevant(
    default_trained_topic_selection_classifier(),
    precompute_topic_data_frame())
Out[71]:
In [72]:
def report_shortest_path(selection, topic):
    """Returns the shortest path from the root of `selection` to the topic
    given by its plain title, via the project's TrainingDataSelection wrapper."""
    return TrainingDataSelection(selection)._shortest_path_from_root(to_category_uri(topic));
In [75]:
# Example: trace how "Library science" is reached from the root category.
report_shortest_path(precompute_full_selection(), "Library science")
Out[75]:
In [76]:
def get_subcats(topic):
    """Returns the subcategories of `topic` from the Wikipedia graph index.

    NOTE(review): WikipediaGraphIndex is not imported in this notebook —
    presumably it lives in dswont.wikiapi; as written this cell raises
    NameError when called. Confirm and qualify the name.
    """
    with WikipediaGraphIndex() as wiki:
        return wiki.get_subcats(topic)
def get_supercats(topic):
    """Returns the supercategories (parents) of `topic` from the graph index.

    NOTE(review): see get_subcats about the undefined WikipediaGraphIndex.
    """
    with WikipediaGraphIndex() as wiki:
        return wiki.get_supercats(topic)
def childrens_parents_with_duplicates(topic):
    """Returns, with duplicates, the other parents of each child of `topic`
    (i.e. the co-parents of `topic`'s children, excluding `topic` itself)."""
    return [parent for child in get_subcats(topic)
            for parent in get_supercats(child)
            if parent != topic]
In [78]:
def default_trained_topic_selection_classifier(precomputed_data={}):
    """Trains (once) and returns a topic-selection classifier trained on the
    full ground-truth data with C=1 and uniform instance weights.

    NOTE(review): this redefinition shadows the earlier function of the same
    name (which trained on default_data() with default params); after this
    cell runs, only this version is in effect.

    The mutable default argument serves as a per-process cache.
    """
    if 'default_clf' not in precomputed_data:
        full_selection = precompute_full_selection()
        training_data = read_ground_truth_data()
        training_params = new_training_params()
        training_params['classifier_params']['C'] = 1
        # Uniform instance weights override whatever new_training_params() set.
        training_params['instance_weight_fn'] = lambda x : 1
        clf = train_topic_classifier(
            training_data.keys(), training_data.values(),
            full_selection,
            **training_params)
        precomputed_data['default_clf'] = clf
    return precomputed_data['default_clf']