This notebook demonstrates how to reproduce the results of our TPAMI paper on NLP tasks.

Caveat: The results may vary from the published version: the published paper reports results obtained from Matlab code, while this is a rewritten Python version. The Python version is the only one we distribute, as it is much cleaner and simpler to run than the Matlab version.

The data splits or folds are specified in the data/datasplit_* files. The random seeds are all 0, as can be seen in the command lines. The experiments were run on our lab's SGE computing cluster, named Fear. The SGE and Python command lines are scripts generated by Python programs (src/fear.py), so the experimental configuration shown here is largely self-contained.

Configuration and experiments


In [3]:
pygpstruct_location = '/home/sb358/pygpstruct'
pygpstruct_fear_location = '/home/mlg/sb358/pygpstruct'
result_location = '/bigscratch/sb358/pygpstruct/results'

%load_ext autoreload
%autoreload 2

import sys
sys.path.append(pygpstruct_location + '/src/') # replace by your path to .py files
np.set_printoptions(precision=3)
import fear

In [9]:
# Per-task sizes: (task, number of data points listed in the split file,
# number of those used for training -- the remainder is used for testing).
task_sizes = [
    ('basenp', 300, 150),
    ('chunking', 100, 50),
    ('segmentation', 36, 20),
    ('japanesene', 100, 50),
]

for task, n_data, n_data_train in task_sizes:
    files_prefix = result_location + '/2014-08-22_%s/' % task
    split_file = pygpstruct_location + '/data/datasplit.n_data=%s.txt' % n_data
    # need -1 because doing +1 inside prepare_data_chain
    data_indices = np.loadtxt(split_file, dtype=np.int16) - 1
    for fold in range(5):
        # One row of the split file per fold: first n_data_train entries train, rest test.
        train_indices = data_indices[fold, :n_data_train].tolist()
        test_indices = data_indices[fold, n_data_train:].tolist()
        # Values are strings containing Python expressions
        # (presumably evaluated by the launched script -- confirm in src/fear.py).
        job_config = {
            'n_samples' : '250000',
            'prediction_thinning' : '1000',
            'lhp_update' : "{'binary' : np.log(1)}",
            'data_indices_train' : 'np.array(%s)' % train_indices,
            'data_indices_test' : 'np.array(%s)' % test_indices,
            'data_folder' : "'" + pygpstruct_fear_location + "/data/%s'" % task,
            'task' : "'%s'" % task,
        }
        fear.launch_qsub_job(
            job_config,
            job_hash='qsub_' + str(fold),
            files_prefix=files_prefix,
            repeat_runs=8)


making path /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp
Your job 2081661 ("qsub_0.sh") has been submitted
Your job 2081662 ("qsub_0.sh") has been submitted
Your job 2081663 ("qsub_0.sh") has been submitted
Your job 2081664 ("qsub_0.sh") has been submitted
Your job 2081665 ("qsub_0.sh") has been submitted
Your job 2081666 ("qsub_0.sh") has been submitted
Your job 2081667 ("qsub_0.sh") has been submitted
Your job 2081668 ("qsub_0.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp
Your job 2081669 ("qsub_1.sh") has been submitted
Your job 2081670 ("qsub_1.sh") has been submitted
Your job 2081671 ("qsub_1.sh") has been submitted
Your job 2081672 ("qsub_1.sh") has been submitted
Your job 2081673 ("qsub_1.sh") has been submitted
Your job 2081674 ("qsub_1.sh") has been submitted
Your job 2081675 ("qsub_1.sh") has been submitted
Your job 2081676 ("qsub_1.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp
Your job 2081677 ("qsub_2.sh") has been submitted
Your job 2081678 ("qsub_2.sh") has been submitted
Your job 2081679 ("qsub_2.sh") has been submitted
Your job 2081680 ("qsub_2.sh") has been submitted
Your job 2081681 ("qsub_2.sh") has been submitted
Your job 2081682 ("qsub_2.sh") has been submitted
Your job 2081683 ("qsub_2.sh") has been submitted
Your job 2081684 ("qsub_2.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp
Your job 2081685 ("qsub_3.sh") has been submitted
Your job 2081686 ("qsub_3.sh") has been submitted
Your job 2081687 ("qsub_3.sh") has been submitted
Your job 2081688 ("qsub_3.sh") has been submitted
Your job 2081689 ("qsub_3.sh") has been submitted
Your job 2081690 ("qsub_3.sh") has been submitted
Your job 2081691 ("qsub_3.sh") has been submitted
Your job 2081692 ("qsub_3.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp
Your job 2081693 ("qsub_4.sh") has been submitted
Your job 2081694 ("qsub_4.sh") has been submitted
Your job 2081695 ("qsub_4.sh") has been submitted
Your job 2081696 ("qsub_4.sh") has been submitted
Your job 2081697 ("qsub_4.sh") has been submitted
Your job 2081698 ("qsub_4.sh") has been submitted
Your job 2081699 ("qsub_4.sh") has been submitted
Your job 2081700 ("qsub_4.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking
Your job 2081701 ("qsub_0.sh") has been submitted
Your job 2081702 ("qsub_0.sh") has been submitted
Your job 2081703 ("qsub_0.sh") has been submitted
Your job 2081704 ("qsub_0.sh") has been submitted
Your job 2081705 ("qsub_0.sh") has been submitted
Your job 2081706 ("qsub_0.sh") has been submitted
Your job 2081707 ("qsub_0.sh") has been submitted
Your job 2081708 ("qsub_0.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking
Your job 2081709 ("qsub_1.sh") has been submitted
Your job 2081710 ("qsub_1.sh") has been submitted
Your job 2081711 ("qsub_1.sh") has been submitted
Your job 2081712 ("qsub_1.sh") has been submitted
Your job 2081713 ("qsub_1.sh") has been submitted
Your job 2081714 ("qsub_1.sh") has been submitted
Your job 2081715 ("qsub_1.sh") has been submitted
Your job 2081716 ("qsub_1.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking
Your job 2081717 ("qsub_2.sh") has been submitted
Your job 2081718 ("qsub_2.sh") has been submitted
Your job 2081719 ("qsub_2.sh") has been submitted
Your job 2081720 ("qsub_2.sh") has been submitted
Your job 2081721 ("qsub_2.sh") has been submitted
Your job 2081722 ("qsub_2.sh") has been submitted
Your job 2081723 ("qsub_2.sh") has been submitted
Your job 2081724 ("qsub_2.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking
Your job 2081725 ("qsub_3.sh") has been submitted
Your job 2081726 ("qsub_3.sh") has been submitted
Your job 2081727 ("qsub_3.sh") has been submitted
Your job 2081728 ("qsub_3.sh") has been submitted
Your job 2081729 ("qsub_3.sh") has been submitted
Your job 2081730 ("qsub_3.sh") has been submitted
Your job 2081731 ("qsub_3.sh") has been submitted
Your job 2081732 ("qsub_3.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking
Your job 2081733 ("qsub_4.sh") has been submitted
Your job 2081734 ("qsub_4.sh") has been submitted
Your job 2081735 ("qsub_4.sh") has been submitted
Your job 2081736 ("qsub_4.sh") has been submitted
Your job 2081737 ("qsub_4.sh") has been submitted
Your job 2081738 ("qsub_4.sh") has been submitted
Your job 2081739 ("qsub_4.sh") has been submitted
Your job 2081740 ("qsub_4.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation
Your job 2081741 ("qsub_0.sh") has been submitted
Your job 2081742 ("qsub_0.sh") has been submitted
Your job 2081743 ("qsub_0.sh") has been submitted
Your job 2081744 ("qsub_0.sh") has been submitted
Your job 2081745 ("qsub_0.sh") has been submitted
Your job 2081746 ("qsub_0.sh") has been submitted
Your job 2081747 ("qsub_0.sh") has been submitted
Your job 2081748 ("qsub_0.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation
Your job 2081749 ("qsub_1.sh") has been submitted
Your job 2081750 ("qsub_1.sh") has been submitted
Your job 2081751 ("qsub_1.sh") has been submitted
Your job 2081752 ("qsub_1.sh") has been submitted
Your job 2081753 ("qsub_1.sh") has been submitted
Your job 2081754 ("qsub_1.sh") has been submitted
Your job 2081755 ("qsub_1.sh") has been submitted
Your job 2081756 ("qsub_1.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation
Your job 2081757 ("qsub_2.sh") has been submitted
Your job 2081758 ("qsub_2.sh") has been submitted
Your job 2081759 ("qsub_2.sh") has been submitted
Your job 2081760 ("qsub_2.sh") has been submitted
Your job 2081761 ("qsub_2.sh") has been submitted
Your job 2081762 ("qsub_2.sh") has been submitted
Your job 2081763 ("qsub_2.sh") has been submitted
Your job 2081764 ("qsub_2.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation
Your job 2081765 ("qsub_3.sh") has been submitted
Your job 2081766 ("qsub_3.sh") has been submitted
Your job 2081767 ("qsub_3.sh") has been submitted
Your job 2081768 ("qsub_3.sh") has been submitted
Your job 2081769 ("qsub_3.sh") has been submitted
Your job 2081770 ("qsub_3.sh") has been submitted
Your job 2081771 ("qsub_3.sh") has been submitted
Your job 2081772 ("qsub_3.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation
Your job 2081773 ("qsub_4.sh") has been submitted
Your job 2081774 ("qsub_4.sh") has been submitted
Your job 2081775 ("qsub_4.sh") has been submitted
Your job 2081776 ("qsub_4.sh") has been submitted
Your job 2081777 ("qsub_4.sh") has been submitted
Your job 2081778 ("qsub_4.sh") has been submitted
Your job 2081779 ("qsub_4.sh") has been submitted
Your job 2081780 ("qsub_4.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene
Your job 2081781 ("qsub_0.sh") has been submitted
Your job 2081782 ("qsub_0.sh") has been submitted
Your job 2081783 ("qsub_0.sh") has been submitted
Your job 2081784 ("qsub_0.sh") has been submitted
Your job 2081785 ("qsub_0.sh") has been submitted
Your job 2081786 ("qsub_0.sh") has been submitted
Your job 2081787 ("qsub_0.sh") has been submitted
Your job 2081788 ("qsub_0.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene
Your job 2081789 ("qsub_1.sh") has been submitted
Your job 2081790 ("qsub_1.sh") has been submitted
Your job 2081791 ("qsub_1.sh") has been submitted
Your job 2081792 ("qsub_1.sh") has been submitted
Your job 2081793 ("qsub_1.sh") has been submitted
Your job 2081794 ("qsub_1.sh") has been submitted
Your job 2081795 ("qsub_1.sh") has been submitted
Your job 2081796 ("qsub_1.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene
Your job 2081797 ("qsub_2.sh") has been submitted
Your job 2081798 ("qsub_2.sh") has been submitted
Your job 2081799 ("qsub_2.sh") has been submitted
Your job 2081800 ("qsub_2.sh") has been submitted
Your job 2081801 ("qsub_2.sh") has been submitted
Your job 2081802 ("qsub_2.sh") has been submitted
Your job 2081803 ("qsub_2.sh") has been submitted
Your job 2081804 ("qsub_2.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene
Your job 2081805 ("qsub_3.sh") has been submitted
Your job 2081806 ("qsub_3.sh") has been submitted
Your job 2081807 ("qsub_3.sh") has been submitted
Your job 2081808 ("qsub_3.sh") has been submitted
Your job 2081809 ("qsub_3.sh") has been submitted
Your job 2081810 ("qsub_3.sh") has been submitted
Your job 2081811 ("qsub_3.sh") has been submitted
Your job 2081812 ("qsub_3.sh") has been submitted
making path /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene
Your job 2081813 ("qsub_4.sh") has been submitted
Your job 2081814 ("qsub_4.sh") has been submitted
Your job 2081815 ("qsub_4.sh") has been submitted
Your job 2081816 ("qsub_4.sh") has been submitted
Your job 2081817 ("qsub_4.sh") has been submitted
Your job 2081818 ("qsub_4.sh") has been submitted
Your job 2081819 ("qsub_4.sh") has been submitted
Your job 2081820 ("qsub_4.sh") has been submitted

In [5]:
# Query the job queue on the Fear cluster over ssh, then print the current
# date so the queue snapshot is timestamped.
# The disabled line below would run `qdel -u sb358` on the cluster (SGE job
# deletion for that user); it is kept commented out on purpose.
#!ssh fear qdel -u sb358
!ssh fear qstat
!date


Fri Oct 17 15:07:29 BST 2014

In [1]:
# Print the last line of every qsub_*.log file across all 2014-08-22_* result
# folders, as a quick check of how far each job has progressed.
!tail -n 1 /bigscratch/sb358/pygpstruct/results/2014-08-22_*/qsub_*.log
# Disabled alternative: long listing of the basenp result folder.
#!ls -l /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/*


==> /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_0.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.log <==
2014-09-19 21:05:28,904Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_1.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.log <==
2014-09-19 18:48:43,157Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_2.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.log <==
2014-09-30 01:31:47,129Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_3.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.log <==
2014-09-26 01:10:04,295Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_4.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.log <==
2014-09-30 03:08:54,179Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_0.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.log <==
2014-08-29 02:02:58,929Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_1.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.log <==
2014-08-29 00:57:21,922Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_2.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.log <==
2014-08-28 18:55:41,877Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_3.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.log <==
2014-08-29 15:11:32,742Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_4.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.log <==
2014-08-29 19:22:28,871Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_0.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.log <==
2014-08-28 09:49:54,754Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_1.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.log <==
2014-08-28 05:52:47,939Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_2.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.log <==
2014-08-27 21:00:54,041Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_3.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.log <==
2014-09-24 12:21:33,779Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_4.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.log <==
2014-08-28 03:31:30,878Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_0.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.log <==
2014-08-28 03:19:30,840Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_1.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.log <==
2014-08-27 21:51:58,809Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_2.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.log <==
2014-08-27 20:57:19,997Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_3.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.log <==
2014-08-27 20:58:30,442Z - INFO - hotstart from iteration 250000, including stored random state

==> /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_4.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.log <==
2014-08-27 20:58:23,041Z - INFO - hotstart from iteration 250000, including stored random state

In [35]:
# Inspect the saved state of one job by loading its .state.pickle file.
# NOTE: pickle.load can execute arbitrary code -- only open state files that
# were produced by your own runs.
import pickle

state_path = "/bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_3.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.state.pickle"
with open(state_path, 'rb') as state_file:
    # encoding='latin1' is passed through to pickle.load unchanged from the original cell.
    job_state = pickle.load(state_file, encoding='latin1')
print(job_state)


{'current_ll_test': -916.60294141780082, 'current_f': array([  8.641,  10.794,   5.231, ...,   0.754,  -0.744,   1.318], dtype=float32), 'avg_error': 0.058823529411764705, 'avg_nlm': 0.31003460207612454, 'current_error': 0.064359861591695502, 'current_ll_train': -40.790476250461438, 'mcmc_step': 86448, 'prng': <mtrand.RandomState object at 0x7fae03d48ba8>}

Results


In [8]:
import util

# One (task label, glob pattern of that task's result files) pair per experiment.
result_patterns = [
    ('segmentation', '/bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/*.results.bin'),
    ('chunking', '/bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/*.results.bin'),
    ('japanesene', '/bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/*.results.bin'),
    ('basenp', '/bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/*.results.bin'),
]

# First argument [3] and the top/bottom values are passed exactly as in the
# original call; their meaning is defined by util.make_figure.
util.make_figure([3], result_patterns, top=0.15, bottom=0.04)


file_pattern: /bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/*.results.bin
matching files: ['/bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_3.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_2.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_1.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_0.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_segmentation/qsub_4.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=segmentation.results.bin']
file_pattern: /bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/*.results.bin
matching files: ['/bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_2.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_1.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_4.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_3.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_chunking/qsub_0.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=chunking.results.bin']
file_pattern: /bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/*.results.bin
matching files: ['/bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_4.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_1.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_0.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_2.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_japanesene/qsub_3.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=japanesene.results.bin']
file_pattern: /bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/*.results.bin
matching files: ['/bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_4.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_3.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_2.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_1.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.results.bin', '/bigscratch/sb358/pygpstruct/results/2014-08-22_basenp/qsub_0.lhp_update=binary:np.log1++n_samples=250000++prediction_thinning=1000++task=basenp.results.bin']

recover Matlab data splits


In [104]:
# Matlab line to regenerate data splits: n_data=150;fold=1; rand('state', fold); r=randperm(n_data*2);save(sprintf('~/n_data=%s.fold=%s.mat', int2str(n_data), int2str(fold)), 'r')
import scipy.io

# Show the raw contents of one .mat file as a sanity check.
print(scipy.io.loadmat('/home/sb358/n_data=150.fold=1.mat'))

# convert to txt format: one text file per dataset size, one row per fold
n_folds = 5
for n_data in [18, 50, 150]:
    n_columns = n_data*2
    splits = np.empty((n_folds, n_columns), dtype=np.int16)
    for row in range(n_folds):
        # .mat files are numbered from fold=1, rows of the array from 0
        mat_file = '/home/sb358/n_data=%s.fold=%s.mat' % (str(n_data), str(row + 1))
        splits[row, :] = scipy.io.loadmat(mat_file)['r']
    txt_file = '/home/sb358/pygpstruct/data/datasplit.n_data=%s.txt' % str(n_columns)
    np.savetxt(txt_file, splits, fmt="%g")


Out[104]:
{'__globals__': [],
 'r': array([[114, 117, 166, 204, 192, 177,  83,  75,   9, 185, 281,  19, 176,
          80,  37, 270,  22, 245, 252,  84, 207, 179, 293, 211, 265,  39,
          87, 231, 219, 102, 182, 296, 297, 198,  81, 100,  97, 220, 188,
         286, 257, 147, 223, 138, 272, 105, 259, 155,  90, 180, 201, 190,
          13, 121,  94,  46, 214, 285,  59, 210, 136,  89, 260,  47, 224,
         165, 150,  50, 164,  65,  24, 133,  99,  63,  36, 163, 274, 276,
          96, 175, 131, 184,  14,  52, 149,  18,  25, 144,  48,  70, 278,
         232, 151, 172, 247, 111, 206,  34, 107,  54, 170, 237, 108, 137,
          66, 162, 132, 228,  53,  10,  86, 249, 174,  68, 194, 209,  31,
         134, 153, 199, 140, 255, 145,  27, 130, 195, 173, 216,  45, 109,
         116,  12, 156,  41,   6, 181, 158, 193, 242, 250, 113, 295,  44,
          73, 169,  64,  76, 124, 268,  79, 171, 248,  88,  85,   8, 218,
          26, 256, 129,  17,  67, 241, 275,  15,  91, 280, 139, 122, 104,
          43, 290,  30,  33,  29, 159,  93, 230, 264,  35,   4, 282, 291,
         126, 221,  56, 300, 106,  23, 254, 142, 187,  72, 243, 289, 229,
          71, 284, 119, 235, 298, 157, 273, 152, 292, 240, 287, 146,  32,
         226,  55,   2, 123,  20,  74,  40, 125, 115, 279, 269, 203, 208,
         267, 233,  57, 236,  21, 261, 277, 299, 148, 227, 238, 197, 110,
         251, 215, 212,  58, 167, 183, 200, 294,  69,   7,  38, 205,   5,
         161, 222, 239, 189, 101, 288, 225,  49, 253, 217,  95, 112, 135,
          28,  60, 283, 262, 120,  11, 160,  61, 128, 196,  16,  92, 202,
         234, 103, 244,  82, 191,  62, 271, 168,   1, 186,   3, 154, 118,
         246,  51, 143,  98,  78, 141, 266, 178, 127,  77, 258, 213, 263,
          42]], dtype=uint16),
 '__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Fri Aug 22 22:11:07 2014',
 '__version__': '1.0'}