notebook.community

Edit and run



In [1]:

    
# 1. SETTING UP THE MATLAB<->PYTHON BRIDGE
#
# First, we need to install MATLAB
# http://www.mathworks.com/ or at Harvard http://huit.harvard.edu/services/campus-licensed-software
#
# Then, we need to install the Python libraries
# https://www.mathworks.com/help/matlab/matlab_external/install-the-matlab-engine-for-python.html
#
# Then run this:
# cd matlabdir/extern/engines/python
# python setup.py install
#
# Ready to go!

import matlab.engine
import numpy as np
# Start a MATLAB session and keep a handle to it; every later cell talks to MATLAB through `eng`.
eng = matlab.engine.start_matlab()

# Python supports MATLAB arrays
# These can be passed into MATLAB functions called by the engine as arguments 
# matlab.single(initializer=None, size=None, is_complex=False)
# a = matlab.double( size=[100,50] )
#

# Call a function like so:
# return = eng.function( parameters )

# MATLAB engine must know the number of output arguments for a function.
# Set this before calling the function with:
# eng.function( parameters, nargout=n ) # e.g., n = 3

# MATLAB engine hiccups if a function does not return an argument,
# because nargout defaults to 1. If so, pass nargout=0 in that call:
# eng.function( parameters, nargout=0 )
# otherwise the engine will error.
# nargout is a keyword argument of each individual call, so it only affects that call.
    
    
# Asynchronous callback is possible (amazing.)
#
# Just add another parameter, like so:
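# funcHandle = eng.function( parameters, background=True )
# (Older engine releases spelled this 'async=True', but 'async' is a reserved word from Python 3.7 on.)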
#
# To check whether the function is done, call:
# isDone = funcHandle.done()
#
# To collect the result, call:
# result = funcHandle.result()
#
# To cancel
# funcHandle.cancel()
#
# How easy is that?!


# We can also add workspace variables directly
# x = 4.0
# eng.workspace['y'] = x
#
# And evaluate strings with workspace parameters
# a = eng.eval('sqrt(y)')   

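# BUT there's a slight issue
# MATLAB Python engine doesn't support numpy arrays directly as a datatype.
# This means we have to convert them to a MATLAB array type first.
# The simplest way is via nested Python lists:
# a = matlab.double( npArray.tolist() )
# or, element by element:
# a = matlab.double( size=[rows, cols] )
# for i in range(rows):
#     for j in range(cols):
#         a[i][j] = npArray[i][j]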



In [2]:

    
# 1a. TESTING THE MATLAB<->PYTHON BRIDGE
#
# Smoke test: ask MATLAB whether a known prime is prime.
# If the engine is working, this prints True.
candidate = 37
tf = eng.isprime(candidate)
print(tf)









    



True



In [77]:

    
# 1b. DEBUGGING THE MATLAB<->PYTHON BRIDGE
#
# Start the desktop version of MATLAB associated with the engine 'eng' for debugging.
# nargout=0 is passed because no return value is requested from this call.
eng.desktop(nargout=0)



In [ ]:

    
# PROCESS

# 1. SETTING UP THE MATLAB<->PYTHON BRIDGE
# 2. SETUP FOR ACTIVE LABEL SUGGESTION
# Ask user to begin labeling
# Once two labels are achieved:
    # 3. INITIALIZE ACTIVE LABEL SUGGESTION -> returns ordered list of item indices for user to rank.
    # This will 'maximize' learning in system if retrained
# Every new label given by the user:
    # 4. UPDATE ACTIVE LABEL SUGGESTION
    
# NOTES:
    # Replace x with your own data
    # Parameters might need tweaking
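    # Item indices handed to MATLAB (e.g. newLabel, labelIndices) start at 1, NOT 0 !!! MATLAB is 1 indexed.
    # The Python-side labelFlags array is still 0 indexed, so item k lives at labelFlags[k-1][0].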



In [3]:

    
# 2. SETUP FOR ACTIVE LABEL SUGGESTION
#
# So, let's get to work.

# I have a set of data items, each with a corresponding feature representation.
# Together they form a matrix with one row per item and one column per feature.
# Let's say we have 20 items each with a 100-dimensional feature vector
nItems = 20
nFeatures = 100
x = np.random.rand( nItems, nFeatures )

# Now, we need to represent the distance between each of these items using the graph Laplacian matrix 'LGReg'.
# We're going to build this now using a MATLAB function - 'BuildLGRegularizer.m'

# First, we need to set two parameters, as this is an approximation of the true graph laplacian to allow us to
# use this on very large datasets
manifoldDim = 17
# NOTE(review): kNNSize equals nItems in this toy example - confirm BuildLGRegularizer
# accepts a neighbourhood as large as the whole dataset.
kNNSize = 20
# Second, we set the regularization strength of this graph Laplacian
lambdaRP = 0.005

# Next, we call the function
#LGReg = eng.BuildLGRegularizer( x, manifoldDim, kNNSize, nargout=1 );
# ...but, two problems:
# 1) We need to transform our numpy array x into something MATLAB can handle.
#    matlab.double accepts a nested Python sequence, so the whole array is converted in one go.
#    (Copying element by element with range(0, n-1) would skip the last row and the last
#    column and silently leave zeros there.)
xM = matlab.double( x.tolist() )

# 2) LGReg is a 'sparse' matrix type, and python doesn't support that.
# Let's leave the output variable in the MATLAB workspace until we need to use it.
eng.workspace['xM'] = xM
# We also need to pass our function variables
eng.workspace['nItems'] = nItems
eng.workspace['nFeatures'] = nFeatures
eng.workspace['lambdaRP'] = lambdaRP
eng.workspace['manifoldDim'] = manifoldDim
eng.workspace['kNNSize'] = kNNSize

# OK, now let's call our function. The result stays in the MATLAB workspace as 'LGReg'.
eng.eval( "LGReg = BuildLGRegularizer( xM, manifoldDim, kNNSize )", nargout=0 )

# Done.



In [4]:

    
# INITIALIZE ACTIVE LABEL SUGGESTION (AFTER FIRST TWO USER LABELS)
#
# Active label suggestion can only be initialized once at least two items carry a label.
# 'labelFlags' records which items those are: '1' wherever the user has labeled an item,
# '0' everywhere else. Here the first two items are marked as labeled.
labelFlags = matlab.double( size=[nItems, 1] )
for labeled_row in (0, 1):
    labelFlags[labeled_row][0] = 1
eng.workspace['labelFlags'] = labelFlags

# The MATLAB side runs as a short script, one statement per eval:
#   1. labelIndices - convenience variable giving the ordering of the items in the database
#                     (CAREFUL: 1 indexing, not 0 indexing)
#   2. ALSApprox    - build the approximation from the regularizer and the current labels
#   3. SLabels      - suggested labels, in the order they should be presented to the user,
#                     expressed using the labels specified in labelIndices
#   4. SLabelsInt32 - some pythons can't handle int64, so convert to something more manageable
for matlab_statement in (
    "labelIndices = [1:nItems];",
    "ALSApprox = BuildALSApproximation( LGReg, lambdaRP, labelFlags );",
    "SLabels = ActiveLabelSuggestion( ALSApprox, labelIndices, labelFlags );",
    'SLabelsInt32 = int32(SLabels);',
):
    eng.eval( matlab_statement, nargout=0 )

# Pull the converted suggestions back into Python and show them.
SLabels = eng.workspace['SLabelsInt32']
print( SLabels )









    



[[9,12,7,19,11,20,4,10,5,8,14,3,15,13,16,17,18,6]]



In [6]:

    
# UPDATE ACTIVE LABEL SUGGESTION (AFTER EVERY NEW INTERACTION BEYOND FIRST TWO)
#
# The user provides a new label for an item!
# CAREFUL: 1 indexing, not 0 indexing
# Item 20 sits at index 19 in a python array, but at index 20 in a MATLAB array.
labeledItemIndex = 20

# Tell MATLAB which item was labeled (1-based index) and refresh the approximation.
eng.workspace['newLabel'] = labeledItemIndex
eng.eval( "ALSApprox = UpdateALSApproximation( ALSApprox, newLabel );", nargout=0 )

# Mirror the new label in labelFlags; the python-side array is 0-based, hence the -1.
python_row = labeledItemIndex - 1
labelFlags[python_row][0] = 1
eng.workspace['labelFlags'] = labelFlags

# Re-run the suggestion on the MATLAB side, one statement per eval:
#   labelIndices - ordering of the items in the database (1 indexing)
#   SLabels      - suggested labels in the order they should be presented to the user,
#                  expressed using the labels specified in labelIndices
#   SLabelsInt32 - some pythons can't handle int64, so convert to something more manageable
for matlab_statement in (
    "labelIndices = [1:nItems];",
    "SLabels = ActiveLabelSuggestion( ALSApprox, labelIndices, labelFlags );",
    'SLabelsInt32 = int32(SLabels);',
):
    eng.eval( matlab_statement, nargout=0 )

# Pull the converted suggestions back into Python and show them.
SLabels = eng.workspace['SLabelsInt32']
print( SLabels )









    



[[9,12,7,19,11,4,10,5,8,14,3,15,13,16,17,18,6]]



In [ ]: