Default initialization with Xavier


In [1]:
batchsize = 500

In [2]:
from tfs.models import LeNet
net = LeNet()
from tfs.dataset import Mnist
dataset = Mnist()
net.build()
net.fit(dataset,batchsize,1)


step 10. loss 5.095845, score:0.469300
step 20. loss 0.874589, score:0.730200
step 30. loss 0.746855, score:0.850000
step 40. loss 0.287814, score:0.894800
step 50. loss 0.409828, score:0.919200
step 60. loss 0.286020, score:0.931500
step 70. loss 0.237745, score:0.944100
step 80. loss 0.225911, score:0.948500
step 90. loss 0.147063, score:0.951200
step 100. loss 0.213493, score:0.955900
step 110. loss 0.118846, score:0.958300
step 120. loss 0.145289, score:0.959900
Out[2]:
<tfs.models.lenet.LeNet at 0x103f2e7d0>

LSUV initializer
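
LSUV (layer-sequential unit-variance; Mishkin & Matas, "All you need is a good init", 2015) first sets each weight matrix to an orthonormal one, then walks through the net layer by layer, rescaling each weight matrix until the variance of the layer's activations on a probe batch is close to 1. The rescaling step implemented below is

$$W \leftarrow W\,\sqrt{\frac{v_{\text{needed}}}{\mathrm{Var}(B_L)}}$$

where $B_L$ is the layer's output on the probe batch and $v_{\text{needed}} = 1$.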


In [3]:
from tfs.core.initializer import Initializer,InitType
from tfs.core.layer import *
import numpy as np

def svd_orthonormal(shape):
    # Build an orthonormal matrix of the given shape from the SVD of a
    # Gaussian random matrix; this is the orthonormal pre-init step of LSUV.
    if len(shape) < 2:
        raise RuntimeError("Only shapes of length 2 or more are supported.")
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.standard_normal(flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # either u or v has the requested flat shape; pick the one that matches
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)
    return q

# note: this initializer also changes the weights of the current net in place.
class LSUV(Initializer):
    ret_type = InitType.values
    # only these layer types are re-initialized; all others keep their defaults
    available_node_type = [Conv2d, FullyConnect]
    def __init__(
        self,
        net,
        batchX,
        print_names=[]
    ):
        # collect the constructor arguments and forward them to the base class
        vs = locals()
        net = vs['net']
        del vs['self']
        del vs['net']
        super(LSUV,self).__init__(net,**vs)
        
    def _build_init_table(self):
        tbl = {}
        margin = 0.1   # tolerance around the target variance
        max_iter = 10  # cap on rescaling iterations per layer
        for n in self.net.net_def:
            print(type(n).__name__)
            # skip layers that LSUV does not re-initialize
            if type(n) not in self.available_node_type:
                continue
            my_dict = {}
            
            # replace the default weight initialization with an orthonormal matrix
            name = 'weights'
            v = n.variables[name]
            defaultInitOp = n.initializers[name]
            val = defaultInitOp(v.get_shape().as_list(),v.dtype.base_dtype)
            myval = svd_orthonormal(val.shape)
            my_dict[name] = myval
            
            # keep the default initialization for the biases
            name = 'biases'
            v = n.variables[name]
            defaultInitOp = n.initializers[name]
            val = defaultInitOp(v.get_shape().as_list(),v.dtype.base_dtype)
            myval = val
            my_dict[name] = myval
            
            n.set_weights(my_dict)
            
            # measure the variance of this layer's activations on the probe batch
            acts1 = self.net.eval_node(n,self.param.batchX)
            var1 = np.var(acts1)
            iter1 = 0
            needed_variance = 1.0
            print(var1)
            
            # rescale the weights until the activation variance is within
            # `margin` of the target unit variance (or max_iter is reached)
            while (abs(needed_variance - var1) > margin):
                weights = self.net.run(n.variables['weights'])
                biases = self.net.run(n.variables['biases'])
                weights /= np.sqrt(var1)/np.sqrt(needed_variance)
                w_all_new = {'weights':weights,
                             'biases':biases}
                n.set_weights(w_all_new)
                acts1 = self.net.eval_node(n,self.param.batchX)
                var1 = np.var(acts1)
                iter1 += 1
                print(var1)
                if iter1 > max_iter:
                    break

        # the weights are set inside the loop above, so we can return an empty table
        return tbl
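
As a quick sanity check (an illustrative sketch, not part of the library), the matrix returned by svd_orthonormal has orthonormal columns whenever the first dimension is the larger one, which is what keeps the initial activations well-scaled:

q = svd_orthonormal((64, 32))               # e.g. a FullyConnect weight shape
# the columns are orthonormal, so q.T @ q is the 32x32 identity
print(np.allclose(q.T.dot(q), np.eye(32)))  # expected: True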

In [4]:
from tfs.models import LeNet
net = LeNet()
from tfs.dataset import Mnist
dataset = Mnist()
batchX,batchY = dataset.train.next_batch(batchsize) # probe batch used by LSUV to measure activation variances

In [5]:
net.initializer = LSUV(net,batchX)

In [6]:
net.build() # the numbers printed below are each layer's activation variance, before and after adjustment


Conv2d
1914.37
0.999999
MaxPool
Conv2d
0.0713312
0.829
0.993449
MaxPool
FullyConnect
1.06592
FullyConnect
0.547299
1.0
Softmax
Out[6]:
<tf.Tensor 'prob:0' shape=(?, 10) dtype=float32>

In [7]:
net.fit(dataset,batchsize,1)


step 10. loss 1.484214, score:0.448000
step 20. loss 0.418603, score:0.858700
step 30. loss 0.444055, score:0.914200
step 40. loss 0.159206, score:0.936500
step 50. loss 0.249291, score:0.948700
step 60. loss 0.145562, score:0.956900
step 70. loss 0.160076, score:0.963000
step 80. loss 0.126397, score:0.965900
step 90. loss 0.095292, score:0.968800
step 100. loss 0.135110, score:0.970500
step 110. loss 0.098984, score:0.972600
step 120. loss 0.128667, score:0.971900
Out[7]:
<tfs.models.lenet.LeNet at 0x1192c1fd0>
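
Starting from LSUV, training begins at a much lower loss (1.48 vs. 5.10 at step 10) and ends the epoch at roughly 0.972 accuracy, compared with roughly 0.960 under the default Xavier initialization.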
