From 4e9b5549664af4c09cceb789472a1b98985e1e9d Mon Sep 17 00:00:00 2001 From: pseudotensor Date: Thu, 2 Feb 2017 17:15:40 -0800 Subject: [PATCH 1/5] first stacked running ok --- README.md | 10 +++++++- clstm.py | 70 +++++++++++++++++++++++++++++++++++++++---------------- main.py | 46 ++++++++++++++++++++++++++++-------- 3 files changed, 95 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 55a3550..8cfc511 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ # Inspired by papers: +https://arxiv.org/abs/1506.04214 (Conv LSTM) http://www.jmlr.org/proceedings/papers/v2/sutskever07a/sutskever07a.pdf https://arxiv.org/abs/1411.4389 https://arxiv.org/abs/1504.08023 -https://arxiv.org/abs/1506.04214 (like this paper with RNN but now with LSTM) https://arxiv.org/abs/1511.06380 https://arxiv.org/abs/1511.05440 https://arxiv.org/abs/1605.08104 @@ -110,6 +110,14 @@ smplayer out_all2_fast.mp4 * Try more filters +* Try L2 loss not only on (or not just on) final image, but hidden states. Should approximate adversarial networks, which keep image and hidden latent variable more smoothly connected (i.e. avoid fractured manifold). + +* Try different hyperparameters + +* Try Stacked Conv/Deconv LSTMs (https://arxiv.org/pdf/1506.04214v2.pdf and https://arxiv.org/pdf/1605.07157v4.pdf) + +* Try skip connections (https://arxiv.org/pdf/1605.07157v4.pdf) + * Try temporal convolution * Try other LSTM architectures (C-peek, bind forget-recall, GRU, etc.) diff --git a/clstm.py b/clstm.py index e3e2a41..1a23310 100644 --- a/clstm.py +++ b/clstm.py @@ -42,12 +42,13 @@ class clstm(CRNNCell): # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py - def __init__(self, shape, filter, features, forget_bias=1.0, input_size=None, + def __init__(self, shape, filter, stride, features, forget_bias=1.0, input_size=None, state_is_tuple=False, activation=tf.nn.tanh): """Initialize the basic CLSTM cell. Args: shape: int tuple of the height and width of the cell filter: int tuple of the height and width of the filter + stride: stride to use if doing convolution or deconvolution features: int of the depth of the cell forget_bias: float, the bias added to forget gates (see above). input_size: Deprecated. 
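The hunk above is where the cell update happens: one convolution over [inputs, h] produces the four gate pre-activations i, j, f, o, and new_c / new_h follow the usual LSTM equations with Hadamard products. A minimal NumPy sketch of that single step, assuming stride 1 (conv2d_same and clstm_step are illustrative stand-ins for _convolve_linear and clstm.__call__, not code from this repo):

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def conv2d_same(x, w):
        # x: H x W x Cin, w: k x k x Cin x Cout; stride 1, SAME padding
        k, p = w.shape[0], w.shape[0] // 2
        xp = np.pad(x, ((p, p), (p, p), (0, 0)), mode='constant')
        out = np.zeros(x.shape[:2] + (w.shape[3],))
        for r in range(x.shape[0]):
            for c in range(x.shape[1]):
                out[r, c, :] = np.tensordot(xp[r:r + k, c:c + k, :], w,
                                            axes=([0, 1, 2], [0, 1, 2]))
        return out

    def clstm_step(x, c, h, w, b, forget_bias=1.0):
        # x, c, h: H x W x F;  w: k x k x (2F) x (4F);  b: (4F,)
        gates = conv2d_same(np.concatenate([x, h], axis=-1), w) + b
        i, j, f, o = np.split(gates, 4, axis=-1)           # each H x W x F
        new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
        new_h = np.tanh(new_c) * sigmoid(o)                # Hadamard products
        return new_h, new_c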
@@ -60,6 +61,7 @@ def __init__(self, shape, filter, features, forget_bias=1.0, input_size=None, logging.warn("%s: Input_size parameter is deprecated.", self) self.shape = shape self.filter = filter + self.stride = stride self.features = features self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple @@ -74,7 +76,7 @@ def state_size(self): def output_size(self): return self._num_units - def __call__(self, inputs, state, scope=None): + def __call__(self, inputs, state, typec='Conv', scope=None): """Long short-term memory cell (LSTM).""" # inputs: batchsize x clstmshape x clstmshape x clstmfeatures with tf.variable_scope(scope or type(self).__name__): @@ -88,12 +90,12 @@ def __call__(self, inputs, state, scope=None): doclstm=1 if doclstm==1: - concat = _convolve_linear([inputs, h], self.filter, self.features * 4, True) + concat = _convolve_linear([inputs, h], self.filter, self.stride, self.features * 4, typec, True) # http://colah.github.io/posts/2015-08-Understanding-LSTMs/ # i = input_gate, j = new_input, f = forget_gate, o = output_gate (each with clstmfeatures features) i, j, f, o = tf.split(3, 4, concat) else: - # work in-progress + # TODO: work in-progress incat = tf.concat(3,args) # general W.x + b separately for each i,j,f,o #i = tf.matmul(incat,weightsi) + biasesi @@ -103,9 +105,10 @@ def __call__(self, inputs, state, scope=None): # concat: batchsize x clstmshape x clstmshape x (clstmfeatures*4) - - new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) * - self._activation(j)) + # Hadamard (element-by-element) products (*) + # If stride!=1, then c will be different size than i,j,f,o, so next operation won't work. + new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) * self._activation(j)) + # If stride!=1, then o different dimension than new_h needs to be. (because c and h need to be same size if packing/splitting them as well as recurrently needs to be same size) new_h = self._activation(new_c) * tf.nn.sigmoid(o) if self._state_is_tuple: @@ -114,11 +117,12 @@ def __call__(self, inputs, state, scope=None): new_state = tf.concat(3, [new_c, new_h]) return new_h, new_state -def _convolve_linear(args, filter, features, bias, bias_start=0.0, scope=None): +def _convolve_linear(args, filter, stride, features, typec, bias, bias_start=0.0, scope=None): """convolution: Args: args: 4D Tensor or list of 4D, batch x n, Tensors. filter: int tuple of filter with height and width. + stride: stride for convolution features: int, as number of features. bias_start: starting value to initialize bias; 0 by default. scope: VariableScope for created subgraph; defaults to "Linear". 
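For the deConv branch added below, the only shape differences from the Conv branch are the swapped filter channels and the explicit output_shape handed to conv2d_transpose. A small bookkeeping helper to make the convention concrete (names and values are illustrative, not repo code):

    def deconv_shapes(batch, height, width, in_features, out_features, kernel, stride):
        # conv2d filter:           [k, k, in_features, out_features]
        conv_filter = (kernel, kernel, in_features, out_features)
        # conv2d_transpose filter: [k, k, out_features, in_features]  (last two swapped)
        deconv_filter = (kernel, kernel, out_features, in_features)
        # with SAME padding, conv2d_transpose upsamples spatially by the stride
        output_shape = (batch, height * stride, width * stride, out_features)
        return conv_filter, deconv_filter, output_shape

    # e.g. an 8x8 map with 2*F input channels producing the 4*F gate channels at stride 2:
    print(deconv_shapes(batch=16, height=8, width=8,
                        in_features=2 * 16, out_features=4 * 16, kernel=3, stride=2))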
@@ -144,23 +148,49 @@ def _convolve_linear(args, filter, features, bias, bias_start=0.0, scope=None): dtype = [a.dtype for a in args][0] - # Computation - with tf.variable_scope(scope or "Conv"): - # setup weights as kernel x kernel x (input features = clstmfeatures*2) x (new features=clstmfeatures*4) - mat = tf.get_variable( - "Mat", [filter[0], filter[1], total_arg_size_depth, features], dtype=dtype) - if len(args) == 1: - res = tf.nn.conv2d(args[0], mat, strides=[1, 1, 1, 1], padding='SAME') + # concat + if len(args) == 1: + inputs = args[0] + else: + inputs=tf.concat(3, args) + + # Conv + if typec=='Conv': + with tf.variable_scope(scope or "Conv"): + # setup weights as kernel x kernel x (input features = clstmfeatures*2) x (new features=clstmfeatures*4) + weights = tf.get_variable( "Weights", [filter[0], filter[1], total_arg_size_depth, features], dtype=dtype) + res = tf.nn.conv2d(inputs, weights, strides=[1, stride, stride, 1], padding='SAME') + + # BIAS + if bias: + bias_term = tf.get_variable( + "Bias", [features], + dtype=dtype, + initializer=tf.constant_initializer( + bias_start, dtype=dtype)) else: + bias_term = 0*res + + # deConv + if typec=='deConv': + with tf.variable_scope(scope or "deConv"): + # setup weights as kernel x kernel x (new features=clstmfeatures*4) x (input features = clstmfeatures*2). + # i.e., 2nd arg to transpose version is [height, width, output_channels, in_channels], where last 2 are switched compared to normal conv2d + deweights = tf.get_variable( "deWeights", [filter[0], filter[1], features, total_arg_size_depth], dtype=dtype) + output_shape = tf.pack([tf.shape(inputs)[0], tf.shape(inputs)[1]*stride, tf.shape(inputs)[2]*stride, features]) # first argument is batchsize x clstmshape x clstmshape x (2*clstmfeatures) - res = tf.nn.conv2d(tf.concat(3, args), mat, strides=[1, 1, 1, 1], padding='SAME') # res: batchsize x clstmshape x clstmshape x (clstmfeatures*4) - if not bias: - return res - bias_term = tf.get_variable( - "Bias", [features], + res = tf.nn.conv2d_transpose(inputs, deweights, output_shape, strides=[1, stride, stride, 1], padding='SAME') + + # BIAS + if bias: + bias_term = tf.get_variable( + "deBias", [features], dtype=dtype, initializer=tf.constant_initializer( bias_start, dtype=dtype)) + else: + bias_term = 0*res + return res + bias_term diff --git a/main.py b/main.py index 82605ae..2bdad6d 100644 --- a/main.py +++ b/main.py @@ -87,6 +87,7 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): clstminput=sizexy/cnnstrideproduct # must be evenly divisible clstmshape=[clstminput,clstminput] clstmkernel=[3,3] + clstmstride=1 # currently needs to be 1 unless implement tf.pad() or tf.nn.fractional_avg_pool() clstmfeatures=cnnfeatures[3] # same as features of last cnn layer fed into clstm # dcnnkernels=[1,3,3,3] # reasonably the reverse order of cnnkernels @@ -112,12 +113,25 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # # x_pred = [] + + #################### + # Setup CLSTM with tf.variable_scope('clstm', initializer = tf.random_uniform_initializer(-.01, 0.1)): # input shape, kernel filter size, number of features - cell = clstm.clstm(clstmshape, clstmkernel, clstmfeatures) + convcell = clstm.clstm(clstmshape, clstmkernel, clstmstride, clstmfeatures) # state: batchsize x clstmshape x clstmshape x clstmfeatures - new_state = cell.set_zero_state(FLAGS.minibatch_size, tf.float32) + new_state = convcell.set_zero_state(FLAGS.minibatch_size, tf.float32) + # Setup deCLSTM + with tf.variable_scope('declstm', initializer = 
tf.random_uniform_initializer(-.01, 0.1)): + # input shape, kernel filter size, number of features + deconvcell = clstm.clstm(clstmshape, clstmkernel, clstmstride, clstmfeatures) + # state: batchsize x clstmshape x clstmshape x clstmfeatures + denew_state = deconvcell.set_zero_state(FLAGS.minibatch_size, tf.float32) + + + + ######################## # Create CNN-LSTM-dCNN for an input of input_seq_length-1 frames in n time for an output of input_seq_length-1 frames in n+1 time for i in xrange(FLAGS.input_seq_length-1): @@ -137,13 +151,16 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # cnn4: cnn4 = ld.cnn2d_layer(cnn3, cnnkernels[3], cnnstrides[3], cnnfeatures[3], "cnn_4") - # lstm layer (input y_0 and hidden state, output prediction y_1 and new hidden state new_state) + # Convolutional lstm layer (input y_0 and hidden state, output prediction y_1 and new hidden state new_state) y_0 = cnn4 #y_0 should be same shape as first argument in clstm.clstm() above. - y_1, new_state = cell(y_0, new_state) + y_1, new_state = convcell(y_0, new_state, 'Conv') + + # deConvolutional LSTM layer + y_2, denew_state = deconvcell(y_1, denew_state, 'deConv') # DECODE # cnn5 - cnn5 = ld.dcnn2d_layer(y_1, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_5") + cnn5 = ld.dcnn2d_layer(y_2, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_5") # cnn6 cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_6") # cnn7 @@ -168,8 +185,14 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # Create network to generate predicted video predictframes=50 + ############## + # Setup CLSTM (initialize to zero, but same convcell as in other network) x_pred_long = [] - new_state_pred = cell.set_zero_state(FLAGS.minibatch_size, tf.float32) + new_state_pred = convcell.set_zero_state(FLAGS.minibatch_size, tf.float32) + new_destate_pred = deconvcell.set_zero_state(FLAGS.minibatch_size, tf.float32) + + ####### + # Setup long prediction network for i in xrange(predictframes): # ENCODE @@ -185,13 +208,16 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # cnn4 cnn4 = ld.cnn2d_layer(cnn3, cnnkernels[3], cnnstrides[3], cnnfeatures[3], "cnn_4") - # lstm layer + # Convolutional lstm layer y_0 = cnn4 - y_1, new_state_pred = cell(y_0, new_state_pred) + y_1, new_state_pred = convcell(y_0, new_state_pred, 'Conv') + + # deConvolutional lstm layer + y_2, new_destate_pred = deconvcell(y_1, new_destate_pred, 'deConv') # DECODE # cnn5 - cnn5 = ld.dcnn2d_layer(y_1, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_5") + cnn5 = ld.dcnn2d_layer(y_2, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_5") # cnn6 cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_6") # cnn7 @@ -310,7 +336,7 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): normalnorm=np.sum(dat[0,0]) print("normalnorm=%d" % (normalnorm)) - print("L2 percent loss=%g" % 100.0*(np.sqrt(float(lossm))/float(normalnorm))) + print("L2 percent loss=%g \%" % 100.0*(np.sqrt(float(lossm))/float(normalnorm))) else: # track progress sys.stdout.write('.') From 623a24f8c47de8399acf3eb0a5b0cfec80fdfc0a Mon Sep 17 00:00:00 2001 From: pseudotensor Date: Thu, 2 Feb 2017 17:40:06 -0800 Subject: [PATCH 2/5] minor --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 2bdad6d..5fcd92c 100644 --- a/main.py +++ b/main.py @@ -336,7 +336,7 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): normalnorm=np.sum(dat[0,0]) print("normalnorm=%d" % 
(normalnorm)) - print("L2 percent loss=%g \%" % 100.0*(np.sqrt(float(lossm))/float(normalnorm))) + print("L2 percent loss=%g" % (100.0*(np.sqrt(float(lossm))/float(normalnorm)))) else: # track progress sys.stdout.write('.') From a85fc902ce24f0c1a712e554fa5d762c2c9cd8d6 Mon Sep 17 00:00:00 2001 From: pseudotensor Date: Fri, 3 Feb 2017 10:41:35 -0800 Subject: [PATCH 3/5] better L2 loss estimate --- README.md | 14 ++++++++++++++ main.py | 8 +++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8cfc511..dc4588f 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,15 @@ smplayer out_all2_fast.mp4 * Training Curve in Tensorflow (norm order 40): ![Alt text](https://github.com/pseudotensor/temporal_autoencoder/blob/master/lossexamples/loss_wheel.jpg "Training loss curve for wheel prediction vs. model.") +Notes for wheel case: + +* Longer training frames work better to predict longer + +* Seems to need to have loss over at least one rotation to be able to predict well into multiple frames in the future + +* Central part of wheel diffuses even when otherwise does well. Lack of resolution + + # Parameters: @@ -102,6 +111,7 @@ smplayer out_all2_fast.mp4 2) In balls.py: * SIZE: size of ball's bounding box in pixels +* omega: angular frequency of rotation for modeltype=1 (wheel type) # Ideas and Future Work: @@ -114,6 +124,10 @@ smplayer out_all2_fast.mp4 * Try different hyperparameters +* Try multi-scale for space + +* Try multi-scale for time (to capture periods over long times) + * Try Stacked Conv/Deconv LSTMs (https://arxiv.org/pdf/1506.04214v2.pdf and https://arxiv.org/pdf/1605.07157v4.pdf) * Try skip connections (https://arxiv.org/pdf/1605.07157v4.pdf) diff --git a/main.py b/main.py index 5fcd92c..4aad671 100644 --- a/main.py +++ b/main.py @@ -27,9 +27,9 @@ """directory to store checkpoints""") tf.app.flags.DEFINE_integer('sizexy', 32, """size x and y dimensions for model, training, and prediction""") -tf.app.flags.DEFINE_integer('input_seq_length', 10, +tf.app.flags.DEFINE_integer('input_seq_length', 50, """size of hidden layer""") -tf.app.flags.DEFINE_integer('predict_frame_start', 5, +tf.app.flags.DEFINE_integer('predict_frame_start', 25, """ frame number, in zero-base counting, to start using prediction as output or next input""") tf.app.flags.DEFINE_integer('max_minibatches', 1000000, """maximum number of mini-batches""") @@ -236,6 +236,7 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # Setup loss Computation # Loss computes L2 for original sequence vs. predicted sequence over input_seq_length - (seq.start+1) frames # Compare x^{n+1} to xpred^n (that is supposed to be approximation to x^{n+1}) + # x: batchsize, time steps, sizexy, sizexy, sizez loss = tf.nn.l2_loss(x[:,FLAGS.predict_frame_start+1:,:,:,:] - x_pred[:,:,:,:,:]) #tf.scalar_summary('loss', loss) tf.summary.scalar('loss', loss) @@ -334,7 +335,8 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): print("step=%d nstep=%d" % (step,nstep)) print("L2 loss=%g" % (lossm)) - normalnorm=np.sum(dat[0,0]) + #normalnorm=np.sum(dat[0,0]) + normalnorm=np.sum(dat[0,FLAGS.predict_frame_start+1:,:,:,:]) print("normalnorm=%d" % (normalnorm)) print("L2 percent loss=%g" % (100.0*(np.sqrt(float(lossm))/float(normalnorm)))) else: From b11006ef9f88a8e806dcf00f3f796c71416c5a43 Mon Sep 17 00:00:00 2001 From: pseudotensor Date: Fri, 3 Feb 2017 14:51:15 -0800 Subject: [PATCH 4/5] Not doing timeconv yet, still cleaning up stacked actually so graph more sensible. 
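As context for the percent-loss prints touched in PATCH 2 and PATCH 3 above: the original statement failed both because string-formatting % binds more tightly than *, so only 100.0 reaches the format string, and because the trailing \% leaves an incomplete format specifier. A minimal reproduction with made-up numbers (not repo code):

    lossm, normalnorm = 2500.0, 1000.0
    # Raises: the "%" formatting runs before the "*", and the stray "\%" at the
    # end of the format string is an incomplete specifier (ValueError); even with
    # a valid format it would then multiply a str by a float (TypeError).
    #   print("L2 percent loss=%g \%" % 100.0*((lossm ** 0.5) / normalnorm))
    # PATCH 2 parenthesizes the whole expression instead:
    print("L2 percent loss=%g" % (100.0 * ((lossm ** 0.5) / normalnorm)))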
--- clstm.py | 8 +- layers.py | 18 ++-- main.py | 313 ++++++++++++++++++++++++++++-------------------------- 3 files changed, 176 insertions(+), 163 deletions(-) diff --git a/clstm.py b/clstm.py index 1a23310..a04e8c0 100644 --- a/clstm.py +++ b/clstm.py @@ -5,7 +5,7 @@ class CRNNCell(object): """CRNN cell. """ - def __call__(self, inputs, state, scope=None): + def __call__(self, inputs, state, typec='Conv', scope=None): """Run this RNN cell on inputs, starting from the inputted state. """ raise NotImplementedError("Abstract method") @@ -90,7 +90,7 @@ def __call__(self, inputs, state, typec='Conv', scope=None): doclstm=1 if doclstm==1: - concat = _convolve_linear([inputs, h], self.filter, self.stride, self.features * 4, typec, True) + concat = _convolve_linear([inputs, h], self.filter, self.stride, self.features * 4, typec, True, scope=scope) # http://colah.github.io/posts/2015-08-Understanding-LSTMs/ # i = input_gate, j = new_input, f = forget_gate, o = output_gate (each with clstmfeatures features) i, j, f, o = tf.split(3, 4, concat) @@ -156,7 +156,7 @@ def _convolve_linear(args, filter, stride, features, typec, bias, bias_start=0.0 # Conv if typec=='Conv': - with tf.variable_scope(scope or "Conv"): + with tf.variable_scope(scope):# or "Conv"): # setup weights as kernel x kernel x (input features = clstmfeatures*2) x (new features=clstmfeatures*4) weights = tf.get_variable( "Weights", [filter[0], filter[1], total_arg_size_depth, features], dtype=dtype) res = tf.nn.conv2d(inputs, weights, strides=[1, stride, stride, 1], padding='SAME') @@ -173,7 +173,7 @@ def _convolve_linear(args, filter, stride, features, typec, bias, bias_start=0.0 # deConv if typec=='deConv': - with tf.variable_scope(scope or "deConv"): + with tf.variable_scope(scope):# or "deConv"): # setup weights as kernel x kernel x (new features=clstmfeatures*4) x (input features = clstmfeatures*2). # i.e., 2nd arg to transpose version is [height, width, output_channels, in_channels], where last 2 are switched compared to normal conv2d deweights = tf.get_variable( "deWeights", [filter[0], filter[1], features, total_arg_size_depth], dtype=dtype) diff --git a/layers.py b/layers.py index 11d65c9..6608007 100644 --- a/layers.py +++ b/layers.py @@ -36,6 +36,9 @@ def _activation_summary(x): tf.histogram_summary(tensor_name + '/activations', x) tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x)) + # used by cifar10 and inception in tensorflow for multi-GPU systems that have no P2P. + # But Titan X's have DMA P2P, so change to /gpu:0 + #https://github.com/tensorflow/tensorflow/issues/4881 def _variable_on_cpu(name, shape, initializer): """Helper to create a Variable stored on CPU memory. @@ -47,7 +50,8 @@ def _variable_on_cpu(name, shape, initializer): Returns: Variable Tensor """ - with tf.device('/cpu:0'): +# with tf.device('/cpu:0'): + with tf.device('/gpu:0'): var = tf.get_variable(name, shape, initializer=initializer) return var @@ -77,6 +81,7 @@ def _variable_with_weight_decay(name, shape, stddev, wd): return var def cnn2d_layer(inputs, kernel, stride, features, idx, linear = False): + # below scope means this layer is shared for all calls unless idx is different. 
with tf.variable_scope('{0}_cnn'.format(idx)) as scope: input_channels = inputs.get_shape()[3] # rgb @@ -91,11 +96,12 @@ def cnn2d_layer(inputs, kernel, stride, features, idx, linear = False): return cnn_rect def dcnn2d_layer(inputs, kernel, stride, features, idx, linear = False): - with tf.variable_scope('{0}_trans_cnn'.format(idx)) as scope: + # below scope means this layer is shared for all calls unless idx is different. + with tf.variable_scope('{0}_dcnn'.format(idx)) as scope: input_channels = inputs.get_shape()[3] # rgb - weights = _variable_with_weight_decay('weights', shape=[kernel,kernel,features,input_channels], stddev=0.01, wd=FLAGS.weight_decay) - biases = _variable_on_cpu('biases',[features],tf.constant_initializer(0.01)) + weights = _variable_with_weight_decay('deweights', shape=[kernel,kernel,features,input_channels], stddev=0.01, wd=FLAGS.weight_decay) + biases = _variable_on_cpu('debiases',[features],tf.constant_initializer(0.01)) batch_size = tf.shape(inputs)[0] output_shape = tf.pack([tf.shape(inputs)[0], tf.shape(inputs)[1]*stride, tf.shape(inputs)[2]*stride, features]) dcnn = tf.nn.conv2d_transpose(inputs, weights, output_shape, strides=[1,stride,stride,1], padding='SAME') @@ -116,8 +122,8 @@ def fc_layer(inputs, hiddens, idx, flat = False, linear = False): dim = input_shape[1] inputs_processed = inputs - weights = _variable_with_weight_decay('weights', shape=[dim,hiddens],stddev=FLAGS.weights_init, wd=FLAGS.weight_decay) - biases = _variable_on_cpu('biases', [hiddens], tf.constant_initializer(FLAGS.weights_init)) + weights = _variable_with_weight_decay('fcweights', shape=[dim,hiddens],stddev=FLAGS.weights_init, wd=FLAGS.weight_decay) + biases = _variable_on_cpu('fcbiases', [hiddens], tf.constant_initializer(FLAGS.weights_init)) if linear: return tf.add(tf.matmul(inputs_processed,weights),biases,name=str(idx)+'_fc') diff --git a/main.py b/main.py index 4aad671..7e91746 100644 --- a/main.py +++ b/main.py @@ -53,8 +53,9 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): sizexy=FLAGS.sizexy # Number of rgb or depth estimation at t=0, but no convolution in this direction sizez=3 - # x: minibatches x input_seq_length of frames x sizex x sizey x sizez(rgb) - x = tf.placeholder(tf.float32, [None, FLAGS.input_seq_length, sizexy, sizexy, sizez]) + with tf.name_scope('input'): + # x: minibatches x input_seq_length of frames x sizex x sizey x sizez(rgb) + x = tf.placeholder(tf.float32, [None, FLAGS.input_seq_length, sizexy, sizexy, sizez]) # Setup dropout hold_prob = tf.placeholder("float") @@ -153,20 +154,20 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # Convolutional lstm layer (input y_0 and hidden state, output prediction y_1 and new hidden state new_state) y_0 = cnn4 #y_0 should be same shape as first argument in clstm.clstm() above. 
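    # A sketch of the per-frame pipeline the surrounding main.py hunks wire up
    # (encoder CNN -> ConvLSTM -> deConvLSTM -> decoder dCNN); the callables
    # below stand in for the repo's ld.cnn2d_layer / clstm.clstm /
    # ld.dcnn2d_layer stacks and are not a drop-in implementation.
    def stacked_step(frame, conv_state, deconv_state,
                     encoder, convcell, deconvcell, decoder):
        y_0 = encoder(frame)                                          # cnn_1 .. cnn_4
        y_1, conv_state = convcell(y_0, conv_state, 'Conv')           # convolutional LSTM
        y_2, deconv_state = deconvcell(y_1, deconv_state, 'deConv')   # deconvolutional LSTM
        x_1 = decoder(y_2)                                            # dcnn layers, linear last
        return x_1, conv_state, deconv_state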
- y_1, new_state = convcell(y_0, new_state, 'Conv') + y_1, new_state = convcell(y_0, new_state, 'Conv', 'clstm') # deConvolutional LSTM layer - y_2, denew_state = deconvcell(y_1, denew_state, 'deConv') + y_2, denew_state = deconvcell(y_1, denew_state, 'deConv', 'declstm') # DECODE # cnn5 - cnn5 = ld.dcnn2d_layer(y_2, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_5") + cnn5 = ld.dcnn2d_layer(y_2, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_4") # cnn6 - cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_6") + cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_3") # cnn7 - cnn7 = ld.dcnn2d_layer(cnn6, dcnnkernels[2], dcnnstrides[2], dcnnfeatures[2], "dcnn_7") + cnn7 = ld.dcnn2d_layer(cnn6, dcnnkernels[2], dcnnstrides[2], dcnnfeatures[2], "dcnn_2") # x_1 (linear act) - x_1 = ld.dcnn2d_layer(cnn7, dcnnkernels[3], dcnnstrides[3], dcnnfeatures[3], "dcnn_8", True) + x_1 = ld.dcnn2d_layer(cnn7, dcnnkernels[3], dcnnstrides[3], dcnnfeatures[3], "dcnn_1", True) if i >= FLAGS.predict_frame_start: # add predictive layer x_pred.append(x_1) @@ -210,20 +211,20 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # Convolutional lstm layer y_0 = cnn4 - y_1, new_state_pred = convcell(y_0, new_state_pred, 'Conv') + y_1, new_state_pred = convcell(y_0, new_state_pred, 'Conv', 'clstm') # deConvolutional lstm layer - y_2, new_destate_pred = deconvcell(y_1, new_destate_pred, 'deConv') + y_2, new_destate_pred = deconvcell(y_1, new_destate_pred, 'deConv', 'declstm') # DECODE # cnn5 - cnn5 = ld.dcnn2d_layer(y_2, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_5") + cnn5 = ld.dcnn2d_layer(y_2, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_4") # cnn6 - cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_6") + cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_3") # cnn7 - cnn7 = ld.dcnn2d_layer(cnn6, dcnnkernels[2], dcnnstrides[2], dcnnfeatures[2], "dcnn_7") + cnn7 = ld.dcnn2d_layer(cnn6, dcnnkernels[2], dcnnstrides[2], dcnnfeatures[2], "dcnn_2") # x_1_pred (linear act) - x_1_pred = ld.dcnn2d_layer(cnn7, dcnnkernels[3], dcnnstrides[3], dcnnfeatures[3], "dcnn_8", True) + x_1_pred = ld.dcnn2d_layer(cnn7, dcnnkernels[3], dcnnstrides[3], dcnnfeatures[3], "dcnn_1", True) if i >= FLAGS.predict_frame_start: x_pred_long.append(x_1_pred) @@ -238,11 +239,15 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # Compare x^{n+1} to xpred^n (that is supposed to be approximation to x^{n+1}) # x: batchsize, time steps, sizexy, sizexy, sizez loss = tf.nn.l2_loss(x[:,FLAGS.predict_frame_start+1:,:,:,:] - x_pred[:,:,:,:,:]) - #tf.scalar_summary('loss', loss) tf.summary.scalar('loss', loss) + normalnorm=tf.nn.l2_loss(x[:,FLAGS.predict_frame_start+1:,:,:,:]) + tf.summary.scalar('normalnorm', normalnorm) + ploss = tf.sqrt(10.0*loss/normalnorm) + tf.summary.scalar('ploss', ploss) # Set training method - train_operation = tf.train.AdamOptimizer(FLAGS.adamvar).minimize(loss) + with tf.name_scope('train'): + train_operation = tf.train.AdamOptimizer(FLAGS.adamvar).minimize(loss) # List of all Variables variables = tf.global_variables() @@ -257,149 +262,151 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # Summary op #summary_op = tf.merge_all_summaries() summary_op = tf.summary.merge_all() - - # Initialize variables - init = tf.global_variables_initializer() - # Start session - sess = tf.Session() - # Initialize Network - if continuetrain==0: - 
print("Initialize network") - sess.run(init) - else: - print("load network") - # http://stackoverflow.com/questions/33759623/tensorflow-how-to-restore-a-previously-saved-model-python - # - # * means all if need specific format then *.csv - list_of_files = glob.glob(FLAGS.ckpt_dir + '/model.ckpt-*.meta') - if(len(list_of_files)==0): + with tf.Session() as sess: + # Initialize variables + init = tf.global_variables_initializer() + + # Start session + sess = tf.Session() + + # Initialize Network + if continuetrain==0: print("Initialize network") sess.run(init) else: - latest_file = max(list_of_files, key=os.path.getctime) - print("latest_file=%s" % (latest_file)) + print("load network") + # http://stackoverflow.com/questions/33759623/tensorflow-how-to-restore-a-previously-saved-model-python # - checkpoint_path = latest_file - saver = tf.train.import_meta_graph(checkpoint_path) - saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir)) - all_vars = tf.get_collection('vars') - m = re.search('ckpt-([0-9]+).meta', latest_file) - nstep = int(m.group(1)) - print("done loading network: nstep=%d" % (nstep)) - - # Setup summary - summary_writer = tf.summary.FileWriter(FLAGS.ckpt_dir, sess.graph) - - # Set number of model frames - #modelframes=FLAGS.input_seq_length+predictframes - modelframes=predictframes - - # Set how often dump video to disk - howoftenvid=1000 - # Set how often reports error to summary - howoftensummary=2000 - # Set how often to write checkpoint file - howoftenckpt=2000 - - ############### - # Training Loop - startstep=nstep - for step in xrange(startstep,FLAGS.max_minibatches): - nstep=step - - # Generate mini-batch - dat = md.generate_model_sample(FLAGS.minibatch_size, FLAGS.input_seq_length, FLAGS.sizexy, num_balls, modeltype) - - # Get model data for comparing to prediction if generating video - if nstep%howoftenvid == 0: - datmodel = md.generate_model_sample(1, modelframes, FLAGS.sizexy, num_balls, modeltype) - # Overwrite so consistent with ground truth for video output - dat[0,0:FLAGS.input_seq_length] = datmodel[0,0:FLAGS.input_seq_length] - - # Train on mini-batch - # Compute error in prediction vs. 
model and compute time of mini-batch task - t = time.time() - _, lossm = sess.run([train_operation, loss],feed_dict={x:dat, hold_prob:FLAGS.hold_prob}) - elapsed = time.time() - t - assert not np.isnan(lossm), 'Model reached lossm = NaN' - - - # Store model and print-out loss - if nstep%howoftensummary == 0 and nstep != 0: - summary_str = sess.run(summary_op, feed_dict={x:dat, hold_prob:FLAGS.hold_prob}) - summary_writer.add_summary(summary_str, nstep) - print("") - print("time per batch is " + str(elapsed) + " seconds") - print("step=%d nstep=%d" % (step,nstep)) - print("L2 loss=%g" % (lossm)) - - #normalnorm=np.sum(dat[0,0]) - normalnorm=np.sum(dat[0,FLAGS.predict_frame_start+1:,:,:,:]) - print("normalnorm=%d" % (normalnorm)) - print("L2 percent loss=%g" % (100.0*(np.sqrt(float(lossm))/float(normalnorm)))) - else: - # track progress - sys.stdout.write('.') - sys.stdout.flush() - - - # Save checkpoint - if nstep%howoftenckpt == 0: - print("Saving checkpoint") - checkpoint_path = os.path.join(FLAGS.ckpt_dir, 'model.ckpt') - saver.save(sess, checkpoint_path, global_step=nstep) - print("checkpoint saved to " + FLAGS.ckpt_dir) - - # Output video of model and prediction for single video in mini-batch at this step - if nstep%howoftenvid == 0: - - # Write model video (includes given and ground truth frames) - video_path = os.path.join(FLAGS.video_dir, '') - - #http://stackoverflow.com/questions/10605163/opencv-videowriter-under-osx-producing-no-output - cc = cv2.cv.CV_FOURCC('m', 'p', '4', 'v') - fps=4 - sizevx=100 - sizevy=100 - sizevid=(sizevx, sizevy) - - print("") - print("Writing model video") - video = cv2.VideoWriter() - success = video.open(video_path + "model_" + str(nstep) + ".mov", cc, fps, sizevid, True) - image = datmodel[0] - print(image.shape) - for i in xrange(modelframes): - x_1_r = np.uint8(np.minimum(1, np.maximum(image[i,:,:,:], 0)) * 255) - new_im = cv2.resize(x_1_r, (sizevx,sizevy)) - video.write(new_im) - video.release() - - # Write given + predicted video - print("Writing predicted video") - video = cv2.VideoWriter() - success = video.open(video_path + "clstm_" + str(nstep) + ".mov", cc, fps, sizevid, True) - - # Preappend starting sequence - image = datmodel[0] - print(image.shape) - for i in xrange(FLAGS.predict_frame_start): - x_1_r = np.uint8(np.minimum(1,np.maximum(image[i,:,:,:], 0)) * 255) - new_im = cv2.resize(x_1_r, (sizevx,sizevy)) - video.write(new_im) - - # Append predicted video - dat_gif = dat - image = sess.run([x_pred_long],feed_dict={x:dat_gif, hold_prob:FLAGS.hold_prob}) - image = image[0][0] - print(image.shape) - for i in xrange(modelframes - FLAGS.predict_frame_start): - x_1_r = np.uint8(np.minimum(1,np.maximum(image[i,:,:,:], 0)) * 255) - new_im = cv2.resize(x_1_r, (sizevx,sizevy)) - video.write(new_im) - video.release() + # * means all if need specific format then *.csv + list_of_files = glob.glob(FLAGS.ckpt_dir + '/model.ckpt-*.meta') + if(len(list_of_files)==0): + print("Initialize network") + sess.run(init) + else: + latest_file = max(list_of_files, key=os.path.getctime) + print("latest_file=%s" % (latest_file)) + # + checkpoint_path = latest_file + saver = tf.train.import_meta_graph(checkpoint_path) + saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir)) + all_vars = tf.get_collection('vars') + m = re.search('ckpt-([0-9]+).meta', latest_file) + nstep = int(m.group(1)) + print("done loading network: nstep=%d" % (nstep)) + + # Setup summary + summary_writer = tf.summary.FileWriter(FLAGS.ckpt_dir, sess.graph) + + # Set number of model 
frames + #modelframes=FLAGS.input_seq_length+predictframes + modelframes=predictframes + + # Set how often dump video to disk + howoftenvid=1000 + # Set how often reports error to summary + howoftensummary=100 + # Set how often to write checkpoint file + howoftenckpt=2000 + + ############### + # Training Loop + startstep=nstep + for step in xrange(startstep,FLAGS.max_minibatches): + nstep=step + + # Generate mini-batch + dat = md.generate_model_sample(FLAGS.minibatch_size, FLAGS.input_seq_length, FLAGS.sizexy, num_balls, modeltype) + + # Get model data for comparing to prediction if generating video + if nstep%howoftenvid == 0: + datmodel = md.generate_model_sample(1, modelframes, FLAGS.sizexy, num_balls, modeltype) + # Overwrite so consistent with ground truth for video output + dat[0,0:FLAGS.input_seq_length] = datmodel[0,0:FLAGS.input_seq_length] + + # Train on mini-batch + # Compute error in prediction vs. model and compute time of mini-batch task + t = time.time() + _, lossm = sess.run([train_operation, loss],feed_dict={x:dat, hold_prob:FLAGS.hold_prob}) + elapsed = time.time() - t + assert not np.isnan(lossm), 'Model reached lossm = NaN' + + + # Store model and print-out loss + if nstep%howoftensummary == 0: + summary_str = sess.run(summary_op, feed_dict={x:dat, hold_prob:FLAGS.hold_prob}) + summary_writer.add_summary(summary_str, nstep) + print("") + print("time per batch is " + str(elapsed) + " seconds") + print("step=%d nstep=%d" % (step,nstep)) + print("L2 loss=%g" % (lossm)) + + #normalnorm=np.sum(dat[0,0]) + normalnorm=np.sum(dat[0,FLAGS.predict_frame_start+1:,:,:,:]) + print("normalnorm=%d" % (normalnorm)) + print("L2 percent loss=%g" % (100.0*(np.sqrt(float(lossm))/float(normalnorm)))) + else: + # track progress + sys.stdout.write('.') + sys.stdout.flush() + + + # Save checkpoint + if nstep%howoftenckpt == 0: + print("Saving checkpoint") + checkpoint_path = os.path.join(FLAGS.ckpt_dir, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=nstep) + print("checkpoint saved to " + FLAGS.ckpt_dir) + + # Output video of model and prediction for single video in mini-batch at this step + if nstep%howoftenvid == 0: + + # Write model video (includes given and ground truth frames) + video_path = os.path.join(FLAGS.video_dir, '') + + #http://stackoverflow.com/questions/10605163/opencv-videowriter-under-osx-producing-no-output + cc = cv2.cv.CV_FOURCC('m', 'p', '4', 'v') + fps=4 + sizevx=100 + sizevy=100 + sizevid=(sizevx, sizevy) + + print("") + print("Writing model video") + video = cv2.VideoWriter() + success = video.open(video_path + "model_" + str(nstep) + ".mov", cc, fps, sizevid, True) + image = datmodel[0] + print(image.shape) + for i in xrange(modelframes): + x_1_r = np.uint8(np.minimum(1, np.maximum(image[i,:,:,:], 0)) * 255) + new_im = cv2.resize(x_1_r, (sizevx,sizevy)) + video.write(new_im) + video.release() + + # Write given + predicted video + print("Writing predicted video") + video = cv2.VideoWriter() + success = video.open(video_path + "clstm_" + str(nstep) + ".mov", cc, fps, sizevid, True) + + # Preappend starting sequence + image = datmodel[0] + print(image.shape) + for i in xrange(FLAGS.predict_frame_start): + x_1_r = np.uint8(np.minimum(1,np.maximum(image[i,:,:,:], 0)) * 255) + new_im = cv2.resize(x_1_r, (sizevx,sizevy)) + video.write(new_im) + + # Append predicted video + dat_gif = dat + image = sess.run([x_pred_long],feed_dict={x:dat_gif, hold_prob:FLAGS.hold_prob}) + image = image[0][0] + print(image.shape) + for i in xrange(modelframes - 
FLAGS.predict_frame_start): + x_1_r = np.uint8(np.minimum(1,np.maximum(image[i,:,:,:], 0)) * 255) + new_im = cv2.resize(x_1_r, (sizevx,sizevy)) + video.write(new_im) + video.release() def main(argv=None): From bed55b0cabe154bebaf13edfc0a75c8753b5b25a Mon Sep 17 00:00:00 2001 From: pseudotensor Date: Fri, 3 Feb 2017 22:05:09 -0800 Subject: [PATCH 5/5] added peek --- clstm.py | 32 +++++++++++++++++++++++++++++--- main.py | 9 +++++---- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/clstm.py b/clstm.py index a04e8c0..a79e467 100644 --- a/clstm.py +++ b/clstm.py @@ -5,7 +5,7 @@ class CRNNCell(object): """CRNN cell. """ - def __call__(self, inputs, state, typec='Conv', scope=None): + def __call__(self, inputs, state, typec='Conv', dopeek=0, scope=None): """Run this RNN cell on inputs, starting from the inputted state. """ raise NotImplementedError("Abstract method") @@ -76,8 +76,13 @@ def state_size(self): def output_size(self): return self._num_units - def __call__(self, inputs, state, typec='Conv', scope=None): + def __call__(self, inputs, state, typec='Conv', dopeek=0, scope=None): """Long short-term memory cell (LSTM).""" + + # whether to use peek on c + #dopeek=0 + + # inputs: batchsize x clstmshape x clstmshape x clstmfeatures with tf.variable_scope(scope or type(self).__name__): # Parameters of gates are concatenated into one multiply for efficiency. @@ -88,6 +93,7 @@ def __call__(self, inputs, state, typec='Conv', scope=None): c, h = tf.split(3, 2, state) # [inputs,h] is: 2 x batchsize x clstmshape x clstmshape x clstmfeatures + doclstm=1 if doclstm==1: concat = _convolve_linear([inputs, h], self.filter, self.stride, self.features * 4, typec, True, scope=scope) @@ -96,12 +102,25 @@ def __call__(self, inputs, state, typec='Conv', scope=None): i, j, f, o = tf.split(3, 4, concat) else: # TODO: work in-progress - incat = tf.concat(3,args) + incat = tf.concat(3,[inputs, h]) # general W.x + b separately for each i,j,f,o #i = tf.matmul(incat,weightsi) + biasesi #j = tf.matmul(incat,weightsj) + biasesj #f = tf.matmul(incat,weightsf) + biasesf #o = tf.matmul(incat,weightso) + biaseso + + + #https://github.com/tensorflow/tensorflow/issues/834 + # https://arxiv.org/abs/1308.0850 + # https://arxiv.org/pdf/1506.04214v2.pdf + if dopeek==1: + # setup weights same size as c, since element-wise multiplication + weights_ci = tf.get_variable( "Weights_ci", c.get_shape(), dtype=c.dtype) + i = i + c * weights_ci + + weights_cf = tf.get_variable( "Weights_cf", c.get_shape(), dtype=c.dtype) + f = f + c * weights_cf + # concat: batchsize x clstmshape x clstmshape x (clstmfeatures*4) @@ -109,6 +128,13 @@ def __call__(self, inputs, state, typec='Conv', scope=None): # If stride!=1, then c will be different size than i,j,f,o, so next operation won't work. new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) * self._activation(j)) # If stride!=1, then o different dimension than new_h needs to be. 
(because c and h need to be same size if packing/splitting them as well as recurrently needs to be same size) + + if dopeek==1: + weights_co = tf.get_variable( "Weights_co", c.get_shape(), dtype=c.dtype) + o = o + new_c * weights_co + + + new_h = self._activation(new_c) * tf.nn.sigmoid(o) if self._state_is_tuple: diff --git a/main.py b/main.py index 7e91746..54ff14b 100644 --- a/main.py +++ b/main.py @@ -85,6 +85,7 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): testsize=testsize/cnnstrides[i] # + dopeek=1 # whether to peek as cell state when constructing gates clstminput=sizexy/cnnstrideproduct # must be evenly divisible clstmshape=[clstminput,clstminput] clstmkernel=[3,3] @@ -154,10 +155,10 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # Convolutional lstm layer (input y_0 and hidden state, output prediction y_1 and new hidden state new_state) y_0 = cnn4 #y_0 should be same shape as first argument in clstm.clstm() above. - y_1, new_state = convcell(y_0, new_state, 'Conv', 'clstm') + y_1, new_state = convcell(y_0, new_state, 'Conv', dopeek, 'clstm') # deConvolutional LSTM layer - y_2, denew_state = deconvcell(y_1, denew_state, 'deConv', 'declstm') + y_2, denew_state = deconvcell(y_1, denew_state, 'deConv', dopeek, 'declstm') # DECODE # cnn5 @@ -211,10 +212,10 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2): # Convolutional lstm layer y_0 = cnn4 - y_1, new_state_pred = convcell(y_0, new_state_pred, 'Conv', 'clstm') + y_1, new_state_pred = convcell(y_0, new_state_pred, 'Conv', dopeek, 'clstm') # deConvolutional lstm layer - y_2, new_destate_pred = deconvcell(y_1, new_destate_pred, 'deConv', 'declstm') + y_2, new_destate_pred = deconvcell(y_1, new_destate_pred, 'deConv', dopeek, 'declstm') # DECODE # cnn5
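The peephole ("peek") terms added in PATCH 5 let the input and forget gates see the previous cell state c, and the output gate see the freshly computed new_c, each through an element-wise weight. A NumPy sketch of that variant, reusing conv2d_same and sigmoid from the earlier sketch (W_ci, W_cf, W_co are illustrative element-wise weight arrays, not repo variables):

    import numpy as np  # assumes conv2d_same / sigmoid from the earlier sketch

    def clstm_step_peek(x, c, h, w, b, W_ci, W_cf, W_co, forget_bias=1.0):
        gates = conv2d_same(np.concatenate([x, h], axis=-1), w) + b
        i, j, f, o = np.split(gates, 4, axis=-1)
        i = i + c * W_ci                      # input gate peeks at previous cell state
        f = f + c * W_cf                      # forget gate peeks at previous cell state
        new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
        o = o + new_c * W_co                  # output gate peeks at the new cell state
        new_h = np.tanh(new_c) * sigmoid(o)
        return new_h, new_c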