diff --git a/README.md b/README.md
index 55a3550..b0be50c 100644
--- a/README.md
+++ b/README.md
@@ -6,10 +6,10 @@
 # Inspired by papers:
 
+https://arxiv.org/abs/1506.04214 (Conv LSTM)
 http://www.jmlr.org/proceedings/papers/v2/sutskever07a/sutskever07a.pdf
 https://arxiv.org/abs/1411.4389
 https://arxiv.org/abs/1504.08023
-https://arxiv.org/abs/1506.04214 (like this paper with RNN but now with LSTM)
 https://arxiv.org/abs/1511.06380
 https://arxiv.org/abs/1511.05440
 https://arxiv.org/abs/1605.08104
@@ -21,6 +21,17 @@ https://arxiv.org/abs/1502.04681
 https://arxiv.org/abs/1605.07157
 http://www.ri.cmu.edu/pub_files/2014/3/egpaper_final.pdf
 
+# Papers on temporal convolution
+
+http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_JiXYY10.pdf
+https://arxiv.org/pdf/1506.01911.pdf
+http://link.springer.com/chapter/10.1007/978-3-642-25446-8_4#page-1
+https://ai2-s2-pdfs.s3.amazonaws.com/12b6/551a0f9f5aa62f7d37f03ebc66631e529c4b.pdf
+http://vision.stanford.edu/pdf/karpathy14.pdf
+https://arxiv.org/abs/1412.0767
+https://arxiv.org/abs/1511.06432
+For 3D convolutions in TensorFlow: https://www.tensorflow.org/api_docs/python/nn/convolution
+
 # Uses parts of (or inspired by) the following repos:
 
 https://github.com/tensorflow/models/blob/master/real_nvp/real_nvp_utils.py
@@ -53,24 +64,28 @@ cs231n.stanford.edu/reports2016/223_Report.pdf
 
 # Program Requirements:
 
-* Tensorflow and related packages like python
-* OpenCV
+* TensorFlow (1.0)
+* Python (2.7)
+* OpenCV (2)
 
 # Post-Processing requirements
 
 * avconv, mencoder, MP4Box, smplayer
 
+sudo apt-get install imagemagick libav-tools mencoder gpac smplayer
+
 # How to run:
 
-python main.py
+python main.py --num_gpus=N
+
+where N can be 1 up to the number of GPU boards actually on the node.
 
-Post-processing: making model vs. predicted video:
+# Post-processing: making model vs. predicted video:
 
 sh mergemov.sh
 smplayer out_all.mp4
-or
 smplayer out_all2_fast.mp4
 
 # Some training results:
@@ -88,20 +103,26 @@ smplayer out_all2_fast.mp4
 
 * Training Curve in Tensorflow (norm order 40):
 ![Alt text](https://github.com/pseudotensor/temporal_autoencoder/blob/master/lossexamples/loss_wheel.jpg "Training loss curve for wheel prediction vs. model.")
 
+Notes for the wheel case:
+
+* Training on longer frame sequences helps the model predict further into the future
+
+* The loss seems to need to cover at least one full rotation before the model can predict well multiple frames into the future
+
+* The central part of the wheel diffuses even when the rest is predicted well, likely due to lack of resolution
+
+
 # Parameters:
 
 1) In main.py:
 
 * Choose global flags
 
-* In main():
-  * Choose to use checkpoints (if exist) or not: continuetrain
-  * type of model: modeltype
-  * number of balls: num_balls
 
 2) In balls.py:
 
 * SIZE: size of ball's bounding box in pixels
+* omega: angular frequency of rotation for modeltype=1 (wheel type)
 
 # Ideas and Future Work:
 
 * Try more filters
 
-* Try temporal convolution
+* Try L2 loss not only on (or not just on) the final image, but also on the hidden states. This should approximate adversarial networks, which keep the image and the hidden latent variables more smoothly connected (i.e. avoid a fractured manifold).
+
+* Try different hyperparameters
+
+* Try multi-scale for space (http://vision.stanford.edu/pdf/karpathy14.pdf)
+
+* Try multi-scale for time (to capture periods over long times)
+
+* Try stacked Conv/Deconv LSTMs (https://arxiv.org/pdf/1506.04214v2.pdf and https://arxiv.org/pdf/1605.07157v4.pdf)
+
+* Try skip connections (https://arxiv.org/pdf/1605.07157v4.pdf)
+
+* Try backward-in-time connections (analogous to implicit time stepping vs. explicit forward-Euler stepping): pass the cell state backwards, along a diagonal in space-time. https://arxiv.org/pdf/1506.01911.pdf
+
+* Try temporal convolutions; these can be factorized into alternating spatial and temporal convolutions to reduce the parameter count
 
 * Try other LSTM architectures (C-peek, bind forget-recall, GRU, etc.)
 
@@ -123,6 +158,8 @@ http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/
 http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/
 (pytorch) http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/
 https://arxiv.org/pdf/1511.05644v2.pdf
+https://arxiv.org/pdf/1612.00005v1.pdf
+https://arxiv.org/pdf/1701.06547.pdf
 
 * Try more depth in time
 
diff --git a/clstm.py b/clstm.py
index e3e2a41..bfffd08 100644
--- a/clstm.py
+++ b/clstm.py
@@ -5,7 +5,7 @@ class CRNNCell(object):
   """CRNN cell.
   """
 
-  def __call__(self, inputs, state, scope=None):
+  def __call__(self, inputs, state, typec='Conv', dopeek=0, scope=None):
     """Run this RNN cell on inputs, starting from the given state.
     """
     raise NotImplementedError("Abstract method")
@@ -42,12 +42,13 @@ class clstm(CRNNCell):
 
   # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py
 
-  def __init__(self, shape, filter, features, forget_bias=1.0, input_size=None,
+  def __init__(self, shape, filter, stride, features, forget_bias=1.0, input_size=None,
                state_is_tuple=False, activation=tf.nn.tanh):
     """Initialize the basic CLSTM cell.
     Args:
       shape: int tuple of the height and width of the cell
       filter: int tuple of the height and width of the filter
+      stride: stride to use for the convolution or deconvolution
       features: int of the depth of the cell
       forget_bias: float, the bias added to forget gates (see above).
       input_size: Deprecated.
@@ -60,6 +61,7 @@ def __init__(self, shape, filter, features, forget_bias=1.0, input_size=None,
       logging.warn("%s: Input_size parameter is deprecated.", self)
     self.shape = shape
     self.filter = filter
+    self.stride = stride
     self.features = features
     self._forget_bias = forget_bias
     self._state_is_tuple = state_is_tuple
@@ -74,8 +76,13 @@ def state_size(self):
   def output_size(self):
     return self._num_units
 
-  def __call__(self, inputs, state, scope=None):
+  def __call__(self, inputs, state, typec='Conv', dopeek=0, scope=None):
     """Long short-term memory cell (LSTM)."""
+
+    # dopeek: whether the gates also peek at the cell state c
+
+    # inputs: batchsize x clstmshape x clstmshape x clstmfeatures
     with tf.variable_scope(scope or type(self).__name__):
       # Parameters of gates are concatenated into one multiply for efficiency.
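For reference, the peephole ("peek") connections that the new `dopeek` argument enables are implemented in the next hunk: the input and forget gates peek at the previous cell state, and the output gate peeks at the freshly updated one, each through element-wise weights (https://arxiv.org/abs/1308.0850). A minimal NumPy sketch of that arithmetic, with toy shapes and made-up names (illustration only, not the repo's code):

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def peephole_update(i, j, f, o, c, w_ci, w_cf, w_co, forget_bias=1.0):
    """i, j, f, o: pre-activation gate maps; c: previous cell state.
    w_ci, w_cf, w_co: peephole weights, same shape as c (element-wise)."""
    i = i + c * w_ci                 # input gate peeks at old cell state
    f = f + c * w_cf                 # forget gate peeks at old cell state
    new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
    o = o + new_c * w_co             # output gate peeks at the new cell state
    new_h = np.tanh(new_c) * sigmoid(o)
    return new_c, new_h

# toy shapes: batchsize x clstmshape x clstmshape x clstmfeatures
shape = (16, 8, 8, 4)
i, j, f, o = [np.random.randn(*shape) for _ in range(4)]
c = np.zeros(shape)
w_ci, w_cf, w_co = [0.01 * np.random.randn(*shape) for _ in range(3)]
new_c, new_h = peephole_update(i, j, f, o, c, w_ci, w_cf, w_co)
```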
@@ -83,42 +90,65 @@ def __call__(self, inputs, state, scope=None):
         c, h = state
       else:
         # c and h are each batchsize x clstmshape x clstmshape x clstmfeatures
-        c, h = tf.split(3, 2, state)
+        c, h = tf.split(axis=3, num_or_size_splits=2, value=state)
 
       # [inputs,h] is: 2 x batchsize x clstmshape x clstmshape x clstmfeatures
+      doclstm=1 # use the fused convolutional gates; the else branch is work-in-progress
       if doclstm==1:
-        concat = _convolve_linear([inputs, h], self.filter, self.features * 4, True)
+        concat = _convolve_linear([inputs, h], self.filter, self.stride, self.features * 4, typec, True, scope=scope)
         # http://colah.github.io/posts/2015-08-Understanding-LSTMs/
         # i = input_gate, j = new_input, f = forget_gate, o = output_gate (each with clstmfeatures features)
-        i, j, f, o = tf.split(3, 4, concat)
+        i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=concat)
       else:
-        # work in-progress
-        incat = tf.concat(3,args)
+        # TODO: work in-progress
+        incat = tf.concat(axis=3, values=[inputs, h])
         # general W.x + b separately for each of i,j,f,o
         #i = tf.matmul(incat,weightsi) + biasesi
         #j = tf.matmul(incat,weightsj) + biasesj
         #f = tf.matmul(incat,weightsf) + biasesf
         #o = tf.matmul(incat,weightso) + biaseso
+
+      # https://github.com/tensorflow/tensorflow/issues/834
+      # https://arxiv.org/abs/1308.0850
+      # https://arxiv.org/pdf/1506.04214v2.pdf
+      if dopeek==1:
+        # peephole weights have the same shape as c, since the products are element-wise
+        weights_ci = tf.get_variable( "Weights_ci", c.get_shape(), dtype=c.dtype)
+        i = i + c * weights_ci
+
+        weights_cf = tf.get_variable( "Weights_cf", c.get_shape(), dtype=c.dtype)
+        f = f + c * weights_cf
+
+      # concat: batchsize x clstmshape x clstmshape x (clstmfeatures*4)
+      # Hadamard (element-by-element) products (*)
+      # If stride!=1, c would have a different size than i,j,f,o, so the next operation won't work.
+      new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) * self._activation(j))
+      # If stride!=1, o would have a different dimension than new_h needs (c and h must stay the
+      # same size, since they are packed/split together and fed back recurrently).
+
+      if dopeek==1:
+        weights_co = tf.get_variable( "Weights_co", c.get_shape(), dtype=c.dtype)
+        o = o + new_c * weights_co
+
-      new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) *
-               self._activation(j))
       new_h = self._activation(new_c) * tf.nn.sigmoid(o)
 
       if self._state_is_tuple:
         new_state = LSTMStateTuple(new_c, new_h)
       else:
-        new_state = tf.concat(3, [new_c, new_h])
+        new_state = tf.concat(axis=3, values=[new_c, new_h])
 
       return new_h, new_state
 
-def _convolve_linear(args, filter, features, bias, bias_start=0.0, scope=None):
+def _convolve_linear(args, filter, stride, features, typec, bias, bias_start=0.0, scope=None):
   """convolution:
   Args:
     args: 4D Tensor or list of 4D, batch x n, Tensors.
     filter: int tuple of filter with height and width.
+    stride: stride for the convolution
     features: int, as number of features.
     bias_start: starting value to initialize bias; 0 by default.
     scope: VariableScope for created subgraph; defaults to "Linear".
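In the hunk above, all four gates come from a single convolution over the concatenated `[inputs, h]`, and `tf.split` then separates the 4*features channels back into `i, j, f, o`. A standalone sketch of that fused-gate pattern, with assumed toy shapes and variable names (not the repo's exact code):

```python
import tensorflow as tf

batch, size, features = 16, 8, 4
inputs = tf.placeholder(tf.float32, [batch, size, size, features])
h = tf.placeholder(tf.float32, [batch, size, size, features])

# one convolution over [inputs, h] yields all gates at once: 4*features channels
incat = tf.concat(axis=3, values=[inputs, h])   # batch x size x size x 2*features
weights = tf.get_variable("Weights", [3, 3, 2 * features, 4 * features])
gates = tf.nn.conv2d(incat, weights, strides=[1, 1, 1, 1], padding='SAME')

# split the 4*features channels back into the four gate maps
i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=gates)
# each of i, j, f, o: batch x size x size x features
```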
@@ -144,23 +174,49 @@ def _convolve_linear(args, filter, features, bias, bias_start=0.0, scope=None):
 
   dtype = [a.dtype for a in args][0]
 
-  # Computation
-  with tf.variable_scope(scope or "Conv"):
-    # setup weights as kernel x kernel x (input features = clstmfeatures*2) x (new features=clstmfeatures*4)
-    mat = tf.get_variable(
-        "Mat", [filter[0], filter[1], total_arg_size_depth, features], dtype=dtype)
-    if len(args) == 1:
-      res = tf.nn.conv2d(args[0], mat, strides=[1, 1, 1, 1], padding='SAME')
-    else:
-      # first argument is batchsize x clstmshape x clstmshape x (2*clstmfeatures)
-      res = tf.nn.conv2d(tf.concat(3, args), mat, strides=[1, 1, 1, 1], padding='SAME')
-      # res: batchsize x clstmshape x clstmshape x (clstmfeatures*4)
-  if not bias:
-    return res
-  bias_term = tf.get_variable(
-      "Bias", [features],
-      dtype=dtype,
-      initializer=tf.constant_initializer(
-          bias_start, dtype=dtype))
+  # concat
+  if len(args) == 1:
+    inputs = args[0]
+  else:
+    # batchsize x clstmshape x clstmshape x (2*clstmfeatures)
+    inputs = tf.concat(axis=3, values=args)
+
+  # Conv
+  if typec=='Conv':
+    with tf.variable_scope(scope):
+      # setup weights as kernel x kernel x (input features = clstmfeatures*2) x (new features=clstmfeatures*4)
+      weights = tf.get_variable( "Weights", [filter[0], filter[1], total_arg_size_depth, features], dtype=dtype)
+      res = tf.nn.conv2d(inputs, weights, strides=[1, stride, stride, 1], padding='SAME')
+      # res: batchsize x clstmshape x clstmshape x (clstmfeatures*4)
+
+      # BIAS
+      if bias:
+        bias_term = tf.get_variable(
+            "Bias", [features],
+            dtype=dtype,
+            initializer=tf.constant_initializer(
+                bias_start, dtype=dtype))
+      else:
+        bias_term = tf.zeros_like(res)
+
+  # deConv
+  if typec=='deConv':
+    with tf.variable_scope(scope):
+      # setup weights as kernel x kernel x (new features=clstmfeatures*4) x (input features = clstmfeatures*2).
+      # i.e., the 2nd argument of the transposed version is [height, width, output_channels, in_channels],
+      # where the last two are switched compared to normal conv2d
+      deweights = tf.get_variable( "deWeights", [filter[0], filter[1], features, total_arg_size_depth], dtype=dtype)
+      output_shape = tf.stack([tf.shape(inputs)[0], tf.shape(inputs)[1]*stride, tf.shape(inputs)[2]*stride, features])
+      res = tf.nn.conv2d_transpose(inputs, deweights, output_shape, strides=[1, stride, stride, 1], padding='SAME')
+
+      # BIAS
+      if bias:
+        bias_term = tf.get_variable(
+            "deBias", [features],
+            dtype=dtype,
+            initializer=tf.constant_initializer(
+                bias_start, dtype=dtype))
+      else:
+        bias_term = tf.zeros_like(res)
 
   return res + bias_term
diff --git a/layers.py b/layers.py
index 11d65c9..42755ab 100644
--- a/layers.py
+++ b/layers.py
@@ -33,9 +33,12 @@ def _activation_summary(x):
     nothing
   """
   tensor_name = x.op.name
-  tf.histogram_summary(tensor_name + '/activations', x)
-  tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
+  tf.summary.histogram(tensor_name + '/activations', x)
+  tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
 
+# Placing variables on the CPU is what cifar10 and inception in tensorflow do for
+# multi-GPU systems that have no P2P. But Titan X's have DMA P2P, so this could be
+# changed to /gpu:0. See https://github.com/tensorflow/tensorflow/issues/4881
 def _variable_on_cpu(name, shape, initializer):
   """Helper to create a Variable stored on CPU memory.
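The deConv branch above must hand `tf.nn.conv2d_transpose` an explicit `output_shape`; with 'SAME' padding and the divisibility that the stride checks enforce, a transposed convolution with stride s maps spatial size n to n*s. A quick standalone check of that arithmetic, with toy sizes and names (not the repo's exact code):

```python
import tensorflow as tf

stride, in_features, features = 2, 4, 8
inputs = tf.placeholder(tf.float32, [None, 8, 8, in_features])

# transposed-conv filter is [height, width, output_channels, in_channels]
deweights = tf.get_variable("deWeights", [3, 3, features, in_features])

# scale the spatial dims by the stride to build the output shape
output_shape = tf.stack([tf.shape(inputs)[0],
                         tf.shape(inputs)[1] * stride,
                         tf.shape(inputs)[2] * stride,
                         features])
res = tf.nn.conv2d_transpose(inputs, deweights, output_shape,
                             strides=[1, stride, stride, 1], padding='SAME')
# res: batch x 16 x 16 x features
```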
@@ -48,6 +51,7 @@ def _variable_on_cpu(name, shape, initializer):
     Variable Tensor
   """
   with tf.device('/cpu:0'):
+# with tf.device('/gpu:0'):
     var = tf.get_variable(name, shape, initializer=initializer)
   return var
 
@@ -71,12 +75,13 @@ def _variable_with_weight_decay(name, shape, stddev, wd):
   var = _variable_on_cpu(name, shape, tf.truncated_normal_initializer(stddev=stddev))
   if wd:
-    weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
+    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
     weight_decay.set_shape([])
     tf.add_to_collection('losses', weight_decay)
   return var
 
 def cnn2d_layer(inputs, kernel, stride, features, idx, linear = False):
+  # the scope below means this layer is shared across all calls unless idx differs.
   with tf.variable_scope('{0}_cnn'.format(idx)) as scope:
     input_channels = inputs.get_shape()[3] # rgb
@@ -91,13 +96,15 @@ def cnn2d_layer(inputs, kernel, stride, features, idx, linear = False):
   return cnn_rect
 
 def dcnn2d_layer(inputs, kernel, stride, features, idx, linear = False):
-  with tf.variable_scope('{0}_trans_cnn'.format(idx)) as scope:
+  # the scope below means this layer is shared across all calls unless idx differs.
+  with tf.variable_scope('{0}_dcnn'.format(idx)) as scope:
     input_channels = inputs.get_shape()[3] # rgb
-    weights = _variable_with_weight_decay('weights', shape=[kernel,kernel,features,input_channels], stddev=0.01, wd=FLAGS.weight_decay)
-    biases = _variable_on_cpu('biases',[features],tf.constant_initializer(0.01))
+    weights = _variable_with_weight_decay('deweights', shape=[kernel,kernel,features,input_channels], stddev=0.01, wd=FLAGS.weight_decay)
+    biases = _variable_on_cpu('debiases',[features],tf.constant_initializer(0.01))
+
     batch_size = tf.shape(inputs)[0]
-    output_shape = tf.pack([tf.shape(inputs)[0], tf.shape(inputs)[1]*stride, tf.shape(inputs)[2]*stride, features])
+    output_shape = tf.stack([tf.shape(inputs)[0], tf.shape(inputs)[1]*stride, tf.shape(inputs)[2]*stride, features])
     dcnn = tf.nn.conv2d_transpose(inputs, weights, output_shape, strides=[1,stride,stride,1], padding='SAME')
     dcnn_biased = tf.nn.bias_add(dcnn, biases)
     if linear:
@@ -116,8 +123,8 @@ def fc_layer(inputs, hiddens, idx, flat = False, linear = False):
     dim = input_shape[1]
     inputs_processed = inputs
 
-  weights = _variable_with_weight_decay('weights', shape=[dim,hiddens],stddev=FLAGS.weights_init, wd=FLAGS.weight_decay)
-  biases = _variable_on_cpu('biases', [hiddens], tf.constant_initializer(FLAGS.weights_init))
+  weights = _variable_with_weight_decay('fcweights', shape=[dim,hiddens],stddev=FLAGS.weights_init, wd=FLAGS.weight_decay)
+  biases = _variable_on_cpu('fcbiases', [hiddens], tf.constant_initializer(FLAGS.weights_init))
   if linear:
     return tf.add(tf.matmul(inputs_processed,weights),biases,name=str(idx)+'_fc')
 
diff --git a/main.py b/main.py
index 82605ae..b20b939 100644
--- a/main.py
+++ b/main.py
@@ -27,10 +27,14 @@
                            """directory to store checkpoints""")
 tf.app.flags.DEFINE_integer('sizexy', 32,
                            """size x and y dimensions for model, training, and prediction""")
-tf.app.flags.DEFINE_integer('input_seq_length', 10,
+tf.app.flags.DEFINE_integer('sizez', 3,
+                           """size z for rgb or any other such information""")
+tf.app.flags.DEFINE_integer('input_seq_length', 50,
                            """length of the input sequence of frames""")
-tf.app.flags.DEFINE_integer('predict_frame_start', 5,
+tf.app.flags.DEFINE_integer('predict_frame_start', 25,
                            """frame number (zero-based) at which to start using the prediction as the output and the next input""")
+tf.app.flags.DEFINE_integer('predictframes', 50,
+                           """number of frames to predict""")
 tf.app.flags.DEFINE_integer('max_minibatches', 1000000,
                            """maximum number of mini-batches""")
 tf.app.flags.DEFINE_float('hold_prob', .8,
@@ -39,187 +43,379 @@
                           """adamvar for dropout""")
 tf.app.flags.DEFINE_integer('minibatch_size', 16,
                            """mini-batch size""")
+tf.app.flags.DEFINE_integer('init_num_balls', 1,
+                           """How many balls to model.""")
+# Choose which model to work on
+# 0 = classic bouncing balls
+# 1 = rotating "ball"
+tf.app.flags.DEFINE_integer('modeltype', 1,
+                           """Type of model.""")
+tf.app.flags.DEFINE_integer('num_gpus', 1,
+                           """How many GPUs to use.""")
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+                           """Whether to log device placement.""")
+
+tf.app.flags.DEFINE_integer('continuetrain', 1,
+                           """Whether to continue to train (1, default) or not (0).""")
+
+
+def total_parameters():
+  total_parameters = 0
+  for variable in tf.trainable_variables():
+    # shape is an array of tf.Dimension
+    shape = variable.get_shape()
+    variable_parameters = 1
+    for dim in shape:
+      variable_parameters *= dim.value
+    total_parameters += variable_parameters
+  print("total_parameters=%d" % (total_parameters))
+
+
+def tower_loss(x,x_dropout,scope):
+  """Calculate the total loss on a single tower running the model.
+
+  Args:
+    x: input sequence of frames for this tower
+    x_dropout: dropout-masked version of x
+    scope: unique prefix string identifying the tower, e.g. 'tower0'
+
+  Returns:
+    loss, normalnorm, ploss, x_pred, x_pred_long
+  """
+
+  #######################################################
+  # Create network to train
+  #
+  # Setup inputs
+  # size in x and y directions (same for each)
+  sizexy=FLAGS.sizexy
+  # Number of channels (rgb or depth) at t=0; no convolution is done in this direction
+  sizez=FLAGS.sizez
+
+
+  cnnkernels=[3,3,3,1]
+  cnnstrides=[2,1,2,1]
+  cnnstrideproduct=np.product(cnnstrides)
+  cnnfeatures=[8,8,8,4]
+  #
+  # check strides are acceptable
+  testsize=sizexy
+  for i in xrange(len(cnnstrides)):
+    if testsize % cnnstrides[i] !=0:
+      print("sizexy must be evenly divisible by each stride, in order to keep input to cnn or dcnn an integer number of pixels")
+      sys.exit(1)
+    else:
+      testsize=testsize/cnnstrides[i]
+  #
+
+  dopeek=1 # whether to peek at the cell state when constructing gates
+  clstminput=sizexy/cnnstrideproduct # must be evenly divisible
+  clstmshape=[clstminput,clstminput]
+  clstmkernel=[3,3]
+  clstmstride=1 # currently needs to be 1 unless tf.pad() or tf.nn.fractional_avg_pool() is implemented
+  clstmfeatures=cnnfeatures[3] # same as features of last cnn layer fed into clstm
+  #
+  dcnnkernels=[1,3,3,3] # reasonably the reverse order of cnnkernels
+  dcnnstrides=[1,2,1,2] # reasonably the reverse order of cnnstrides
+  dcnnstrideproduct=np.product(dcnnstrides)
+  # last dcnn feature is rgb again
+  dcnnfeatures=[8,8,8,sizez] # reasonably the reverse order of cnnfeatures, except last cnnfeatures and last dcnnfeatures (note, features are for the produced object, while kernels and strides operate on the current object, hence the apparent shift)
+  #
+  # check d-strides are acceptable
+  testsize=sizexy
+  for i in xrange(len(dcnnstrides)):
+    if testsize % dcnnstrides[i] !=0:
+      print("sizexy must be evenly divisible by each d-stride, in order to keep input to cnn or dcnn an integer number of pixels")
+      sys.exit(1)
+    else:
+      testsize=testsize/dcnnstrides[i]
+  #
+  # ensure stride products match so the input and output have the same size, because we feed the output back as input
+  if dcnnstrideproduct!=cnnstrideproduct:
+    print("cnn and dcnn strides must match, to keep the input and output the same size")
+    sys.exit(1)
+  #
+  #
+  #
+
+
+
+  ####################
+  # Setup CLSTM
+  with tf.variable_scope('clstm', initializer = tf.random_uniform_initializer(-.01, 0.1)):
+    # input shape, kernel filter size, stride, number of features
+    convcell = clstm.clstm(clstmshape, clstmkernel, clstmstride, clstmfeatures)
+    # state: batchsize x clstmshape x clstmshape x clstmfeatures
+    new_state = convcell.set_zero_state(FLAGS.minibatch_size, tf.float32)
+  # Setup deCLSTM
+  with tf.variable_scope('declstm', initializer = tf.random_uniform_initializer(-.01, 0.1)):
+    # input shape, kernel filter size, stride, number of features
+    deconvcell = clstm.clstm(clstmshape, clstmkernel, clstmstride, clstmfeatures)
+    # state: batchsize x clstmshape x clstmshape x clstmfeatures
+    denew_state = deconvcell.set_zero_state(FLAGS.minibatch_size, tf.float32)
+
+
+  ########################
+  # Create CNN-LSTM-dCNN that maps input_seq_length-1 frames at time n to input_seq_length-1 frames at time n+1
+  x_pred = []
+  for i in xrange(FLAGS.input_seq_length-1):
+
+    # ENCODE
+    # CNN: (name, 2D square kernel filter size, stride for spatial domain, number of feature maps, name) using ELUs
+    # cnn1:
+    if i < FLAGS.predict_frame_start:
+      # only dropout on training layers
+      cnn1 = ld.cnn2d_layer(x_dropout[:,i,:,:,:], cnnkernels[0], cnnstrides[0], cnnfeatures[0], "cnn_1")
+    else:
+      # direct input of prior output for predictive layers
+      cnn1 = ld.cnn2d_layer(x_1, cnnkernels[0], cnnstrides[0], cnnfeatures[0], "cnn_1")
+    # cnn2:
+    cnn2 = ld.cnn2d_layer(cnn1, cnnkernels[1], cnnstrides[1], cnnfeatures[1], "cnn_2")
+    # cnn3:
+    cnn3 = ld.cnn2d_layer(cnn2, cnnkernels[2], cnnstrides[2], cnnfeatures[2], "cnn_3")
+    # cnn4:
+    cnn4 = ld.cnn2d_layer(cnn3, cnnkernels[3], cnnstrides[3], cnnfeatures[3], "cnn_4")
+
+    # Convolutional lstm layer (input y_0 and hidden state; output prediction y_1 and new hidden state new_state)
+    y_0 = cnn4 # y_0 should have the same shape as the first argument in clstm.clstm() above.
+    y_1, new_state = convcell(y_0, new_state, 'Conv', dopeek, 'clstm')
+
+    # deConvolutional LSTM layer
+    y_2, denew_state = deconvcell(y_1, denew_state, 'deConv', dopeek, 'declstm')
+
+    # DECODE
+    # cnn5
+    cnn5 = ld.dcnn2d_layer(y_2, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_4")
+    # cnn6
+    cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_3")
+    # cnn7
+    cnn7 = ld.dcnn2d_layer(cnn6, dcnnkernels[2], dcnnstrides[2], dcnnfeatures[2], "dcnn_2")
+    # x_1 (linear act)
+    x_1 = ld.dcnn2d_layer(cnn7, dcnnkernels[3], dcnnstrides[3], dcnnfeatures[3], "dcnn_1", True)
+    if i >= FLAGS.predict_frame_start:
+      # add predictive layer
+      x_pred.append(x_1)
+    # set reuse to true after first go
+    if i == 0:
+      tf.get_variable_scope().reuse_variables()
+
+  # Pack-up predictive layer's results
+  # e.g. for input_seq_length=10, the loop runs 0..9 and x_pred holds i=5,6,7,8,9 (i.e.
5 frame prediction) + x_pred = tf.stack(x_pred) + # reshape so in order of minibatch x frame x sizex x sizey x rgb + x_pred = tf.transpose(x_pred, [1,0,2,3,4]) + + + ####################################################### + # Create network to generate predicted video (TODO: could keep on only 1 gpu or on cpu) + predictframes=FLAGS.predictframes + + ############## + # Setup CLSTM (initialize to zero, but same convcell as in other network) + x_pred_long = [] + new_state_pred = convcell.set_zero_state(FLAGS.minibatch_size, tf.float32) + new_destate_pred = deconvcell.set_zero_state(FLAGS.minibatch_size, tf.float32) + + ####### + # Setup long prediction network + for i in xrange(predictframes): + + # ENCODE + # cnn1 + if i < FLAGS.predict_frame_start: # use known sequence for this many frames + cnn1 = ld.cnn2d_layer(x[:,i,:,:,:], cnnkernels[0], cnnstrides[0], cnnfeatures[0], "cnn_1") + else: # use generated sequence for rest of frames + cnn1 = ld.cnn2d_layer(x_1_pred, cnnkernels[0], cnnstrides[0], cnnfeatures[0], "cnn_1") + # cnn2 + cnn2 = ld.cnn2d_layer(cnn1, cnnkernels[1], cnnstrides[1], cnnfeatures[1], "cnn_2") + # cnn3 + cnn3 = ld.cnn2d_layer(cnn2, cnnkernels[2], cnnstrides[2], cnnfeatures[2], "cnn_3") + # cnn4 + cnn4 = ld.cnn2d_layer(cnn3, cnnkernels[3], cnnstrides[3], cnnfeatures[3], "cnn_4") + + # Convolutional lstm layer + y_0 = cnn4 + y_1, new_state_pred = convcell(y_0, new_state_pred, 'Conv', dopeek, 'clstm') + + # deConvolutional lstm layer + y_2, new_destate_pred = deconvcell(y_1, new_destate_pred, 'deConv', dopeek, 'declstm') + + # DECODE + # cnn5 + cnn5 = ld.dcnn2d_layer(y_2, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_4") + # cnn6 + cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_3") + # cnn7 + cnn7 = ld.dcnn2d_layer(cnn6, dcnnkernels[2], dcnnstrides[2], dcnnfeatures[2], "dcnn_2") + # x_1_pred (linear act) + x_1_pred = ld.dcnn2d_layer(cnn7, dcnnkernels[3], dcnnstrides[3], dcnnfeatures[3], "dcnn_1", True) + if i >= FLAGS.predict_frame_start: + x_pred_long.append(x_1_pred) + + # Pack-up predicted layer's results + x_pred_long = tf.stack(x_pred_long) + x_pred_long = tf.transpose(x_pred_long, [1,0,2,3,4]) + + + ####################################################### + # Setup loss Computation + # Loss computes L2 for original sequence vs. predicted sequence over input_seq_length - (seq.start+1) frames + # Compare x^{n+1} to xpred^n (that is supposed to be approximation to x^{n+1}) + # x: batchsize, time steps, sizexy, sizexy, sizez + loss = tf.nn.l2_loss(x[:,FLAGS.predict_frame_start+1:,:,:,:] - x_pred[:,:,:,:,:]) + tf.summary.scalar('loss', loss) + normalnorm=tf.nn.l2_loss(x[:,FLAGS.predict_frame_start+1:,:,:,:]) + tf.summary.scalar('normalnorm', normalnorm) + ploss = tf.sqrt(10.0*loss/normalnorm) + tf.summary.scalar('ploss', ploss) + + return loss,normalnorm,ploss,x_pred,x_pred_long + + + +def average_gradients(tower_grads): + """Calculate the average gradient for each shared variable across all towers. + + Note that this function provides a synchronization point across all towers. + + Args: + tower_grads: List of lists of (gradient, variable) tuples. The outer list + is over individual gradients. The inner list is over the gradient + calculation for each tower. + Returns: + List of pairs of (gradient, variable) where the gradient has been averaged + across all towers. + """ + average_grads = [] + for grad_and_vars in zip(*tower_grads): + # Note that each grad_and_vars looks like the following: + # ((grad0_gpu0, var0_gpu0), ... 
, (grad0_gpuN, var0_gpuN)) + grads = [] + for g, _ in grad_and_vars: + # Add 0 dimension to the gradients to represent the tower. + expanded_g = tf.expand_dims(g, 0) + + # Append on a 'tower' dimension which we will average over below. + grads.append(expanded_g) + + # Average over the 'tower' dimension. + grad = tf.concat(grads, 0) + grad = tf.reduce_mean(grad, 0) + + # Keep in mind that the Variables are redundant because they are shared + # across towers. So .. we will just return the first tower's pointer to + # the Variable. + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads # Function to train autoencoder network -def autoencode(continuetrain=0,modeltype=0,num_balls=2): +def autoencode(continuetrain=0,modeltype=0,init_num_balls=2): - with tf.Graph().as_default(): - - # Setup inputs - # size of balls in x-y directions each (same) - sizexy=FLAGS.sizexy - # Number of rgb or depth estimation at t=0, but no convolution in this direction - sizez=3 - # x: minibatches x input_seq_length of frames x sizex x sizey x sizez(rgb) - x = tf.placeholder(tf.float32, [None, FLAGS.input_seq_length, sizexy, sizexy, sizez]) - - # Setup dropout - hold_prob = tf.placeholder("float") - x_dropout = tf.nn.dropout(x, hold_prob) - - # Some checks - if FLAGS.input_seq_length-1<=FLAGS.predict_frame_start: - print("prediction frame starting point (zero starting point) beyond input size - 1, so no prediction used as next input or even used as any output to compute loss") - exit - - ####################################################### - # Create network to train - # - cnnkernels=[3,3,3,1] - cnnstrides=[2,1,2,1] - cnnstrideproduct=np.product(cnnstrides) - cnnfeatures=[8,8,8,4] - # - # check strides are acceptable - testsize=sizexy - for i in xrange(len(cnnstrides)): - if testsize % cnnstrides[i] !=0: - print("sizexy must be evenly divisible by each stride, in order to keep input to cnn or dcnn an integer number of pixels") - exit - else: - testsize=testsize/cnnstrides[i] - # - - clstminput=sizexy/cnnstrideproduct # must be evenly divisible - clstmshape=[clstminput,clstminput] - clstmkernel=[3,3] - clstmfeatures=cnnfeatures[3] # same as features of last cnn layer fed into clstm - # - dcnnkernels=[1,3,3,3] # reasonably the reverse order of cnnkernels - dcnnstrides=[1,2,1,2] # reasonably the reverse order of cnnstrides - dcnnstrideproduct=np.product(dcnnstrides) - # last dcnn feature is rgb again - dcnnfeatures=[8,8,8,sizez] # reasonably the reverse order of cnnfeatures, except last cnnfeatures and last dcnnfeatures (note, features are for produced object, while kernels and strides operate on current object, hence apparent shift) - # - # check d-strides are acceptable - testsize=sizexy - for i in xrange(len(dcnnstrides)): - if testsize % dcnnstrides[i] !=0: - print("sizexy must be evenly divisible by each d-stride, in order to keep input to cnn or dcnn an integer number of pixels") - exit - else: - testsize=testsize/dcnnstrides[i] - # - # ensure strides cumulate to same total product so input and output same size, because we feed output back as input - if dcnnstrideproduct!=cnnstrideproduct: - print("cnn and dcnn strides must match for creating input size and output same size"); - exit - # - # - # - x_pred = [] - with tf.variable_scope('clstm', initializer = tf.random_uniform_initializer(-.01, 0.1)): - # input shape, kernel filter size, number of features - cell = clstm.clstm(clstmshape, clstmkernel, clstmfeatures) - # state: batchsize x clstmshape x 
clstmshape x clstmfeatures - new_state = cell.set_zero_state(FLAGS.minibatch_size, tf.float32) - - # Create CNN-LSTM-dCNN for an input of input_seq_length-1 frames in n time for an output of input_seq_length-1 frames in n+1 time - for i in xrange(FLAGS.input_seq_length-1): - - # ENCODE - # CNN: (name, 2D square kernel filter size, stride for spatial domain, number of feature maps, name) using ELUs - # cnn1: - if i < FLAGS.predict_frame_start: - # only dropout on training layers - cnn1 = ld.cnn2d_layer(x_dropout[:,i,:,:,:], cnnkernels[0], cnnstrides[0], cnnfeatures[0], "cnn_1") - else: - # direct input of prior output for predictive layers - cnn1 = ld.cnn2d_layer(x_1, cnnkernels[0], cnnstrides[0], cnnfeatures[0], "cnn_1") - # cnn2: - cnn2 = ld.cnn2d_layer(cnn1, cnnkernels[1], cnnstrides[1], cnnfeatures[1], "cnn_2") - # cnn3: - cnn3 = ld.cnn2d_layer(cnn2, cnnkernels[2], cnnstrides[2], cnnfeatures[2], "cnn_3") - # cnn4: - cnn4 = ld.cnn2d_layer(cnn3, cnnkernels[3], cnnstrides[3], cnnfeatures[3], "cnn_4") - - # lstm layer (input y_0 and hidden state, output prediction y_1 and new hidden state new_state) - y_0 = cnn4 #y_0 should be same shape as first argument in clstm.clstm() above. - y_1, new_state = cell(y_0, new_state) - - # DECODE - # cnn5 - cnn5 = ld.dcnn2d_layer(y_1, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_5") - # cnn6 - cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_6") - # cnn7 - cnn7 = ld.dcnn2d_layer(cnn6, dcnnkernels[2], dcnnstrides[2], dcnnfeatures[2], "dcnn_7") - # x_1 (linear act) - x_1 = ld.dcnn2d_layer(cnn7, dcnnkernels[3], dcnnstrides[3], dcnnfeatures[3], "dcnn_8", True) - if i >= FLAGS.predict_frame_start: - # add predictive layer - x_pred.append(x_1) - # set reuse to true after first go - if i == 0: - tf.get_variable_scope().reuse_variables() - - # Pack-up predictive layer's results - # e.g. for input_seq_length=10 loop 0..9, had put into x_pred i=5,6,7,8,9 (i.e. 5 frame prediction) - x_pred = tf.pack(x_pred) - # reshape so in order of minibatch x frame x sizex x sizey x rgb - x_pred = tf.transpose(x_pred, [1,0,2,3,4]) + # Some checks + if FLAGS.input_seq_length-1<=FLAGS.predict_frame_start: + print("prediction frame starting point (zero starting point) beyond input size - 1, so no prediction used as next input or even used as any output to compute loss") + exit + + + # Setup graph and train + with tf.Graph().as_default(), tf.device('/cpu:0'): + + # Create a variable to count the number of train() calls. This equals the + # number of batches processed * FLAGS.num_gpus. 
+ global_step = tf.get_variable( + 'global_step', [], + initializer=tf.constant_initializer(0), trainable=False) + + + # Set training method for all towers + opt = tf.train.AdamOptimizer(FLAGS.adamvar) - ####################################################### - # Create network to generate predicted video - predictframes=50 - - x_pred_long = [] - new_state_pred = cell.set_zero_state(FLAGS.minibatch_size, tf.float32) - for i in xrange(predictframes): - - # ENCODE - # cnn1 - if i < FLAGS.predict_frame_start: # use known sequence for this many frames - cnn1 = ld.cnn2d_layer(x[:,i,:,:,:], cnnkernels[0], cnnstrides[0], cnnfeatures[0], "cnn_1") - else: # use generated sequence for rest of frames - cnn1 = ld.cnn2d_layer(x_1_pred, cnnkernels[0], cnnstrides[0], cnnfeatures[0], "cnn_1") - # cnn2 - cnn2 = ld.cnn2d_layer(cnn1, cnnkernels[1], cnnstrides[1], cnnfeatures[1], "cnn_2") - # cnn3 - cnn3 = ld.cnn2d_layer(cnn2, cnnkernels[2], cnnstrides[2], cnnfeatures[2], "cnn_3") - # cnn4 - cnn4 = ld.cnn2d_layer(cnn3, cnnkernels[3], cnnstrides[3], cnnfeatures[3], "cnn_4") - - # lstm layer - y_0 = cnn4 - y_1, new_state_pred = cell(y_0, new_state_pred) - - # DECODE - # cnn5 - cnn5 = ld.dcnn2d_layer(y_1, dcnnkernels[0], dcnnstrides[0], dcnnfeatures[0], "dcnn_5") - # cnn6 - cnn6 = ld.dcnn2d_layer(cnn5, dcnnkernels[1], dcnnstrides[1], dcnnfeatures[1], "dcnn_6") - # cnn7 - cnn7 = ld.dcnn2d_layer(cnn6, dcnnkernels[2], dcnnstrides[2], dcnnfeatures[2], "dcnn_7") - # x_1_pred (linear act) - x_1_pred = ld.dcnn2d_layer(cnn7, dcnnkernels[3], dcnnstrides[3], dcnnfeatures[3], "dcnn_8", True) - if i >= FLAGS.predict_frame_start: - x_pred_long.append(x_1_pred) - - # Pack-up predicted layer's results - x_pred_long = tf.pack(x_pred_long) - x_pred_long = tf.transpose(x_pred_long, [1,0,2,3,4]) - - - ####################################################### - # Setup loss Computation - # Loss computes L2 for original sequence vs. predicted sequence over input_seq_length - (seq.start+1) frames - # Compare x^{n+1} to xpred^n (that is supposed to be approximation to x^{n+1}) - loss = tf.nn.l2_loss(x[:,FLAGS.predict_frame_start+1:,:,:,:] - x_pred[:,:,:,:,:]) - #tf.scalar_summary('loss', loss) - tf.summary.scalar('loss', loss) - - # Set training method - train_operation = tf.train.AdamOptimizer(FLAGS.adamvar).minimize(loss) + # Setup independent Graph model for each gpu + tower_grads = [] + tower_vars = [] + tower_x_pred = [] + tower_x_pred_long = [] + with tf.variable_scope(tf.get_variable_scope()): # variable scope + + + # setup graph input x and x_dropout + # x: gpus x minibatch size x input_seq_length of frames x sizex x sizey x sizez(rgb) + x = tf.placeholder(tf.float32, [FLAGS.num_gpus, FLAGS.minibatch_size, FLAGS.input_seq_length, FLAGS.sizexy, FLAGS.sizexy, FLAGS.sizez]) + # Setup dropout + hold_prob = tf.placeholder("float") + x_dropout = tf.nn.dropout(x, FLAGS.hold_prob) + + # Go over gpus + for i in xrange(FLAGS.num_gpus): + with tf.device('/gpu:%d' % i): + with tf.name_scope('%s%d' % ("tower", i)) as scope: # only op scope + + # Calculate the loss for one tower. This function + # constructs the entire model but shares the variables across + # all towers. + with tf.variable_scope('graph'): + towerloss,normalnorm,ploss,towerxpred,towerxpredlong = tower_loss(x[i],x_dropout[i],scope) + tower_vars.append(towerloss) + + + # Collect vars for all towers. 
+            tower_x_pred.append(towerxpred)
+            tower_x_pred_long.append(towerxpredlong)
+
+            # Reuse variables for the next tower (share variables across towers -- one tower on each gpu)
+            tf.get_variable_scope().reuse_variables()
+
+            # Retain the summaries from the final tower.
+            summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+            # Calculate the gradients for the batch of data on this tower.
+            grads = opt.compute_gradients(towerloss)
+
+            # Keep track of the gradients across all towers.
+            tower_grads.append(grads)
+
+    # We must calculate the mean of each gradient. Note that this is the
+    # synchronization point across all towers.
+    grads = average_gradients(tower_grads)
+
+    # Add histograms for gradients.
+    for grad, var in grads:
+      if grad is not None:
+        summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad))
+
+    # Apply the gradients to adjust the shared variables.
+    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+
+    # Add histograms for trainable variables.
+    print("trainable vars")
+    for var in tf.trainable_variables():
+      print(var)
+      summaries.append(tf.summary.histogram(var.op.name, var))
+
+    # Track the moving averages of all trainable variables.
+    MOVING_AVERAGE_DECAY=0.9999
+    variable_averages = tf.train.ExponentialMovingAverage(
+        MOVING_AVERAGE_DECAY, global_step)
+    # synchronous variable averaging
+    variables_averages_op = variable_averages.apply(tf.trainable_variables())
+
+    # Group all updates into a single train op.
+    train_op = tf.group(apply_gradient_op, variables_averages_op)
+
     # List of all Variables
     variables = tf.global_variables()
-
     # Create saver for checkpoints and summary
     saver = tf.train.Saver(variables)
 
@@ -230,162 +426,205 @@ def autoencode(continuetrain=0,modeltype=0,num_balls=2):
 
     # Summary op
     #summary_op = tf.merge_all_summaries()
     summary_op = tf.summary.merge_all()
-
-    # Initialize variables
-    init = tf.global_variables_initializer()
 
-    # Start session
-    sess = tf.Session()
-
-    # Initialize Network
-    if continuetrain==0:
-      print("Initialize network")
-      sess.run(init)
-    else:
-      print("load network")
-      # http://stackoverflow.com/questions/33759623/tensorflow-how-to-restore-a-previously-saved-model-python
-      #
-      # * means all if need specific format then *.csv
-      list_of_files = glob.glob(FLAGS.ckpt_dir + '/model.ckpt-*.meta')
-      if(len(list_of_files)==0):
+    # Start session (allow_soft_placement lets ops without a GPU kernel fall back to the CPU)
+    with tf.Session(config=tf.ConfigProto(
+        allow_soft_placement=True,
+        log_device_placement=FLAGS.log_device_placement)) as sess:
+      # Initialize variables
+      init = tf.global_variables_initializer()
+
+
+      # Start the queue runners.
+ tf.train.start_queue_runners(sess=sess) + + + # Initialize Network + if continuetrain==0: print("Initialize network") sess.run(init) else: - latest_file = max(list_of_files, key=os.path.getctime) - print("latest_file=%s" % (latest_file)) + print("load network") + # http://stackoverflow.com/questions/33759623/tensorflow-how-to-restore-a-previously-saved-model-python # - checkpoint_path = latest_file - saver = tf.train.import_meta_graph(checkpoint_path) - saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir)) - all_vars = tf.get_collection('vars') - m = re.search('ckpt-([0-9]+).meta', latest_file) - nstep = int(m.group(1)) - print("done loading network: nstep=%d" % (nstep)) - - # Setup summary - summary_writer = tf.summary.FileWriter(FLAGS.ckpt_dir, sess.graph) - - # Set number of model frames - #modelframes=FLAGS.input_seq_length+predictframes - modelframes=predictframes - - # Set how often dump video to disk - howoftenvid=1000 - # Set how often reports error to summary - howoftensummary=2000 - # Set how often to write checkpoint file - howoftenckpt=2000 - - ############### - # Training Loop - startstep=nstep - for step in xrange(startstep,FLAGS.max_minibatches): - nstep=step - - # Generate mini-batch - dat = md.generate_model_sample(FLAGS.minibatch_size, FLAGS.input_seq_length, FLAGS.sizexy, num_balls, modeltype) - - # Get model data for comparing to prediction if generating video - if nstep%howoftenvid == 0: - datmodel = md.generate_model_sample(1, modelframes, FLAGS.sizexy, num_balls, modeltype) - # Overwrite so consistent with ground truth for video output - dat[0,0:FLAGS.input_seq_length] = datmodel[0,0:FLAGS.input_seq_length] - - # Train on mini-batch - # Compute error in prediction vs. model and compute time of mini-batch task - t = time.time() - _, lossm = sess.run([train_operation, loss],feed_dict={x:dat, hold_prob:FLAGS.hold_prob}) - elapsed = time.time() - t - assert not np.isnan(lossm), 'Model reached lossm = NaN' - - - # Store model and print-out loss - if nstep%howoftensummary == 0 and nstep != 0: - summary_str = sess.run(summary_op, feed_dict={x:dat, hold_prob:FLAGS.hold_prob}) - summary_writer.add_summary(summary_str, nstep) - print("") - print("time per batch is " + str(elapsed) + " seconds") - print("step=%d nstep=%d" % (step,nstep)) - print("L2 loss=%g" % (lossm)) - - normalnorm=np.sum(dat[0,0]) - print("normalnorm=%d" % (normalnorm)) - print("L2 percent loss=%g" % 100.0*(np.sqrt(float(lossm))/float(normalnorm))) - else: - # track progress - sys.stdout.write('.') - sys.stdout.flush() + # * means all if need specific format then *.csv + list_of_files = glob.glob(FLAGS.ckpt_dir + '/model.ckpt-*.meta') + if(len(list_of_files)==0): + print("Initialize network") + sess.run(init) + else: + latest_file = max(list_of_files, key=os.path.getctime) + print("latest_file=%s" % (latest_file)) + # + checkpoint_path = latest_file + saver = tf.train.import_meta_graph(checkpoint_path) + saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir)) + all_vars = tf.get_collection('vars') + m = re.search('ckpt-([0-9]+).meta', latest_file) + nstep = int(m.group(1)) + print("done loading network: nstep=%d" % (nstep)) + + # Setup summary + summary_writer = tf.summary.FileWriter(FLAGS.ckpt_dir, sess.graph) + + # Set number of model frames + #modelframes=FLAGS.input_seq_length+predictframes + modelframes=FLAGS.predictframes + + # Set how often dump video to disk + howoftenvid=1000 + # Set how often reports error to summary + howoftensummary=100 + # Set how often to write checkpoint 
file + howoftenckpt=2000 + + # count and output total number of model/graph parameters + total_parameters() + + ############### + # Training Loop + startstep=nstep + num_balls = FLAGS.init_num_balls + for step in xrange(startstep,FLAGS.max_minibatches): + nstep=step + + ######################### + # model-dependent code + if step%howoftenvid==0 and step>0: + num_balls=num_balls+1 + # limit so doesn't go beyond point where can't fit balls and reaches good_config=False always in models.py + if num_balls>3: + num_balls=3 + print("num_balls=%d" % (num_balls)) + + + # create input data + tower_dat = [] + tower_datmodel = [] + with tf.variable_scope(tf.get_variable_scope()): # variable scope + for i in xrange(FLAGS.num_gpus): + # Generate mini-batch + dat = md.generate_model_sample(FLAGS.minibatch_size, FLAGS.input_seq_length, FLAGS.sizexy, num_balls, FLAGS.modeltype) + + # Get model data for comparing to prediction if generating video + datmodel = md.generate_model_sample(1, modelframes, FLAGS.sizexy, num_balls, FLAGS.modeltype) + # Overwrite so consistent with ground truth for video output + dat[0,0:FLAGS.input_seq_length] = datmodel[0,0:FLAGS.input_seq_length] + + # Collect dat for all towers. + tower_dat.append(dat) + tower_datmodel.append(datmodel) + # pack-up input data + tower_dat = np.asarray(tower_dat) + tower_datmodel = np.asarray(tower_datmodel) - # Save checkpoint - if nstep%howoftenckpt == 0: - print("Saving checkpoint") - checkpoint_path = os.path.join(FLAGS.ckpt_dir, 'model.ckpt') - saver.save(sess, checkpoint_path, global_step=nstep) - print("checkpoint saved to " + FLAGS.ckpt_dir) - - # Output video of model and prediction for single video in mini-batch at this step - if nstep%howoftenvid == 0: - - # Write model video (includes given and ground truth frames) - video_path = os.path.join(FLAGS.video_dir, '') - - #http://stackoverflow.com/questions/10605163/opencv-videowriter-under-osx-producing-no-output - cc = cv2.cv.CV_FOURCC('m', 'p', '4', 'v') - fps=4 - sizevx=100 - sizevy=100 - sizevid=(sizevx, sizevy) - - print("") - print("Writing model video") - video = cv2.VideoWriter() - success = video.open(video_path + "model_" + str(nstep) + ".mov", cc, fps, sizevid, True) - image = datmodel[0] - print(image.shape) - for i in xrange(modelframes): - x_1_r = np.uint8(np.minimum(1, np.maximum(image[i,:,:,:], 0)) * 255) - new_im = cv2.resize(x_1_r, (sizevx,sizevy)) - video.write(new_im) - video.release() - - # Write given + predicted video - print("Writing predicted video") - video = cv2.VideoWriter() - success = video.open(video_path + "clstm_" + str(nstep) + ".mov", cc, fps, sizevid, True) - - # Preappend starting sequence - image = datmodel[0] - print(image.shape) - for i in xrange(FLAGS.predict_frame_start): - x_1_r = np.uint8(np.minimum(1,np.maximum(image[i,:,:,:], 0)) * 255) - new_im = cv2.resize(x_1_r, (sizevx,sizevy)) - video.write(new_im) - - # Append predicted video - dat_gif = dat - image = sess.run([x_pred_long],feed_dict={x:dat_gif, hold_prob:FLAGS.hold_prob}) - image = image[0][0] - print(image.shape) - for i in xrange(modelframes - FLAGS.predict_frame_start): - x_1_r = np.uint8(np.minimum(1,np.maximum(image[i,:,:,:], 0)) * 255) - new_im = cv2.resize(x_1_r, (sizevx,sizevy)) - video.write(new_im) - video.release() + + # Train on mini-batch + # Compute error in prediction vs. 
model and compute the time of the mini-batch task
+          t = time.time()
+
+          _, lossm = sess.run([train_op,towerloss],feed_dict={x:tower_dat})
+          elapsed = time.time() - t
+          assert not np.isnan(lossm), 'Model reached lossm = NaN'
+
+
+          # Write summary and print-out loss
+          if nstep%howoftensummary == 0:
+            summary_str = sess.run(summary_op, feed_dict={x:tower_dat})
+            summary_writer.add_summary(summary_str, nstep)
+            print("")
+            print("time per batch is " + str(elapsed) + " seconds")
+            print("step=%d nstep=%d" % (step,nstep))
+            print("L2 loss=%g" % (lossm))
+
+            localnormalnorm=np.sum(tower_dat[0][0,FLAGS.predict_frame_start+1:,:,:,:]) # pull from 0th tower
+            print("localnormalnorm=%d" % (localnormalnorm))
+            print("L2 percent loss=%g" % (100.0*(np.sqrt(float(lossm))/float(localnormalnorm))))
+          else:
+            # track progress
+            sys.stdout.write('.')
+            sys.stdout.flush()
+
+
+          # Save checkpoint
+          if nstep%howoftenckpt == 0:
+            print("Saving checkpoint")
+            checkpoint_path = os.path.join(FLAGS.ckpt_dir, 'model.ckpt')
+            saver.save(sess, checkpoint_path, global_step=nstep)
+            print("checkpoint saved to " + FLAGS.ckpt_dir)
+
+          # Output video of model and prediction for a single video in the mini-batch at this step
+          if nstep%howoftenvid == 0:
+
+            # Write model video (includes given and ground truth frames)
+            video_path = os.path.join(FLAGS.video_dir, '')
+
+            # http://stackoverflow.com/questions/10605163/opencv-videowriter-under-osx-producing-no-output
+            cc = cv2.cv.CV_FOURCC('m', 'p', '4', 'v')
+            fps=4
+            sizevx=100
+            sizevy=100
+            sizevid=(sizevx, sizevy)
+
+            print("")
+            print("Writing model video")
+            video = cv2.VideoWriter()
+            success = video.open(video_path + "model_" + str(nstep) + ".mov", cc, fps, sizevid, True)
+            image = tower_datmodel[0][0] # pull from 0th tower
+            print(image.shape)
+            for i in xrange(modelframes):
+              x_1_r = np.uint8(np.minimum(1, np.maximum(image[i,:,:,:], 0)) * 255)
+              new_im = cv2.resize(x_1_r, (sizevx,sizevy))
+              video.write(new_im)
+            video.release()
+
+            # Write given + predicted video
+            print("Writing predicted video")
+            video = cv2.VideoWriter()
+            success = video.open(video_path + "clstm_" + str(nstep) + ".mov", cc, fps, sizevid, True)
+
+            # Prepend the starting sequence
+            image = tower_datmodel[0][0] # pull from 0th tower
+            print(image.shape)
+            for i in xrange(FLAGS.predict_frame_start):
+              x_1_r = np.uint8(np.minimum(1,np.maximum(image[i,:,:,:], 0)) * 255)
+              new_im = cv2.resize(x_1_r, (sizevx,sizevy))
+              video.write(new_im)
+
+            # Append the predicted video
+            image = sess.run([tower_x_pred_long],feed_dict={x:tower_dat})
+            image = image[0][0][0] # pull from 0th tower
+            print(image.shape)
+            for i in xrange(modelframes - FLAGS.predict_frame_start):
+              x_1_r = np.uint8(np.minimum(1,np.maximum(image[i,:,:,:], 0)) * 255)
+              new_im = cv2.resize(x_1_r, (sizevx,sizevy))
+              video.write(new_im)
+            video.release()
 
 def main(argv=None):
   #
-  # Choose to continue training (1) or not (0)
-  continuetrain=1
+  continuetrain=FLAGS.continuetrain
   #
   #
-  # Choose which model to work on
-  # 0 = classic bouncing balls
-  # 1 = rotating "ball"
-  modeltype=1
-  # Number of balls
-  num_balls=1
+  modeltype=FLAGS.modeltype
+  
init_num_balls=FLAGS.init_num_balls # # Setup checkpoint directory if tf.gfile.Exists(FLAGS.ckpt_dir): @@ -403,9 +642,8 @@ def main(argv=None): tf.gfile.MakeDirs(FLAGS.video_dir) # Start training autoencoder - autoencode(continuetrain=continuetrain,modeltype=modeltype,num_balls=num_balls) + autoencode(continuetrain=continuetrain,modeltype=modeltype,init_num_balls=init_num_balls) if __name__ == '__main__': tf.app.run() - diff --git a/models.py b/models.py index cd570b9..9fbc37b 100644 --- a/models.py +++ b/models.py @@ -41,7 +41,13 @@ def model_n(T=64, TY=0, n=2, r=None, m=None): good_config=False + goodconfigattempt=0 + maxgoodconfigattempts=10 while not good_config: + goodconfigattempt=goodconfigattempt+1 + if goodconfigattempt>maxgoodconfigattempts: + break + x = 2+rand(n,2)*8 good_config=True for i in range(n):
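The models.py change above caps the ball-placement rejection sampling at maxgoodconfigattempts, so after the break the loop falls through with the last, possibly still-overlapping, configuration. For illustration, a self-contained sketch of that capped rejection-sampling pattern (sample_positions is a hypothetical helper, not a function in the repo; the bounds follow the 2+rand(n,2)*8 draw shown above):

```python
import numpy as np

def sample_positions(n, r, max_attempts=10, lo=2.0, hi=10.0):
    """Draw n 2-D ball positions in [lo, hi]^2 until no pair overlaps
    (distance > sum of radii), giving up after max_attempts tries."""
    x = lo + np.random.rand(n, 2) * (hi - lo)
    for attempt in range(max_attempts):
        ok = all(np.linalg.norm(x[i] - x[j]) > r[i] + r[j]
                 for i in range(n) for j in range(i))
        if ok:
            break
        # reject this configuration and redraw
        x = lo + np.random.rand(n, 2) * (hi - lo)
    return x  # may still overlap if max_attempts was exhausted

positions = sample_positions(3, r=np.ones(3))
```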