import os
import sys
import time
import random  # needed below for random.sample (was missing from the original imports)
import re
import math

import numpy
import tables  # needed below for tables.openFile (was missing from the original imports)
import theano
import theano.tensor as T
from theano import shared
from theano.tensor.shared_randomstreams import RandomStreams
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from theano.tensor.nnet import conv3d2d
from theano.misc.pkl_utils import dump
from scipy.io import matlab
from pprint import pprint
from collections import OrderedDict

from store_pytable import *
from layers import *


def dump_weights_pickle(classifier, file_name):
    # params is laid out as [W0, b0, W1, b1, ...]; pickle the six weight/bias pairs.
    W0 = classifier.params[0]
    W1 = classifier.params[2]
    W2 = classifier.params[4]
    W3 = classifier.params[6]
    W4 = classifier.params[8]
    W5 = classifier.params[10]
    b0 = classifier.params[1]
    b1 = classifier.params[3]
    b2 = classifier.params[5]
    b3 = classifier.params[7]
    b4 = classifier.params[9]
    b5 = classifier.params[11]
    with open(file_name, 'wb') as f:
        dump((W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5), f)


def shared_dataset(data_x, data_y, borrow=True):
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')


def numpy_floatX(data):
    return numpy.asarray(data, dtype=theano.config.floatX)


def load_ATP_data(data_ratio):
    dataName = "data"
    labelName = "label"
    labelShape = []

    # positive training examples
    pos_or_neg = 'pos'
    ID = 'train' + '_' + 'pos'
    filename_train_pos = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_pos, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_train_pos = dataColumn_train[:]
    y_train_pos = labelColumn_train[:]

    # negative training examples
    pos_or_neg = 'neg'
    ID = 'train' + '_' + 'neg'
    filename_train_neg = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_neg, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_train_neg = dataColumn_train[:]
    y_train_neg = labelColumn_train[:]

    # positive test examples
    pos_or_neg = 'pos'
    ID = 'test' + '_' + 'pos'
    filename_train_pos = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_pos, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_test_pos = dataColumn_train[:]
    y_test_pos = labelColumn_train[:]

    # negative test examples
    pos_or_neg = 'neg'
    ID = 'test' + '_' + 'neg'
    filename_train_neg = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_neg, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_test_neg = dataColumn_train[:]
    y_test_neg = labelColumn_train[:]

    num_of_pos_test = X_test_pos.shape[0]
    num_of_neg_test = X_test_neg.shape[0]
    Xt = numpy.concatenate((X_test_pos, X_test_neg), axis=0)
    yt = numpy.concatenate((y_test_pos, y_test_neg), axis=0)

    # Balance the training set: slice the larger class into chunks the size of
    # the smaller class (up to data_ratio chunks), pairing each chunk with a
    # full copy of the smaller class.
    s = [X_train_pos.shape[0], X_train_neg.shape[0]]
    data_size = min(s)
    min_index = s.index(data_size)
    print "pos:" + str(s[0])
    print "neg:" + str(s[1])

    X_pos_all = []
    y_pos_all = []
    X_neg_all = []
    y_neg_all = []
    if min_index == 0:
        ratio = float(s[1]) / s[0]
        r = int(numpy.floor(ratio))
        r = min(r, data_ratio)
        print "more neg, ratio=" + str(ratio)
        for i in range(0, r):
            X_pos_all.append(X_train_pos)
            y_pos_all.append(y_train_pos)
            X_neg_all.append(X_train_neg[i * data_size:(i + 1) * data_size])
            y_neg_all.append(y_train_neg[i * data_size:(i + 1) * data_size])
    elif min_index == 1:
        ratio = float(s[0]) / s[1]
        r = int(numpy.floor(ratio))
        r = min(r, data_ratio)
        print "more pos, ratio=" + str(ratio)
        for i in range(0, r):
            X_pos_all.append(X_train_pos[i * data_size:(i + 1) * data_size])
            y_pos_all.append(y_train_pos[i * data_size:(i + 1) * data_size])
            X_neg_all.append(X_train_neg)
            y_neg_all.append(y_train_neg)

    equal_examples = []
    equal_labels = []
    for i in range(0, len(X_pos_all)):
        equal_examples.append(numpy.concatenate((X_pos_all[i], X_neg_all[i]), axis=0))
        equal_labels.append(numpy.concatenate((y_pos_all[i], y_neg_all[i]), axis=0))
    equal_examples = numpy.array(equal_examples)
    equal_labels = numpy.array(equal_labels)
    # each example is a 4-channel 20x20x20 voxel grid
    equal_examples = numpy.reshape(equal_examples,
                                   (equal_examples.shape[0] * equal_examples.shape[1], 4, 20, 20, 20))
    equal_labels = equal_labels.flatten()

    # 19/20 of the balanced set for training, 1/20 for validation
    num_of_train = int(19 * float(equal_examples.shape[0]) / 20)
    num_of_val = int(1 * float(equal_examples.shape[0]) / 20)
    print num_of_train
    print num_of_val

    mask_train = random.sample(xrange(equal_examples.shape[0]), num_of_train)
    X = equal_examples[mask_train]
    y = equal_labels[mask_train]
    equal_examples = numpy.delete(equal_examples, mask_train, 0)
    equal_labels = numpy.delete(equal_labels, mask_train, 0)
    mask_val = random.sample(xrange(equal_examples.shape[0]), num_of_val)
    Xv = equal_examples[mask_val]
    yv = equal_labels[mask_val]
    equal_examples = numpy.delete(equal_examples, mask_val, 0)
    equal_labels = numpy.delete(equal_labels, mask_val, 0)

    print "equal_examples size in the end:"
    print equal_examples.shape
    print "equal_labels size in the end:"
    print equal_labels.shape
    print "train X size: "
    print X.shape
    print "val X size: "
    print Xv.shape
    print "train y size: "
    print y.shape
    print "val y size: "
    print yv.shape

    # split the training set into 20 parts so each part fits in memory
    all_train_x = []
    all_train_y = []
    all_train_sizes = []
    train_size = X.shape[0]
    part = train_size / 20
    for i in range(0, 20):
        mask = range(part * i, part * (i + 1))
        Xtr = X[mask]
        ytr = y[mask]
        all_train_x.append(Xtr)
        all_train_y.append(ytr)
        all_train_sizes.append(Xtr.shape[0])

    all_examples = [all_train_x, Xt, Xv]
    all_labels = [all_train_y, yt, yv]
    # datasets = [(all_train_x, all_train_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
    return [all_examples, all_labels, 4, all_train_sizes, Xt.shape[0], Xv.shape[0]]


def _dropout_from_layer(rng, layer, p):
    """p is the probability of dropping a unit."""
    srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
    # p=1-p because 1's indicate keep and p is prob of dropping
    mask = srng.binomial(n=1, p=1 - p, size=layer.shape)
    # The cast is important because int * float32 = float64, which pulls things off the gpu
    output = layer * T.cast(mask, theano.config.floatX)
    return output


class ConvDropNet(object):
    """A multilayer perceptron with all the trappings required to do dropout training."""

    def __init__(self, rng, input, in_channels, dropout_rates, batch_size, use_bias=True):
        # rectified_linear_activation = lambda x: T.maximum(0.0, x)
        # Set up all the hidden layers
        # weight_matrix_sizes = zip(layer_sizes, layer_sizes[1:])
        # Two parallel stacks: dropout_layers are used for training; layers
        # share the same weights, scaled by (1 - p), for the test-time path.
        self.layers = []
        self.dropout_layers = []

        filter_w = 3
        num_3d_pixel = 20

        layer0_w = num_3d_pixel
        layer0_h = num_3d_pixel
        layer0_d = num_3d_pixel
        layer1_w = (layer0_w - 3 + 1)      # 18
        layer1_h = (layer0_h - 3 + 1)
        layer1_d = (layer0_d - 3 + 1)
        layer2_w = (layer1_w - 3 + 1) / 2  # 8
        layer2_h = (layer1_h - 3 + 1) / 2
        layer2_d = (layer1_d - 3 + 1) / 2
        layer3_w = (layer2_w - 3 + 1) / 2  # 3
        layer3_h = (layer2_h - 3 + 1) / 2
        layer3_d = (layer2_d - 3 + 1) / 2

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'

        # image sizes
        batchsize = batch_size
        in_time = num_3d_pixel
        in_width = num_3d_pixel
        in_height = num_3d_pixel
        # filter sizes
        flt_channels = 100
        flt_time = filter_w
        flt_width = filter_w
        flt_height = filter_w

        # conv3d2d layout: (batch, depth, channels, height, width)
        signals_shape0 = (batchsize, in_time, in_channels, in_height, in_width)
        filters_shape0 = (flt_channels, 3, in_channels, 3, 3)
        signals_shape1 = (batchsize, layer1_d, flt_channels, layer1_h, layer1_w)
        filters_shape1 = (flt_channels * 2, 3, flt_channels, 3, 3)
        signals_shape2 = (batchsize, layer2_d, flt_channels * 2, layer2_h, layer2_w)
        filters_shape2 = (flt_channels * 4, 3, flt_channels * 2, 3, 3)

        # next_layer_input = input
        # first_layer = True
        # dropout the input
        layer0_input = input.reshape(signals_shape0)  # N x 20 x 4 x 20 x 20
        layer0_dropout_input = _dropout_from_layer(rng, layer0_input, p=dropout_rates[0])

        self.tparams = OrderedDict()

        # W0: flt_channels, flt_time, in_channels, flt_height, flt_width
        ###################################
        # layer 0: N x 20 x 4 x 20 x 20 => N x 18 x 100 x 18 x 18
        Dropout_layer0 = Dropout_Conv_3d_Layer(rng=rng, input=layer0_dropout_input,
                                               image_shape=signals_shape0,
                                               filter_shape=filters_shape0,
                                               dropout_rate=dropout_rates[1])
        self.dropout_layers.append(Dropout_layer0)
        next_dropout_input = Dropout_layer0.output
        self.tparams['W_0'] = Dropout_layer0.W
        self.tparams['b_0'] = Dropout_layer0.b

        layer0 = Conv_3d_Layer(rng=rng, input=layer0_input,
                               image_shape=signals_shape0,
                               filter_shape=filters_shape0,
                               W=Dropout_layer0.W * (1 - dropout_rates[1]),
                               b=Dropout_layer0.b * (1 - dropout_rates[1]))
        self.layers.append(layer0)
        next_layer_input = layer0.output
        ##################################

        ###################################
        # layer 1: N x 18 x 100 x 18 x 18 => conv => N x 16 x 200 x 16 x 16 => pool => N x 8 x 200 x 8 x 8
        Dropout_layer1 = Dropout_Conv_3d_Layer(rng=rng, input=next_dropout_input,
                                               image_shape=signals_shape1,
                                               filter_shape=filters_shape1,
                                               dropout_rate=dropout_rates[2])
        self.tparams['W_1'] = Dropout_layer1.W
        self.tparams['b_1'] = Dropout_layer1.b
        Dropout_layer1_pool = PoolLayer3D(input=Dropout_layer1.output.dimshuffle(0, 2, 1, 3, 4),
                                          pool_shape=(2, 2, 2))
        self.dropout_layers.append(Dropout_layer1)
        # self.dropout_layers.append(Dropout_layer1_pool)
        next_dropout_input = Dropout_layer1_pool.output.dimshuffle(0, 2, 1, 3, 4)

        layer1 = Conv_3d_Layer(rng=rng, input=next_layer_input,
                               image_shape=signals_shape1,
                               filter_shape=filters_shape1,
                               W=Dropout_layer1.W * (1 - dropout_rates[2]),
                               b=Dropout_layer1.b * (1 - dropout_rates[2]))
        layer1_pool = PoolLayer3D(input=layer1.output.dimshuffle(0, 2, 1, 3, 4), pool_shape=(2, 2, 2))
        self.layers.append(layer1)
        # self.layers.append(layer1_pool)
        next_layer_input = layer1_pool.output.dimshuffle(0, 2, 1, 3, 4)
        ##################################

        ###################################
        # layer 2: N x 8 x 200 x 8 x 8 => conv => N x 6 x 400 x 6 x 6 => pool => N x 3 x 400 x 3 x 3
        Dropout_layer2 = Dropout_Conv_3d_Layer(rng=rng, input=next_dropout_input,
                                               image_shape=signals_shape2,
                                               filter_shape=filters_shape2,
                                               dropout_rate=dropout_rates[3])
        Dropout_layer2_pool = PoolLayer3D(input=Dropout_layer2.output.dimshuffle(0, 2, 1, 3, 4),
                                          pool_shape=(2, 2, 2))
        self.tparams['W_2'] = Dropout_layer2.W
        self.tparams['b_2'] = Dropout_layer2.b
        self.dropout_layers.append(Dropout_layer2)
        # self.dropout_layers.append(Dropout_layer2_pool)
        next_dropout_input = Dropout_layer2_pool.output.dimshuffle(0, 2, 1, 3, 4).flatten(2)

        layer2 = Conv_3d_Layer(rng=rng, input=next_layer_input,
                               image_shape=signals_shape2,
                               filter_shape=filters_shape2,
                               W=Dropout_layer2.W * (1 - dropout_rates[3]),
                               b=Dropout_layer2.b * (1 - dropout_rates[3]))
        layer2_pool = PoolLayer3D(input=layer2.output.dimshuffle(0, 2, 1, 3, 4), pool_shape=(2, 2, 2))
        self.layers.append(layer2)
        # self.layers.append(layer2_pool)
        next_layer_input = layer2_pool.output.dimshuffle(0, 2, 1, 3, 4).flatten(2)
        ##################################

        # layer 3: fully connected, 400*3*3*3 => 1000
        Dropout_layer3 = DropoutHiddenLayer(rng=rng, input=next_dropout_input,
                                            activation=relu,
                                            n_in=(flt_channels * 4 * layer3_d * layer3_w * layer3_h),
                                            n_out=1000,
                                            dropout_rate=dropout_rates[4])
        self.dropout_layers.append(Dropout_layer3)
        next_dropout_input = Dropout_layer3.output
        self.tparams['W_3'] = Dropout_layer3.W
        self.tparams['b_3'] = Dropout_layer3.b

        # Reuse the parameters from the dropout layer here, in a different
        # path through the graph.
        layer3 = HiddenLayer(rng=rng, input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1-p)
                             W=Dropout_layer3.W * (1 - dropout_rates[4]),
                             b=Dropout_layer3.b * (1 - dropout_rates[4]),
                             n_in=(flt_channels * 4 * layer3_d * layer3_w * layer3_h),
                             n_out=1000)
        self.layers.append(layer3)
        next_layer_input = layer3.output
        ##################################

        # layer 4: fully connected, 1000 => 100
        Dropout_layer4 = DropoutHiddenLayer(rng=rng, input=next_dropout_input,
                                            activation=relu,
                                            n_in=1000, n_out=100,
                                            dropout_rate=dropout_rates[5])
        self.dropout_layers.append(Dropout_layer4)
        next_dropout_input = Dropout_layer4.output
        self.tparams['W_4'] = Dropout_layer4.W
        self.tparams['b_4'] = Dropout_layer4.b

        # Reuse the parameters from the dropout layer here, in a different
        # path through the graph.
        layer4 = HiddenLayer(rng=rng, input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1-p)
                             W=Dropout_layer4.W * (1 - dropout_rates[5]),
                             b=Dropout_layer4.b * (1 - dropout_rates[5]),
                             n_in=1000, n_out=100)
        self.layers.append(layer4)
        next_layer_input = layer4.output

        ##################### TODO #######################
        # layer 5: softmax output, 100 => 20
        Dropout_layer5 = LogisticRegression(input=next_dropout_input, n_in=100, n_out=20)
        self.dropout_layers.append(Dropout_layer5)
        self.tparams['W_5'] = Dropout_layer5.W
        self.tparams['b_5'] = Dropout_layer5.b

        # Again, reuse parameters in the dropout output.
        layer5 = LogisticRegression(input=next_layer_input,
                                    W=Dropout_layer5.W, b=Dropout_layer5.b,
                                    n_in=100, n_out=20)
        self.layers.append(layer5)

        # Use the negative log likelihood of the logistic regression layer as
        # the objective.
        self.dropout_negative_log_likelihood = self.dropout_layers[-1].negative_log_likelihood
        self.dropout_errors = self.dropout_layers[-1].errors
        self.negative_log_likelihood = self.layers[-1].negative_log_likelihood
        self.errors = self.layers[-1].errors

        self.L2_sqr = (T.sum(Dropout_layer0.W ** 2) + T.sum(Dropout_layer1.W ** 2)
                       + T.sum(Dropout_layer2.W ** 2) + T.sum(Dropout_layer3.W ** 2)
                       + T.sum(Dropout_layer4.W ** 2) + T.sum(Dropout_layer5.W ** 2))

        # Grab all the parameters together.
        self.params = [param for layer in self.dropout_layers for param in layer.params]


def test_fine_S_CNN_dA(finetune_lr=0.003, training_epochs=7, drop_rate=0.25, batch_size=5, reg=1e-7):
    data_ratio = 26
    [all_examples, all_labels, in_channels, all_train_sizes, test_size, val_size] = load_ATP_data(data_ratio)
    Xtr = all_examples[0]
    Xt = all_examples[1]
    Xv = all_examples[2]
    ytr = all_labels[0]
    yt = all_labels[1]
    yv = all_labels[2]

    n_train_batches = [a / batch_size for a in all_train_sizes]
    n_valid_batches = val_size
    n_test_batches = test_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'

    flt_channels = 64
    flt_time = 3
    in_channels = 4
    flt_height = 3
    flt_width = 3
    batchsize = batch_size
    in_time = 20
    in_height = 20
    in_width = 20

    signals_shape = (batchsize, in_time, in_channels, in_height, in_width)
    filters_shape = (flt_channels, flt_time, in_channels, flt_height, flt_width)

    # allocate symbolic variables for the data and build the dropout 3D CNN
    dtensor5 = T.TensorType('float32', (False,) * 5)
    x = dtensor5('x')
    y = T.ivector('y')

    classifier = ConvDropNet(rng=numpy_rng, input=x, in_channels=in_channels,
                             batch_size=batch_size,
                             dropout_rates=[drop_rate, drop_rate, drop_rate,
                                            drop_rate, drop_rate, drop_rate])

    L2_sqr = classifier.L2_sqr
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y) + reg * L2_sqr
    finetune_cost = dropout_cost
    errors = classifier.errors(y)

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'

    # compute number of minibatches for training, validation and testing
    index = T.lscalar('index')  # index to a [mini]batch
    lr = T.scalar(name='lr')
    tparams = classifier.tparams
    # note: reg * L2_sqr is already included in dropout_cost above, so the
    # L2 penalty is effectively applied twice here (kept as in the original)
    cost = finetune_cost + (reg * L2_sqr)

    grads = T.grad(cost, tparams.values())

    # RMSProp-style updates: accumulate gradients and squared gradients as shared variables
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad' % k)
                     for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    train_fn = theano.function([x, y], cost, updates=zgup + rgup + rg2up,
                               name='rmsprop_f_grad_shared')

    updir = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_updir' % k)
             for k, p in tparams.iteritems()]
    updir_new = [(ud, 0.9 * ud - 1e-4 * zg / T.sqrt(rg2 - rg ** 2 + 1e-4))
                 for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, running_grads2)]
    param_up = [(p, p + udn[1]) for p, udn in zip(tparams.values(), updir_new)]
    f_update = theano.function([lr], [], updates=updir_new + param_up,
                               on_unused_input='ignore', name='rmsprop_f_update')

    index = T.lscalar('index')  # index to a [mini]batch

    train_score_i = theano.function([x, y], errors, name='train')
    test_score_i = theano.function([x, y], errors, name='test')
    valid_score_i = theano.function([x, y], errors, name='valid')

    print '... finetuning the model'
    # early-stopping parameters
    patience = 100000  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches[0], patience / 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_test_score = numpy.inf
    test_score = 0.
    start_time = time.clock()
    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        list_file = open('../progress/progress_3DCNN_ATP_dropout_' + str(drop_rate) + '.txt', 'a')
        epoch = epoch + 1
        train_losses = []
        # the training set is stored as 20 parts; iterate over every minibatch of every part
        for part_index in xrange(20):
            for minibatch_index in xrange(n_train_batches[part_index]):
                X_train = Xtr[part_index][minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
                y_train = ytr[part_index][minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
                train_set_x, train_set_y = shared_dataset(X_train, y_train)
                # reorder to (batch, depth, channels, height, width) as expected by the network
                train_set_x = train_set_x.dimshuffle(0, 4, 1, 2, 3)
                minibatch_avg_cost = train_fn(train_set_x.eval(), train_set_y.eval())
                train_err = train_score_i(train_set_x.eval(), train_set_y.eval())
                train_losses.append(train_err)
                f_update(finetune_lr)
                iter = (epoch - 1) * n_train_batches[part_index] + minibatch_index

        train_losses = numpy.array(train_losses)
        this_train_loss = numpy.mean(train_losses)
        list_file.write('epoch %i, train error %f %%' % (epoch, this_train_loss * 100.))
        list_file.write('\n')

        validation_losses = []
        for i in xrange(n_valid_batches):
            valid_set_x, valid_set_y = shared_dataset(Xv[i * batch_size:(i + 1) * batch_size],
                                                      yv[i * batch_size:(i + 1) * batch_size])
            valid_set_x = valid_set_x.dimshuffle(0, 4, 1, 2, 3)
            validation_losses.append(valid_score_i(valid_set_x.eval(), valid_set_y.eval()))
        this_validation_loss = numpy.mean(validation_losses)
        print('epoch %i, validation error %f %%' % (epoch, this_validation_loss * 100.))
        list_file.write('epoch %i, validation error %f %%' % (epoch, this_validation_loss * 100.))
        list_file.write('\n')
        list_file.close()  # close the progress log; it was left open in the original

        dump_weights_pickle(classifier,
                            file_name='../results/weights/weight_3DCNN_ATP_epoch_' + str(epoch) + '.zip')

        # if we got the best validation score until now
        if this_validation_loss <= best_validation_loss:
            # improve patience if loss improvement is good enough
            if this_validation_loss < best_validation_loss * improvement_threshold:
                patience = max(patience, iter * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            best_iter = iter
            best_test_score = test_score * 100.

    end_time = time.clock()
    dump_weights_pickle(classifier,
                        file_name='../results/weights/weight_3DCNN_ATP_epoch_' + str(epoch) + '.zip')


if __name__ == '__main__':
    # THEANO_FLAGS=device=gpu,floatX=float32 python 3DCNN_atp_drop.py
    lr = 0.003
    epoch = 10
    drop_rate = 0.2
    test_fine_S_CNN_dA(finetune_lr=lr, training_epochs=epoch, drop_rate=drop_rate)