import numpy
import theano
import theano.tensor as T
from theano.misc.pkl_utils import dump

from layers import *
from data_utils import *


class ConvDropNet(object):
    """3D CNN with dropout: three conv3d blocks followed by two fully
    connected layers and a 20-way softmax output. The dropout path
    (self.dropout_layers) is used for the training cost; the plain path
    (self.layers) shares the same weights scaled by (1 - p) for testing."""

    def __init__(self, rng, input, in_channels, dropout_rates, batch_size,
                 use_bias=True):
        self.layers = []
        self.dropout_layers = []

        filter_w = 3
        num_3d_pixel = 20

        # Cube side lengths after each block:
        # input 20 -> conv 18 -> conv+pool 8 -> conv+pool 3
        layer0_w = num_3d_pixel
        layer0_h = num_3d_pixel
        layer0_d = num_3d_pixel
        layer1_w = layer0_w - filter_w + 1        # 18
        layer1_h = layer0_h - filter_w + 1
        layer1_d = layer0_d - filter_w + 1
        layer2_w = (layer1_w - filter_w + 1) // 2  # 8
        layer2_h = (layer1_h - filter_w + 1) // 2
        layer2_d = (layer1_d - filter_w + 1) // 2
        layer3_w = (layer2_w - filter_w + 1) // 2  # 3
        layer3_h = (layer2_h - filter_w + 1) // 2
        layer3_d = (layer2_d - filter_w + 1) // 2

        ######################
        # BUILD ACTUAL MODEL #
        ######################

        batchsize = batch_size
        in_time = num_3d_pixel
        in_width = num_3d_pixel
        in_height = num_3d_pixel
        flt_channels = 100
        flt_time = filter_w
        flt_width = filter_w
        flt_height = filter_w

        # conv3d layout: signals are (batch, time, channels, height, width),
        # filters are (out_channels, flt_time, in_channels, flt_h, flt_w).
        signals_shape0 = (batchsize, in_time, in_channels, in_height, in_width)
        filters_shape0 = (flt_channels, flt_time, in_channels, flt_height, flt_width)
        signals_shape1 = (batchsize, layer1_d, flt_channels, layer1_h, layer1_w)
        filters_shape1 = (flt_channels * 2, flt_time, flt_channels, flt_height, flt_width)
        signals_shape2 = (batchsize, layer2_d, flt_channels * 2, layer2_h, layer2_w)
        filters_shape2 = (flt_channels * 4, flt_time, flt_channels * 2, flt_height, flt_width)

        layer0_input = input.reshape(signals_shape0)

        # Block 0: conv only.
        Dropout_layer0 = Dropout_Conv_3d_Layer(rng=rng, input=layer0_input,
                                               image_shape=signals_shape0,
                                               filter_shape=filters_shape0,
                                               dropout_rate=dropout_rates[0])
        self.dropout_layers.append(Dropout_layer0)
        next_dropout_input = Dropout_layer0.output

        layer0 = Conv_3d_Layer(rng=rng, input=layer0_input,
                               image_shape=signals_shape0,
                               filter_shape=filters_shape0,
                               W=Dropout_layer0.W * (1 - dropout_rates[0]),
                               b=Dropout_layer0.b * (1 - dropout_rates[0]))
        self.layers.append(layer0)
        next_layer_input = layer0.output

        # Block 1: conv + 2x2x2 max-pool. The dimshuffle moves channels next
        # to batch so PoolLayer3D pools over the three spatial axes, then the
        # output is shuffled back to the conv3d layout.
        Dropout_layer1 = Dropout_Conv_3d_Layer(rng=rng, input=next_dropout_input,
                                               image_shape=signals_shape1,
                                               filter_shape=filters_shape1,
                                               dropout_rate=dropout_rates[1])
        Dropout_layer1_pool = PoolLayer3D(input=Dropout_layer1.output.dimshuffle(0, 2, 1, 3, 4),
                                          pool_shape=(2, 2, 2))
        self.dropout_layers.append(Dropout_layer1)
        next_dropout_input = Dropout_layer1_pool.output.dimshuffle(0, 2, 1, 3, 4)

        layer1 = Conv_3d_Layer(rng=rng, input=next_layer_input,
                               image_shape=signals_shape1,
                               filter_shape=filters_shape1,
                               W=Dropout_layer1.W * (1 - dropout_rates[1]),
                               b=Dropout_layer1.b * (1 - dropout_rates[1]))
        layer1_pool = PoolLayer3D(input=layer1.output.dimshuffle(0, 2, 1, 3, 4),
                                  pool_shape=(2, 2, 2))
        self.layers.append(layer1)
        next_layer_input = layer1_pool.output.dimshuffle(0, 2, 1, 3, 4)

        # Block 2: conv + pool, then flatten for the fully connected layers.
        Dropout_layer2 = Dropout_Conv_3d_Layer(rng=rng, input=next_dropout_input,
                                               image_shape=signals_shape2,
                                               filter_shape=filters_shape2,
                                               dropout_rate=dropout_rates[2])
        Dropout_layer2_pool = PoolLayer3D(input=Dropout_layer2.output.dimshuffle(0, 2, 1, 3, 4),
                                          pool_shape=(2, 2, 2))
        self.dropout_layers.append(Dropout_layer2)
        next_dropout_input = Dropout_layer2_pool.output.dimshuffle(0, 2, 1, 3, 4).flatten(2)

        layer2 = Conv_3d_Layer(rng=rng, input=next_layer_input,
                               image_shape=signals_shape2,
                               filter_shape=filters_shape2,
                               W=Dropout_layer2.W * (1 - dropout_rates[2]),
                               b=Dropout_layer2.b * (1 - dropout_rates[2]))
        layer2_pool = PoolLayer3D(input=layer2.output.dimshuffle(0, 2, 1, 3, 4),
                                  pool_shape=(2, 2, 2))
        self.layers.append(layer2)
        next_layer_input = layer2_pool.output.dimshuffle(0, 2, 1, 3, 4).flatten(2)

        # Fully connected layer: (flt_channels*4 * 3*3*3) -> 1000
        Dropout_layer3 = DropoutHiddenLayer(rng=rng, input=next_dropout_input,
                                            activation=relu,
                                            n_in=(flt_channels * 4 * layer3_d * layer3_w * layer3_h),
                                            n_out=1000,
                                            dropout_rate=dropout_rates[3])
        self.dropout_layers.append(Dropout_layer3)
        next_dropout_input = Dropout_layer3.output

        layer3 = HiddenLayer(rng=rng, input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1 - p)
                             W=Dropout_layer3.W * (1 - dropout_rates[3]),
                             b=Dropout_layer3.b * (1 - dropout_rates[3]),
                             n_in=(flt_channels * 4 * layer3_d * layer3_w * layer3_h),
                             n_out=1000)
        self.layers.append(layer3)
        next_layer_input = layer3.output

        # Fully connected layer: 1000 -> 100
        Dropout_layer4 = DropoutHiddenLayer(rng=rng, input=next_dropout_input,
                                            activation=relu,
                                            n_in=1000, n_out=100,
                                            dropout_rate=dropout_rates[4])
        self.dropout_layers.append(Dropout_layer4)
        next_dropout_input = Dropout_layer4.output

        layer4 = HiddenLayer(rng=rng, input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1 - p)
                             W=Dropout_layer4.W * (1 - dropout_rates[4]),
                             b=Dropout_layer4.b * (1 - dropout_rates[4]),
                             n_in=1000, n_out=100)
        self.layers.append(layer4)
        next_layer_input = layer4.output

        # Softmax output: 100 -> 20 classes. The two paths share W and b
        # directly since no dropout is applied at the output layer.
        Dropout_layer5 = LogisticRegression(input=next_dropout_input,
                                            n_in=100, n_out=20)
        self.dropout_layers.append(Dropout_layer5)

        layer5 = LogisticRegression(input=next_layer_input,
                                    W=Dropout_layer5.W, b=Dropout_layer5.b,
                                    n_in=100, n_out=20)
        self.layers.append(layer5)

        self.dropout_negative_log_likelihood = self.dropout_layers[-1].negative_log_likelihood
        self.dropout_errors = self.dropout_layers[-1].errors
        self.negative_log_likelihood = self.layers[-1].negative_log_likelihood
        self.errors = self.layers[-1].errors

        self.L2_sqr = (T.sum(Dropout_layer0.W ** 2) + T.sum(Dropout_layer1.W ** 2)
                       + T.sum(Dropout_layer2.W ** 2) + T.sum(Dropout_layer3.W ** 2)
                       + T.sum(Dropout_layer4.W ** 2) + T.sum(Dropout_layer5.W ** 2))

        self.params = [param for layer in self.dropout_layers
                       for param in layer.params]
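
# The training loop below uses vanilla SGD and, as its comment notes, other
# optimizers such as RMSprop or Adam could be swapped in. This is a minimal,
# untested sketch of RMSprop updates (not part of the original training path);
# the function name and hyperparameter defaults are hypothetical.
def rmsprop_updates(cost, params, learning_rate=1e-3, rho=0.9, eps=1e-6):
    updates = []
    for param in params:
        grad = T.grad(cost, param)
        # Running average of squared gradients, kept in a shared variable.
        acc = theano.shared(numpy.zeros(param.get_value(borrow=True).shape,
                                        dtype=theano.config.floatX))
        new_acc = rho * acc + (1 - rho) * grad ** 2
        updates.append((acc, new_acc))
        updates.append((param, param - learning_rate * grad / T.sqrt(new_acc + eps)))
    return updates
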

def train_3DCNN(num_of_parts=6, learning_rate=0.002, n_epochs=10, batch_size=20,
                filter_w=3, reg=5e-6, dropout=True,
                dropout_rates=[0.3, 0.3, 0.3, 0.3, 0.3]):
    rng = numpy.random.RandomState(23455)

    [all_examples, all_labels, all_train_sizes, val_size] = load_ATOM_BOX(num_of_parts)
    Xtr = all_examples[0]
    Xv = all_examples[1]
    ytr = all_labels[0]
    yv = all_labels[1]

    valid_set_x, valid_set_y = shared_dataset(Xv, yv)
    valid_set_x = valid_set_x.dimshuffle(0, 4, 1, 2, 3)
    n_train_batches = [a // batch_size for a in all_train_sizes]
    n_valid_batches = val_size // batch_size

    index = T.lscalar()  # index to a [mini]batch
    dtensor5 = T.TensorType('float32', (False,) * 5)
    x = dtensor5('x')
    y = T.ivector('y')

    classifier = ConvDropNet(rng=rng, input=x, in_channels=4,
                             batch_size=batch_size, dropout_rates=dropout_rates)
    L2_sqr = classifier.L2_sqr
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y) + reg * L2_sqr

    print('... building the model')

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    output = dropout_cost if dropout else cost
    grads = [T.grad(output, param) for param in classifier.params]

    # Vanilla SGD; for other optimizers such as RMSprop or Adam, see the
    # rmsprop_updates sketch above.
    updates = []
    for param_i, grad_i in zip(classifier.params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([x, y], cost, updates=updates)
    train_error = theano.function([x, y], classifier.errors(y))

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 100000        # look at this many examples regardless
    patience_increase = 2    # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches[0], patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    cost_ij = 0
    epoch = 0
    done_looping = False
    iter = 0

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        train_history = []
        for part_index in range(num_of_parts):
            for minibatch_index in range(n_train_batches[part_index]):
                X_train = Xtr[part_index][minibatch_index * batch_size:
                                          (minibatch_index + 1) * batch_size]
                y_train = ytr[part_index][minibatch_index * batch_size:
                                          (minibatch_index + 1) * batch_size]
                train_set_x, train_set_y = shared_dataset(X_train, y_train)
                train_set_x = train_set_x.dimshuffle(0, 4, 1, 2, 3)

                iter = (epoch - 1) * n_train_batches[part_index] + minibatch_index
                cost_ij = train_model(train_set_x.eval(), train_set_y.eval())
                train_error_ij = train_error(train_set_x.eval(), train_set_y.eval())
                print('train_error_ij: %f' % train_error_ij)
                train_history.append(train_error_ij)

                if (iter + 1) % validation_frequency == 0:
                    this_train_error = numpy.mean(train_history)
                    validation_losses = [validate_model(i)
                                         for i in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)

                    with open('../progress_3DCNN_full_sidechain.txt', 'a') as list_file:
                        list_file.write('epoch %i, minibatch %i/%i, train error %f %%,' %
                                        (epoch, minibatch_index + 1,
                                         n_train_batches[part_index],
                                         this_train_error * 100.))
                        list_file.write('validation error %f %%' %
                                        (this_validation_loss * 100.))
                        list_file.write('\n')

                    if this_validation_loss < best_validation_loss:
                        if this_validation_loss < best_validation_loss * \
                                improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iter
                        dump_weights_pickle(classifier)
                        with open('../progress_3DCNN_full_sidechain.txt', 'a') as list_file:
                            list_file.write('getting weights from classifier ...\n')

                # stop early once patience is exhausted
                if patience <= iter:
                    done_looping = True
                    break
            if done_looping:
                break

    print('Optimization complete.')

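
# A minimal inference sketch (not in the original script): the non-dropout
# path in classifier.layers already carries the (1 - p)-scaled weights, so a
# prediction function can be compiled against its output layer. This assumes
# the LogisticRegression class in layers.py exposes a y_pred attribute, as in
# the standard Theano tutorial implementation; x must be the same symbolic
# variable the classifier was built on.
def build_predict_function(classifier, x):
    return theano.function(inputs=[x], outputs=classifier.layers[-1].y_pred)
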

def dump_weights_pickle(classifier,
                        file_name='../weights/weight_3DCNN_full_sidechain.zip'):
    # classifier.params alternates W and b for each of the six layers.
    W0, b0, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5 = classifier.params
    with open(file_name, 'wb') as f:
        dump((W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5), f)

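
# Counterpart to dump_weights_pickle (not in the original script): a sketch of
# reloading the pickled shared variables with theano.misc.pkl_utils.load. The
# function name is hypothetical; the values come back in the order they were
# dumped above.
def load_weights_pickle(file_name='../weights/weight_3DCNN_full_sidechain.zip'):
    from theano.misc.pkl_utils import load
    with open(file_name, 'rb') as f:
        W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5 = load(f)
    return [W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5]
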

if __name__ == '__main__':
    train_3DCNN(num_of_parts=6, learning_rate=0.002, n_epochs=50,
                batch_size=20, filter_w=3, reg=5e-6)