import os
import sys
import time

import numpy
import theano
import theano.tensor as T
from layers import *
import random
from data_utils import *
from theano.misc.pkl_utils import dump
import argparse


class FEATURE_SOFTMAX(object):
    """Two hidden layers (480 -> 100 -> 20) plus a softmax output layer.

    Two parallel paths are built through the graph: a dropout path used for
    training and a deterministic path (with weights rescaled by 1 - p) used
    for validation and testing.
    """

    def __init__(self, rng, input, batch_size, use_bias=True):

        self.layers = []
        self.dropout_layers = []
        batchsize = batch_size

        signals_shape0 = (batchsize, 480)
        layer0_input = input.reshape(signals_shape0)

        next_dropout_input = layer0_input
        next_layer_input = layer0_input

        ##################################
        # First hidden layer: 480 -> 100, dropout rate 0.3 on the training path.
        Dropout_layer3 = DropoutHiddenLayer(rng=rng,
                                            input=next_dropout_input,
                                            activation=relu,
                                            n_in=480,
                                            n_out=100,
                                            dropout_rate=0.3)
        self.dropout_layers.append(Dropout_layer3)
        next_dropout_input = Dropout_layer3.output

        # Reuse the parameters from the dropout layer here, in a different
        # path through the graph.
        layer3 = HiddenLayer(rng=rng,
                             input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1 - p)
                             W=Dropout_layer3.W * (1 - 0.3),
                             b=Dropout_layer3.b * (1 - 0.3),
                             n_in=480,
                             n_out=100)
        self.layers.append(layer3)
        next_layer_input = layer3.output

        # Second hidden layer: 100 -> 20, dropout rate 0.3 on the training path.
        Dropout_layer4 = DropoutHiddenLayer(rng=rng,
                                            input=next_dropout_input,
                                            activation=relu,
                                            n_in=100,
                                            n_out=20,
                                            dropout_rate=0.3)
        self.dropout_layers.append(Dropout_layer4)
        next_dropout_input = Dropout_layer4.output

        # Reuse the parameters from the dropout layer here, in a different
        # path through the graph.
        layer4 = HiddenLayer(rng=rng,
                             input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1 - p)
                             W=Dropout_layer4.W * (1 - 0.3),
                             b=Dropout_layer4.b * (1 - 0.3),
                             n_in=100,
                             n_out=20)
        self.layers.append(layer4)
        next_layer_input = layer4.output

        ##################### TODO #######################
        # Softmax output layer (20 classes); the deterministic path shares
        # the dropout layer's parameters directly.
        Dropout_layer5 = LogisticRegression(
            input=next_dropout_input,
            n_in=20, n_out=20)
        self.dropout_layers.append(Dropout_layer5)

        layer5 = LogisticRegression(
            input=next_layer_input,
            W=Dropout_layer5.W,
            b=Dropout_layer5.b,
            n_in=20, n_out=20)
        self.layers.append(layer5)

        # Use the negative log likelihood of the logistic regression layer as
        # the objective.
        self.dropout_negative_log_likelihood = self.dropout_layers[-1].negative_log_likelihood
        self.dropout_errors = self.dropout_layers[-1].errors

        self.negative_log_likelihood = self.layers[-1].negative_log_likelihood
        self.errors = self.layers[-1].errors

        # L2 penalty over all weight matrices, used for regularisation.
        self.L2_sqr = (T.sum(Dropout_layer3.W ** 2)
                       + T.sum(Dropout_layer4.W ** 2)
                       + T.sum(Dropout_layer5.W ** 2))

        # Grab all the parameters together.
        self.params = [param
                       for layer in self.dropout_layers
                       for param in layer.params]


def train_FEATURE_SOFTMAX(learning_rate=0.002, n_epochs=10, batch_size=20,
                          reg=5e-6, dropout=True):

    rng = numpy.random.RandomState(23455)

    [Xtr, ytr, Xt, yt, Xv, yv] = load_FEATURE()

    n_train_batches = Xtr.shape[0] / batch_size
    n_valid_batches = Xv.shape[0] / batch_size
    n_test_batches = Xt.shape[0] / batch_size

    valid_set_x, valid_set_y = shared_dataset(Xv, yv)
    test_set_x, test_set_y = shared_dataset(Xt, yt)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.fmatrix('x')
    y = T.ivector('y')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    classifier = FEATURE_SOFTMAX(rng=rng, input=x, batch_size=batch_size)

    L2_sqr = classifier.L2_sqr
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y) + reg * L2_sqr

    print '... building the model'

    # Compile theano functions for testing and validation.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # Use the right cost function here to train with or without dropout.
    output = dropout_cost if dropout else cost

    grads = []
    for param in classifier.params:
        gparam = T.grad(output, param)
        grads.append(gparam)

    # Plain SGD parameter updates.
    updates = []
    for param_i, grad_i in zip(classifier.params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([x, y], cost, updates=updates)

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 100000  # look at this many examples regardless
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    cost_ij = 0
    epoch = 0
    done_looping = False
    iter = 0
    startc = time.clock()

    cost_history = []
    train_history = []
    valid_history = []

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            X_train = Xtr[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            y_train = ytr[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            train_set_x, train_set_y = shared_dataset(X_train, y_train)

            iter = (epoch - 1) * n_train_batches + minibatch_index

            cost_ij = train_model(train_set_x.eval(), train_set_y.eval())
            cost_history.append(cost_ij)

            if (iter + 1) % validation_frequency == 0:
                list_file = open('../progress_FEATURE.txt', 'a')

                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                valid_history.append(100 * (1 - this_validation_loss))

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                list_file.write('epoch %i, minibatch %i/%i, validation error %f %%' %
                                (epoch, minibatch_index + 1, n_train_batches,
                                 this_validation_loss * 100.))
                list_file.write('\n')

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if the improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    list_file.write(('     epoch %i, minibatch %i/%i, test error of '
                                     'best model %f %%') %
                                    (epoch, minibatch_index + 1, n_train_batches,
                                     test_score * 100.))
                    list_file.write('\n')

                list_file.close()

        # dump the current weights at the end of every epoch
        list_file = open('../progress_FEATURE.txt', 'a')
        list_file.write('getting weights from classifier ...' + '\n')
        list_file.close()
        dump_weights_pickle(classifier, '../weights/weight_FEATURE.zip')

    end_time = time.clock()

    dump_weights_pickle(classifier, '../weights/weight_FEATURE.zip')

    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))


def dump_weights_pickle(classifier, file_name):
    # classifier.params is ordered [W3, b3, W4, b4, W5, b5].
    W3 = classifier.params[0]
    W4 = classifier.params[2]
    W5 = classifier.params[4]

    b3 = classifier.params[1]
    b4 = classifier.params[3]
    b5 = classifier.params[5]

    with open(file_name, 'wb') as f:
        dump((W3, W4, W5, b3, b4, b5), f)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("epoch", help="number of training epochs")
    args = parser.parse_args()
    num_epoch = int(args.epoch)

    train_FEATURE_SOFTMAX(learning_rate=0.002, n_epochs=num_epoch,
                          batch_size=20, reg=5e-8)
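# ---------------------------------------------------------------------------
# NOTE: load_FEATURE and shared_dataset are imported from data_utils, which is
# not part of this file, so their exact signatures are assumptions here.  For
# reference only, a minimal shared_dataset in the style of the Theano
# deep-learning tutorials would look like the commented sketch below; the real
# data_utils implementation may differ.
#
#   def shared_dataset(data_x, data_y, borrow=True):
#       """Place a dataset in shared variables so Theano can move it to the
#       GPU in one transfer; labels are stored as floatX and cast to int32."""
#       shared_x = theano.shared(
#           numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
#       shared_y = theano.shared(
#           numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
#       return shared_x, T.cast(shared_y, 'int32')
# ---------------------------------------------------------------------------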