# Evaluation script: rebuilds a pretrained 3D-CNN from pickled weights and
# writes per-fold positive-class probability scores for candidate sites.
# (Legacy Theano code base.)
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from pprint import pprint
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from theano.tensor.nnet import conv3d2d
from scipy.io import matlab
import re
import math
from theano import shared
from collections import OrderedDict
import random

from layers import *


def shared_dataset(data_x, borrow=True):
    """Wrap a numpy array in a Theano shared variable (floatX on device)."""
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x


def load_data(ID):
    """Load every <ID>_<n>.dat array from ../data/numpy/ as a shared variable."""
    input_dir = '../data/numpy/'
    files = [os.path.join(input_dir, f) for f in os.listdir(input_dir)
             if os.path.isfile(os.path.join(input_dir, f))]
    files = [t for t in files if ID in t]
    files = [t for t in files if '.dat' in t]
    total_num = len(files)
    print(total_num)
    all_X = []
    for dat_num in range(0, total_num):
        print(dat_num)
        X = numpy.load(input_dir + ID + '_' + str(dat_num) + '.dat')
        test_set_x = shared_dataset(X)
        # Reorder axes to the (batch, time, channels, height, width) layout
        # expected by the conv3d2d-based layers.
        test_set_x = test_set_x.dimshuffle(0, 4, 1, 2, 3)
        all_X.append(test_set_x)
    return all_X, total_num


def load_weights_pickle(result_weights_ID, fold):
    """Load the pretrained weights and biases for one fold from a .zip archive."""
    file_name = ('../../Benchmark_GASS/NOS/results/weights/weight_'
                 + result_weights_ID + '_' + str(fold) + '.zip')
    # Open the archive once instead of re-reading it for every array.
    archive = numpy.load(file_name)
    keys = list(archive.keys())
    print(keys)
    W0 = archive[keys[0]]
    W1 = archive[keys[1]]
    W2 = archive[keys[2]]
    W3 = archive[keys[3]]
    b0 = archive[keys[4]]
    b1 = archive[keys[5]]
    b2 = archive[keys[6]]
    b3 = archive[keys[7]]
    W0 = theano.shared(value=W0, name='W0', borrow=True)
    W1 = theano.shared(value=W1, name='W1', borrow=True)
    W2 = theano.shared(value=W2, name='W2', borrow=True)
    W3 = theano.shared(value=W3, name='W3', borrow=True)
    b0 = theano.shared(value=b0, name='b0', borrow=True)
    b1 = theano.shared(value=b1, name='b1', borrow=True)
    b2 = theano.shared(value=b2, name='b2', borrow=True)
    b3 = theano.shared(value=b3, name='b3', borrow=True)
    return [W0, W1, W2, W3, b0, b1, b2, b3]


class fine_pool_S_CNN_dA(object):
    """Stacked 3D conv/pool network with a logistic-regression output layer,
    rebuilt from pretrained weights for inference."""

    def __init__(
        self,
        numpy_rng,
        signals_shape,
        filters_shape,
        input,
        Weights,
        Bias,
        theano_rng=None,
        filter_sizes=[100, 200, 400],
        n_outs=2,
        corruption_levels=[0.1, 0.1, 0.1],
    ):
        self.CNN_pool_layers = []
        self.tparams = OrderedDict()
        self.params = []
        self.n_layers = len(filter_sizes)
        assert self.n_layers > 0

        (batch_size, in_time, in_channels, in_height, in_width) = signals_shape
        (flt_channels, flt_time, in_channels, flt_height, flt_width) = filters_shape
        assert flt_channels == filter_sizes[0]

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        dtensor5 = T.TensorType('float32', (False,) * 5)

        # All but the last layer are conv + 2x pooling, so each of these
        # layers halves the time, height and width dimensions.
        for i in range(self.n_layers - 1):
            if i == 0:
                image_shape = signals_shape
                filter_shape = filters_shape
            else:
                image_shape = (batch_size,
                               int(in_time / math.pow(2, i)),
                               filter_sizes[i - 1],
                               int(in_height / math.pow(2, i)),
                               int(in_width / math.pow(2, i)))
                filter_shape = (filter_sizes[i], flt_time, filter_sizes[i - 1],
                                flt_height, flt_width)
            if i == 0:
                layer_input = input
            else:
                layer_input = self.CNN_pool_layers[-1].output
            print("layer " + str(i))
            print("signal shape:")
            print(image_shape)
            print("filter shape:")
            print(filter_shape)
            CNN_pool_layer = Pad_Conv_Pool(rng=numpy_rng,
                                           input=layer_input,
                                           filter_shape=filter_shape,
                                           image_shape=image_shape,
                                           W=Weights[i],
                                           b=Bias[i])
            # add the layer to our list of layers
            self.CNN_pool_layers.append(CNN_pool_layer)
            self.params.extend(CNN_pool_layer.params)
            self.tparams['W_' + str(i)] = CNN_pool_layer.W
            self.tparams['b_' + str(i)] = CNN_pool_layer.b
            print("CNN layer W:")
            print(CNN_pool_layer.W.shape.eval())
            print("CNN layer b:")
            print(CNN_pool_layer.b.shape.eval())

        # Final convolutional layer (no pooling).
        i = i + 1
        image_shape = (batch_size,
                       int(in_time / math.pow(2, i)),
                       filter_sizes[i - 1],
                       int(in_height / math.pow(2, i)),
                       int(in_width / math.pow(2, i)))
        filter_shape = (filter_sizes[i], flt_time, filter_sizes[i - 1],
                        flt_height, flt_width)
        layer_input = self.CNN_pool_layers[-1].output
        print("signal shape:")
        print(image_shape)
        print("filter shape:")
        print(filter_shape)
        self.CNN_layer = Conv_3d_Layer(rng=numpy_rng,
                                       input=layer_input,
                                       filter_shape=filter_shape,
                                       image_shape=image_shape,
                                       W=Weights[i],
                                       b=Bias[i])
        self.params.extend(self.CNN_layer.params)
        self.tparams['W_2'] = self.CNN_layer.W
        self.tparams['b_2'] = self.CNN_layer.b

        # Logistic-regression output on the flattened feature maps; the three
        # pow(2, i) factors undo the two rounds of 2x pooling along time,
        # height and width.
        self.logLayer = LogisticRegression(
            input=self.CNN_layer.output.flatten(2),
            n_in=int((filter_sizes[-1] * in_time * in_height * in_width)
                     / (math.pow(2, i) * math.pow(2, i) * math.pow(2, i))),
            n_out=n_outs,
            W=Weights[-1],
            b=Bias[-1],
        )
        self.params.extend(self.logLayer.params)
        self.tparams['W_log'] = self.logLayer.W
        self.tparams['b_log'] = self.logLayer.b

        # Sum of squared weights over all layers (L2 regularization term).
        self.L2_sqr = (T.sum(self.CNN_pool_layers[0].W ** 2)
                       + T.sum(self.CNN_pool_layers[1].W ** 2)
                       + T.sum(self.CNN_layer.W ** 2)
                       + T.sum(self.logLayer.W ** 2))


def post_fine_S_CNN_dA(fold, all_X, total_num, target_RES, target_ATOM, site_name):
    """Score every loaded batch with the pretrained weights of one CV fold."""
    result_weights_ID = target_RES + '_' + target_ATOM + '_' + site_name
    print("fold: " + str(fold))
    [W0, W1, W2, W3, b0, b1, b2, b3] = load_weights_pickle(
        target_RES + '_' + target_ATOM, fold)
    Weights = [W0, W1, W2, W3]
    Bias = [b0, b1, b2, b3]
    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')
    flt_channels = 32
    flt_time = 3
    in_channels = 4
    flt_height = 3
    flt_width = 3
    in_time = 20
    in_height = 20
    in_width = 20
    filters_shape = (flt_channels, flt_time, in_channels, flt_height, flt_width)
    # construct the stacked denoising autoencoder class
    dtensor5 = T.TensorType('float32', (False,) * 5)
    for ind in range(0, total_num):
        x = all_X[ind]
        batch_size = x.eval().shape[0]
        signals_shape = (batch_size, in_time, in_channels, in_height, in_width)
        s_cnn_da = fine_pool_S_CNN_dA(
            input=x,
            Weights=Weights,
            Bias=Bias,
            numpy_rng=numpy_rng,
            signals_shape=signals_shape,
            filters_shape=filters_shape,
            filter_sizes=[32, 64, 128],
            n_outs=2,
            corruption_levels=[0., 0., 0.],
        )
        pred = s_cnn_da.logLayer.y_pred.eval()
        # Probability of the positive class, pickled per batch.
        pos_prob = s_cnn_da.logLayer.p_y_given_x[:, 1].eval()
        pos_prob.dump('../results/prob_score/' + result_weights_ID
                      + '_3DCNN_fold_' + str(fold) + '_' + str(ind) + '.dat')


def eval_NOS_3DCNN(target_RES, target_ATOM, site_name):
    """Run all five cross-validation folds for one residue/atom/site target."""
    numpy_ID = target_RES + '_' + target_ATOM + '_' + site_name
    all_X, total_num = load_data(numpy_ID)
    total_fold = 5
    for fold in range(0, total_fold):
        post_fine_S_CNN_dA(fold, all_X, total_num, target_RES, target_ATOM, site_name)
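

# --- Usage sketch (hypothetical) -------------------------------------------
# A minimal invocation of the evaluation entry point above. The residue/atom/
# site identifiers below are placeholders, not values confirmed by this code
# base: the real arguments must match the naming of the input files in
# ../data/numpy/ (<RES>_<ATOM>_<SITE>_<n>.dat) and of the weight archives
# under ../../Benchmark_GASS/NOS/results/weights/.
if __name__ == '__main__':
    eval_NOS_3DCNN('LYS', 'NZ', 'NOS')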