import os
import sys
import time
import numpy
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from theano.tensor.nnet.nnet import sigmoid
from theano import shared
from collections import OrderedDict
from theano.misc.pkl_utils import dump

from process_poc_pretrain import *
from data_utils import *
from graph_cnn_layers import *

max_poc_degrees = 20
max_nodes_in_poc = 50


def shared_dataset(env_features, env_neighbors, env_degrees, mask, borrow=True):
    # Wrap the numpy batch arrays as floatX Theano shared variables.
    shared_env_features = theano.shared(numpy.asarray(env_features, dtype=theano.config.floatX), borrow=borrow)
    shared_env_neighbors = theano.shared(numpy.asarray(env_neighbors, dtype=theano.config.floatX), borrow=borrow)
    shared_env_degrees = theano.shared(numpy.asarray(env_degrees, dtype=theano.config.floatX), borrow=borrow)
    shared_mask = theano.shared(numpy.asarray(mask, dtype=theano.config.floatX), borrow=borrow)
    return shared_env_features, shared_env_neighbors, shared_env_degrees, shared_mask


def dump_weights_pickle(num_layers, Weights, Bias, file_name):
    # Flatten all shared variables into a single tuple and pickle it.
    all_var_tuple = ()
    for i in range(num_layers):
        [W_self, W_degrees] = Weights[i]
        [b_prime, b_prime_self, b_layer] = Bias[i]
        all_var_tuple = all_var_tuple + (W_self,)
        for j in range(max_poc_degrees):
            all_var_tuple = all_var_tuple + (W_degrees[j],)
        all_var_tuple = all_var_tuple + (b_prime, b_prime_self, b_layer)
    W_out_0 = Weights[2]
    W_prime_out_0 = Weights[3]
    W_out_1 = Weights[4]
    W_prime_out_1 = Weights[5]
    b_out_0 = Bias[2]
    b_prime_out_0 = Bias[3]
    b_out_1 = Bias[4]
    b_prime_out_1 = Bias[5]
    all_var_tuple = all_var_tuple + (W_out_0, W_prime_out_0, b_out_0, b_prime_out_0,
                                     W_out_1, W_prime_out_1, b_out_1, b_prime_out_1)
    with open(file_name, 'wb') as f:
        dump(all_var_tuple, f)
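
# The loaders below index the pickled archive by position, mirroring the dump
# order above: each layer block is W_self, W_degrees[0..max_poc_degrees-1],
# b_prime, b_prime_self, b_layer (max_poc_degrees + 4 entries, hence
# begin_layer_1 = max_poc_degrees + 4), followed by the output-layer weights.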
def load_autoencoder_I_W(file_name):
    # Load the archive once and index its keys in dump order.
    archive = numpy.load(file_name)
    pickle_keys = list(archive.keys())

    # layer 0
    W_self_0 = archive[pickle_keys[0]]
    W_degrees_0 = []
    for i in range(max_poc_degrees):
        W_d = numpy.asarray(archive[pickle_keys[1 + i]], dtype=theano.config.floatX)
        W_d = theano.shared(value=W_d, name='W_degree_0_d_' + str(i), borrow=True)
        W_degrees_0.append(W_d)
    b_prime_0 = archive[pickle_keys[1 + max_poc_degrees]]
    b_prime_self_0 = archive[pickle_keys[2 + max_poc_degrees]]
    b_layer_0 = archive[pickle_keys[3 + max_poc_degrees]]

    # layer 1
    begin_layer_1 = max_poc_degrees + 4
    W_self_1 = archive[pickle_keys[begin_layer_1]]
    W_degrees_1 = []
    for i in range(max_poc_degrees):
        W_d = numpy.asarray(archive[pickle_keys[begin_layer_1 + 1 + i]], dtype=theano.config.floatX)
        W_d = theano.shared(value=W_d, name='W_degree_1_d_' + str(i), borrow=True)
        W_degrees_1.append(W_d)
    b_prime_1 = archive[pickle_keys[begin_layer_1 + 1 + max_poc_degrees]]
    b_prime_self_1 = archive[pickle_keys[begin_layer_1 + 2 + max_poc_degrees]]
    b_layer_1 = archive[pickle_keys[begin_layer_1 + 3 + max_poc_degrees]]

    # Wrap the raw arrays as Theano shared variables.
    W_self_0 = theano.shared(value=numpy.asarray(W_self_0, dtype=theano.config.floatX),
                             name='W_self_0', borrow=True)
    W_self_1 = theano.shared(value=numpy.asarray(W_self_1, dtype=theano.config.floatX),
                             name='W_self_1', borrow=True)
    b_prime_0 = theano.shared(value=numpy.asarray(b_prime_0, dtype=theano.config.floatX),
                              name='b_prime_0', borrow=True)
    b_prime_1 = theano.shared(value=numpy.asarray(b_prime_1, dtype=theano.config.floatX),
                              name='b_prime_1', borrow=True)
    b_prime_self_0 = theano.shared(value=numpy.asarray(b_prime_self_0, dtype=theano.config.floatX),
                                   name='b_prime_self_0', borrow=True)
    b_prime_self_1 = theano.shared(value=numpy.asarray(b_prime_self_1, dtype=theano.config.floatX),
                                   name='b_prime_self_1', borrow=True)
    b_layer_0 = theano.shared(value=numpy.asarray(b_layer_0, dtype=theano.config.floatX),
                              name='b_layer_0', borrow=True)
    b_layer_1 = theano.shared(value=numpy.asarray(b_layer_1, dtype=theano.config.floatX),
                              name='b_layer_1', borrow=True)

    return [W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0,
            W_self_1, W_degrees_1, b_prime_1, b_prime_self_1, b_layer_1]
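
# Round-trip sanity check (hypothetical path; assumes the archive was written
# by dump_weights_pickle above):
#   params = load_autoencoder_I_W('../weights/weights_graph_autoencoder_I.zip')
#   assert len(params) == 10  # five entries per layer; W_degrees_* are lists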
def load_autoencoder_II_W(file_name):
    # Same layout as load_autoencoder_I_W, plus the output-layer weights.
    archive = numpy.load(file_name)
    pickle_keys = list(archive.keys())

    # layer 0
    W_self_0 = archive[pickle_keys[0]]
    W_degrees_0 = []
    for i in range(max_poc_degrees):
        W_d = numpy.asarray(archive[pickle_keys[1 + i]], dtype=theano.config.floatX)
        W_d = theano.shared(value=W_d, name='W_degree_0_d_' + str(i), borrow=True)
        W_degrees_0.append(W_d)
    b_prime_0 = archive[pickle_keys[1 + max_poc_degrees]]
    b_prime_self_0 = archive[pickle_keys[2 + max_poc_degrees]]
    b_layer_0 = archive[pickle_keys[3 + max_poc_degrees]]

    # layer 1
    begin_layer_1 = max_poc_degrees + 4
    W_self_1 = archive[pickle_keys[begin_layer_1]]
    W_degrees_1 = []
    for i in range(max_poc_degrees):
        W_d = numpy.asarray(archive[pickle_keys[begin_layer_1 + 1 + i]], dtype=theano.config.floatX)
        W_d = theano.shared(value=W_d, name='W_degree_1_d_' + str(i), borrow=True)
        W_degrees_1.append(W_d)
    b_prime_1 = archive[pickle_keys[begin_layer_1 + 1 + max_poc_degrees]]
    b_prime_self_1 = archive[pickle_keys[begin_layer_1 + 2 + max_poc_degrees]]
    b_layer_1 = archive[pickle_keys[begin_layer_1 + 3 + max_poc_degrees]]

    # output layer
    begin_W_out = begin_layer_1 + max_poc_degrees + 4
    W_out_0 = archive[pickle_keys[begin_W_out + 0]]
    b_out_0 = archive[pickle_keys[begin_W_out + 1]]
    b_prime_out_0 = archive[pickle_keys[begin_W_out + 2]]
    W_out_1 = archive[pickle_keys[begin_W_out + 3]]
    b_out_1 = archive[pickle_keys[begin_W_out + 4]]
    b_prime_out_1 = archive[pickle_keys[begin_W_out + 5]]

    # Wrap the raw arrays as Theano shared variables.
    W_self_0 = theano.shared(value=numpy.asarray(W_self_0, dtype=theano.config.floatX),
                             name='W_self_0', borrow=True)
    W_self_1 = theano.shared(value=numpy.asarray(W_self_1, dtype=theano.config.floatX),
                             name='W_self_1', borrow=True)
    b_prime_0 = theano.shared(value=numpy.asarray(b_prime_0, dtype=theano.config.floatX),
                              name='b_prime_0', borrow=True)
    b_prime_1 = theano.shared(value=numpy.asarray(b_prime_1, dtype=theano.config.floatX),
                              name='b_prime_1', borrow=True)
    b_prime_self_0 = theano.shared(value=numpy.asarray(b_prime_self_0, dtype=theano.config.floatX),
                                   name='b_prime_self_0', borrow=True)
    b_prime_self_1 = theano.shared(value=numpy.asarray(b_prime_self_1, dtype=theano.config.floatX),
                                   name='b_prime_self_1', borrow=True)
    b_layer_0 = theano.shared(value=numpy.asarray(b_layer_0, dtype=theano.config.floatX),
                              name='b_layer_0', borrow=True)
    b_layer_1 = theano.shared(value=numpy.asarray(b_layer_1, dtype=theano.config.floatX),
                              name='b_layer_1', borrow=True)
    W_out_0 = theano.shared(value=numpy.asarray(W_out_0, dtype=theano.config.floatX),
                            name='W_out_0', borrow=True)
    W_out_1 = theano.shared(value=numpy.asarray(W_out_1, dtype=theano.config.floatX),
                            name='W_out_1', borrow=True)
    b_out_0 = theano.shared(value=numpy.asarray(b_out_0, dtype=theano.config.floatX),
                            name='b_out_0', borrow=True)
    b_out_1 = theano.shared(value=numpy.asarray(b_out_1, dtype=theano.config.floatX),
                            name='b_out_1', borrow=True)
    b_prime_out_0 = theano.shared(value=numpy.asarray(b_prime_out_0, dtype=theano.config.floatX),
                                  name='b_prime_out_0', borrow=True)
    b_prime_out_1 = theano.shared(value=numpy.asarray(b_prime_out_1, dtype=theano.config.floatX),
                                  name='b_prime_out_1', borrow=True)

    return [W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0, W_out_0, b_out_0, b_prime_out_0,
            W_self_1, W_degrees_1, b_prime_1, b_prime_self_1, b_layer_1, W_out_1, b_out_1, b_prime_out_1]


class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, activation, W=None, b=None, use_bias=True):
        self.input = input
        self.activation = activation
        if W is None:
            W_values = numpy.asarray(0.01 * rng.standard_normal(size=(n_in, n_out)),
                                     dtype=theano.config.floatX)
            W = theano.shared(value=W_values, name='W')
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b')
        self.W = W
        self.b = b
        if use_bias:
            lin_output = T.dot(input, self.W) + self.b
        else:
            lin_output = T.dot(input, self.W)
        self.output = (lin_output if activation is None else activation(lin_output))
        # parameters of the model
        if use_bias:
            self.params = [self.W, self.b]
        else:
            self.params = [self.W]
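
# Example use of HiddenLayer (hypothetical shapes, illustration only):
#   rng = numpy.random.RandomState(0)
#   x = T.matrix('x')
#   layer = HiddenLayer(rng, input=x, n_in=480, n_out=200, activation=T.tanh)
#   f = theano.function([x], layer.output)
#   f(numpy.zeros((3, 480), dtype=theano.config.floatX)).shape  # -> (3, 200)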
class sparse_dA_II(object):
    def __init__(self, numpy_rng, corruption_level, theano_rng=None,
                 n_visible=470, n_hidden=500,
                 W_out=None, b_out=None, b_layer=None, W_self=None, W_degrees=None,
                 b_prime=None, b_prime_self=None, b_prime_out=None,
                 num_in_features=None, num_input=None,
                 env_features=None, env_neighbors=None, env_degrees=None, mask=None,
                 layer_fp=None, fp_length=512):
        rng = numpy_rng
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        if layer_fp is None:
            self.layer_fp = theano.shared(numpy.zeros((num_input, fp_length),
                                                      dtype=theano.config.floatX), borrow=True)
        else:
            self.layer_fp = layer_fp
        self.num_in_features = num_in_features
        self.env_features = env_features
        self.mask = mask
        self.env_degrees = env_degrees
        # create a Theano random generator that gives symbolic random values
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.b_prime = b_prime
        self.b_prime_self = b_prime_self
        if b_prime_out is None:
            b_prime_out = theano.shared(value=numpy.zeros(num_in_features,
                                                          dtype=theano.config.floatX), borrow=True)
        self.b_prime_out = b_prime_out
        # tied weights, therefore W_prime is W transpose
        self.theano_rng = theano_rng
        # Corrupt the input by randomly zeroing entries (denoising autoencoder).
        corrupt_env_features = self.theano_rng.binomial(size=self.env_features.shape, n=1,
                                                        p=1 - corruption_level,
                                                        dtype=theano.config.floatX) * self.env_features
        self.Graph_CNN = Graph_Conv(
            rng=numpy_rng,
            W_out=W_out,
            b_out=b_out,
            b_layer=b_layer,
            W_self=W_self,
            W_degrees=W_degrees,
            num_in_features=self.num_in_features,
            num_hidden_features=self.n_hidden,
            num_input=num_input,
            env_features=corrupt_env_features,
            env_neighbors=env_neighbors,
            env_degrees=env_degrees,
            mask=mask,
            layer_fp=self.layer_fp,
            fp_length=fp_length,
        )
        self.W_primes = []
        for degree in range(max_poc_degrees):
            self.W_primes.append(self.Graph_CNN.W_degrees[degree].T)
        self.W_prime_self = self.Graph_CNN.W_self.T
        self.W_prime_out = self.Graph_CNN.W_out.T
        self.layer_conv = self.Graph_CNN.layer_conv
        self.params = [self.Graph_CNN.W_out, self.Graph_CNN.b_out]
        self.params.append(self.b_prime_out)
        # Masked squared reconstruction error of the (uncorrupted) features.
        x = self.Graph_CNN.env_features_reshaped
        y = self.Graph_CNN.env_outputs
        self.z = T.tanh(T.dot(y, self.W_prime_out) + self.b_prime_out)
        L = T.sum((x - self.z) ** 2, axis=1)
        L = L * mask
        self.cost = T.mean(L)  # + beta * penalty (optional sparsity term, disabled)

    def get_cost_updates(self, learning_rate):
        x = self.Graph_CNN.env_features_reshaped
        y = self.Graph_CNN.env_outputs
        z = T.tanh(T.dot(y, self.W_prime_out) + self.b_prime_out)
        L = T.sum((x - z) ** 2, axis=1)
        L = L * self.mask
        cost = T.mean(L)  # + beta * penalty (optional sparsity term, disabled)
        gparams = T.grad(cost, self.params)
        # plain SGD updates
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(self.params, gparams)]
        return (cost, updates)


class SdA_II(object):
    def __init__(self, numpy_rng, theano_rng=None, n_ins=480,
                 hidden_layers_sizes=[200, 100], corruption_levels=[0.05, 0.05],
                 Weights=None, Bias=None, num_input=5,
                 env_features=None, env_neighbors=None, env_degrees=None, mask=None,
                 layer_fp=None, fp_length=512):
        [W_self_0, W_degrees_0, W_out_0, W_self_1, W_degrees_1, W_out_1] = Weights
        [b_prime_0, b_prime_self_0, b_layer_0, b_out_0, b_prime_out_0,
         b_prime_1, b_prime_self_1, b_layer_1, b_out_1, b_prime_out_1] = Bias
        W_out = [W_out_0, W_out_1]
        b_out = [b_out_0, b_out_1]
        b_prime_out = [b_prime_out_0, b_prime_out_1]
        b_layer = [b_layer_0, b_layer_1]
        W_self = [W_self_0, W_self_1]
        W_degrees = [W_degrees_0, W_degrees_1]
        b_prime = [b_prime_0, b_prime_1]
        b_prime_self = [b_prime_self_0, b_prime_self_1]
        self.Graph_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        assert self.n_layers > 0
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels
        for i in range(self.n_layers):
            if i == 0:
                input_size = n_ins
                layer_input = env_features
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.dA_layers[-1].layer_conv
            dA_layer = sparse_dA_II(numpy_rng=numpy_rng,
                                    theano_rng=theano_rng,
                                    corruption_level=corruption_levels[i],
                                    n_visible=input_size,
                                    n_hidden=hidden_layers_sizes[i],
                                    W_out=W_out[i],
                                    b_out=b_out[i],
                                    b_layer=b_layer[i],
                                    W_self=W_self[i],
                                    W_degrees=W_degrees[i],
                                    b_prime=b_prime[i],
                                    b_prime_self=b_prime_self[i],
                                    b_prime_out=b_prime_out[i],
                                    num_in_features=input_size,
                                    num_input=num_input,
                                    env_features=layer_input,
                                    env_neighbors=env_neighbors,
                                    env_degrees=env_degrees,
                                    mask=mask,
                                    layer_fp=layer_fp,
                                    fp_length=512)
            self.dA_layers.append(dA_layer)
            self.params.extend(dA_layer.params)
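
# For intuition only: the masked reconstruction cost that sparse_dA_II
# minimizes, rewritten in plain numpy with hypothetical small shapes (the real
# x and y come from Graph_Conv; all names below are stand-ins).
def _masked_reconstruction_cost_demo():
    rng = numpy.random.RandomState(0)
    x = rng.randn(5, 480)                        # original (reshaped) node features
    y = rng.randn(5, 200)                        # hidden graph-conv outputs
    W_prime_out = 0.01 * rng.randn(200, 480)     # decoder weights (W_out.T in the model)
    b_prime_out = numpy.zeros(480)
    mask = numpy.array([1.0, 1.0, 1.0, 0.0, 0.0])    # zero entries mark padded nodes
    z = numpy.tanh(y.dot(W_prime_out) + b_prime_out)  # reconstruction
    L = ((x - z) ** 2).sum(axis=1) * mask             # masked squared error
    return L.mean()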
def train_poc_autoencoder_II(learning_rate=0.001, pretraining_epochs=100, batch_size=5, reg=5e-9):
    [W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0,
     W_self_1, W_degrees_1, b_prime_1, b_prime_self_1, b_layer_1] = load_autoencoder_I_W(
        file_name='../weights/weights_graph_autoencoder_I.zip')
    # Output-layer weights are trained from scratch in autoencoder II.
    W_out_0 = None
    W_out_1 = None
    b_out_0 = None
    b_prime_out_0 = None
    b_out_1 = None
    b_prime_out_1 = None
    Weights = [W_self_0, W_degrees_0, W_out_0, W_self_1, W_degrees_1, W_out_1]
    Bias = [b_prime_0, b_prime_self_0, b_layer_0, b_out_0, b_prime_out_0,
            b_prime_1, b_prime_self_1, b_layer_1, b_out_1, b_prime_out_1]
    all_poc, max_ff, min_ff, mean_ff = get_all_pocs()
    # batch_size should be the number of pairs of (pocket, mol)
    n_train_batches = int(len(all_poc) / batch_size)
    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')
    batchsize = batch_size
    env_features = T.matrix('env_features', dtype=theano.config.floatX)
    env_neighbors = T.matrix('env_neighbors', dtype=theano.config.floatX)
    env_mask = T.fvector('env_mask')
    env_degrees = T.matrix('env_degrees', dtype=theano.config.floatX)
    index = T.lscalar('index')  # index to a [mini]batch
    lr = T.scalar(name='lr')

    ##############################################
    ################## SDA #######################
    ##############################################
    sda = SdA_II(
        numpy_rng=numpy_rng,
        n_ins=480,
        hidden_layers_sizes=[200, 100],
        corruption_levels=[0.05, 0.05],
        Weights=Weights,
        Bias=Bias,
        num_input=batch_size,
        env_features=env_features,
        env_neighbors=env_neighbors,
        env_degrees=env_degrees,
        mask=env_mask,
        layer_fp=None,
        fp_length=512,
    )
    # Pre-train the layers one at a time, greedily.
    for i in range(sda.n_layers):
        training_dA = sda.dA_layers[i]
        cost, updates = training_dA.get_cost_updates(learning_rate)
        if i == 0:
            pretrain_fn = theano.function([env_features, env_mask], cost, updates=updates)
        else:
            pretrain_fn = theano.function([env_features, env_neighbors, env_degrees, env_mask],
                                          cost, updates=updates)
        for epoch in range(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in range(n_train_batches):
                if all_poc[batch_index] == []:
                    print("all_poc[batch_index]==[]", batch_index)
                else:
                    print("all_poc[batch_index]", all_poc[batch_index])
                env_features_train, env_neighbors_train, env_degrees_train, env_mask_train = \
                    get_pocket_attributes(all_poc[batch_index * batch_size:(batch_index + 1) * batch_size],
                                          max_ff, min_ff, mean_ff)
                train_EF, train_EN, train_ED, train_env_mask = shared_dataset(
                    env_features_train, env_neighbors_train, env_degrees_train, env_mask_train)
                if i == 0:
                    minibatch_cost = pretrain_fn(train_EF.eval(), train_env_mask.eval())
                else:
                    minibatch_cost = pretrain_fn(train_EF.eval(), train_EN.eval(),
                                                 train_ED.eval(), train_env_mask.eval())
                c.append(minibatch_cost)
            mean_cost = numpy.mean(c)
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch))
            print(mean_cost)
            with open('../progress_poc_pretrain_SDA_W_out_Relu_soft_scPDB.txt', 'a') as list_file:
                list_file.write('layer %i, epoch %d, cost ' % (i, epoch))
                list_file.write(str(mean_cost) + '\n')

    #### dump weights ######
    num_layers = sda.n_layers
    Weights = []
    Bias = []
    for k in range(num_layers):
        b_prime = sda.dA_layers[k].b_prime
        b_prime_self = sda.dA_layers[k].b_prime_self
        W_self = sda.dA_layers[k].Graph_CNN.W_self
        b_layer = sda.dA_layers[k].Graph_CNN.b_layer
        W_degrees = sda.dA_layers[k].Graph_CNN.W_degrees
        Weights.append([W_self, W_degrees])
        Bias.append([b_prime, b_prime_self, b_layer])
    for k in range(num_layers):
        W_out = sda.dA_layers[k].Graph_CNN.W_out
        b_out = sda.dA_layers[k].Graph_CNN.b_out
        W_prime_out = sda.dA_layers[k].W_prime_out
        b_prime_out = sda.dA_layers[k].b_prime_out
        Weights.extend([W_out, W_prime_out])
        Bias.extend([b_out, b_prime_out])
    dump_weights_pickle(num_layers, Weights, Bias,
                        file_name='../weights/weights_graph_autoencoder_II.zip')


def load_user_pockets(input_dir):
    # load all file names in input_dir (e.g. '../data/User/ff') as test pockets
    test_pockets = [f for f in os.listdir(input_dir)
                    if os.path.isfile(os.path.join(input_dir, f))]
    return test_pockets
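
# Example (assumes user ff files such as 1xxa_LIG.ff are placed under
# ../data/User/ff, as described in the comments inside eval_autoencoder):
#   user_pockets = load_user_pockets('../data/User/ff')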
def eval_autoencoder(batch_size=5):
    file_name = '../weights/weights_graph_autoencoder_II.zip'
    [W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0, W_out_0, b_out_0, b_prime_out_0,
     W_self_1, W_degrees_1, b_prime_1, b_prime_self_1, b_layer_1, W_out_1, b_out_1, b_prime_out_1] = \
        load_autoencoder_II_W(file_name)
    Weights = [W_self_0, W_degrees_0, W_out_0, W_self_1, W_degrees_1, W_out_1]
    Bias = [b_prime_0, b_prime_self_0, b_layer_0, b_out_0, b_prime_out_0,
            b_prime_1, b_prime_self_1, b_layer_1, b_out_1, b_prime_out_1]
    all_poc, max_ff, min_ff, mean_ff = get_all_pocs()
    input_dir = '../data/DUDE_drugFEATURE_scPDB_ff/'
    # all_poc is the list of test pockets for which to compute pocket fingerprints.
    # You can replace it with your own list of ff files, e.g.
    # [1xxa_LIG.ff, 1xxb_LIG.ff, 1xxc_LIG.ff, 1xxd_LIG.ff, ...].
    # max_ff, min_ff and mean_ff are still needed, so keep the get_all_pocs() call above.
    # OPTIONAL - for user-defined pockets, uncomment the next two lines:
    # all_poc = load_user_pockets(input_dir='../data/User/ff')
    # input_dir = '../data/User/ff/'
    # batch_size should be the number of pairs of (pocket, mol)
    n_train_batches = int(len(all_poc) / batch_size)
    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')
    batchsize = batch_size
    env_features = T.matrix('env_features', dtype=theano.config.floatX)
    env_neighbors = T.matrix('env_neighbors', dtype=theano.config.floatX)
    env_mask = T.fvector('env_mask')
    env_degrees = T.matrix('env_degrees', dtype=theano.config.floatX)
    index = T.lscalar('index')  # index to a [mini]batch
    lr = T.scalar(name='lr')

    ##############################################
    ################## SDA #######################
    ##############################################
    sda = SdA_II(
        numpy_rng=numpy_rng,
        n_ins=480,
        hidden_layers_sizes=[200, 100],
        corruption_levels=[0.05, 0.05],
        Weights=Weights,
        Bias=Bias,
        num_input=batch_size,
        env_features=env_features,
        env_neighbors=env_neighbors,
        env_degrees=env_degrees,
        mask=env_mask,
        layer_fp=None,
        fp_length=512,
    )
    for i in range(sda.n_layers):
        training_dA = sda.dA_layers[i]
        x = training_dA.Graph_CNN.env_features_reshaped
        y = training_dA.Graph_CNN.env_outputs
        fp = training_dA.Graph_CNN.fp_output
        z = training_dA.z
        if i == 0:
            recon_fn = theano.function([env_features, env_mask],
                                       [x, y, z, fp, training_dA.cost])
        else:
            recon_fn = theano.function([env_features, env_neighbors, env_degrees, env_mask],
                                       [x, y, z, fp, training_dA.cost])
        all_fp = []
        test_poc_name = []
        for batch_index in range(n_train_batches):
            env_features_train, env_neighbors_train, env_degrees_train, env_mask_train = \
                get_pocket_attributes(all_poc[batch_index * batch_size:(batch_index + 1) * batch_size],
                                      max_ff, min_ff, mean_ff, input_dir=input_dir)
            train_EF, train_EN, train_ED, train_env_mask = shared_dataset(
                env_features_train, env_neighbors_train, env_degrees_train, env_mask_train)
            if i == 0:
                x_, y_, z_, fp_, cost_ = recon_fn(train_EF.eval(), train_env_mask.eval())
            else:
                x_, y_, z_, fp_, cost_ = recon_fn(train_EF.eval(), train_EN.eval(),
                                                  train_ED.eval(), train_env_mask.eval())
            all_fp.append(fp_)
            test_poc_name.extend(all_poc[batch_index * batch_size:(batch_index + 1) * batch_size])
        all_fp = numpy.array(all_fp)
        all_fp.dump('../results/User/auto_poc_fp_layer_' + str(i) + '.dat')
    with open('../results/User/auto_poc_fp_order.txt', 'w') as out_file:
        for poc in test_poc_name:
            out_file.write(poc + '\n')


if __name__ == '__main__':
    mode = sys.argv[1]
    if mode == 'train':
        train_poc_autoencoder_II()
    else:
        eval_autoencoder()  # see the in-line comments for user-defined pockets
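
# Usage (any argument other than 'train' runs evaluation):
#   python <this_script>.py train   # pre-train autoencoder II, dump weights
#   python <this_script>.py eval    # write pocket fingerprints to ../results/User/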