import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from pprint import pprint
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from theano.tensor.nnet import conv3d2d
from scipy.io import matlab
import re
import math
from theano import shared
from collections import OrderedDict
from layers import *
from theano.misc.pkl_utils import dump
import tables
from pocket_ff_to_box_backbone import *
from graph_3DCNN import *

max_poc_degrees = 20
max_nodes_in_poc = 50
DEFAULT_NODE_NAME = "defaultNode"


def shared_dataset(BOX, env_neighbors, env_degrees, mask, labels, borrow=True):
    shared_BOX = theano.shared(numpy.asarray(BOX, dtype=theano.config.floatX), borrow=borrow)
    shared_env_neighbors = theano.shared(numpy.asarray(env_neighbors, dtype=theano.config.floatX), borrow=borrow)
    shared_env_degrees = theano.shared(numpy.asarray(env_degrees, dtype=theano.config.floatX), borrow=borrow)
    shared_mask = theano.shared(numpy.asarray(mask, dtype=theano.config.floatX), borrow=borrow)
    shared_labels = theano.shared(numpy.asarray(labels, dtype=theano.config.floatX), borrow=borrow)
    return shared_BOX, shared_env_neighbors, shared_env_degrees, shared_mask, T.cast(shared_labels, 'int32')


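# Note on the saved-weight format: dump_weights_pickle() below writes the
# parameters with theano.misc.pkl_utils.dump() into a zip archive that
# numpy.load() can read back. Entries 0-19 are W0..W_atom_2 followed by
# b0..b_atom_2 (the order of all_var_tuple), then max_poc_degrees per-degree
# matrices for graph layer 0 and another max_poc_degrees for graph layer 1.
# load_weights_pickle() relies on this ordering.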
def load_weights_pickle():
    file_name = '../weights/weight_Design_Graph_skip_finetune.zip'
    saved = numpy.load(file_name)
    pickle_keys = saved.keys()

    def as_shared(idx, name):
        value = numpy.asarray(saved[pickle_keys[idx]], dtype=theano.config.floatX)
        return theano.shared(value=value, name=name, borrow=True)

    W0 = as_shared(0, 'W0')
    W1 = as_shared(1, 'W1')
    W2 = as_shared(2, 'W2')
    W_hid1 = as_shared(3, 'W_hid1')
    W_poc_self_0 = as_shared(4, 'W_poc_self_0')
    W_poc_self_1 = as_shared(5, 'W_poc_self_1')
    W_log = as_shared(6, 'W_log')
    W_atom_0 = as_shared(7, 'W_atom_0')
    W_atom_1 = as_shared(8, 'W_atom_1')
    W_atom_2 = as_shared(9, 'W_atom_2')
    b0 = as_shared(10, 'b0')
    b1 = as_shared(11, 'b1')
    b2 = as_shared(12, 'b2')
    b_hid1 = as_shared(13, 'b_hid1')
    b_poc_layer_0 = as_shared(14, 'b_poc_layer_0')
    b_poc_layer_1 = as_shared(15, 'b_poc_layer_1')
    b_log = as_shared(16, 'b_log')
    b_atom_0 = as_shared(17, 'b_atom_0')
    b_atom_1 = as_shared(18, 'b_atom_1')
    b_atom_2 = as_shared(19, 'b_atom_2')

    W_poc_degrees_0 = []
    for i in range(0, max_poc_degrees):
        W_poc_degrees_0.append(as_shared(20 + i, 'W_poc_l_0_d_' + str(i)))
    W_poc_degrees_1 = []
    for i in range(0, max_poc_degrees):
        W_poc_degrees_1.append(as_shared(20 + max_poc_degrees + i, 'W_poc_l_1_d_' + str(i)))

    return [W0, W1, W2, W_hid1, W_poc_self_0, W_poc_self_1, W_log,
            W_atom_0, W_atom_1, W_atom_2,
            b0, b1, b2, b_hid1, b_poc_layer_0, b_poc_layer_1, b_log,
            b_atom_0, b_atom_1, b_atom_2,
            W_poc_degrees_0, W_poc_degrees_1]


def get_backbone_mean():
    mean = numpy.load("../../design_graph_cnn/data/train_mean.dat")
    return mean


def load_backbone_weights(dropout_rate=0.3):
    file_name = '../weights/weight_3DCNN_backbone.zip'
    saved = numpy.load(file_name)
    pickle_keys = saved.keys()

    def load_param(idx):
        return numpy.asarray(saved[pickle_keys[idx]], dtype=theano.config.floatX)

    W0 = load_param(0)
    W1 = load_param(1)
    W2 = load_param(2)
    W3 = load_param(3)
    W4 = load_param(4)
    W5 = load_param(5)
    b0 = load_param(6)
    b1 = load_param(7)
    b2 = load_param(8)
    b3 = load_param(9)
    b4 = load_param(10)
    b5 = load_param(11)

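    # The backbone was presumably trained with dropout: scaling the saved
    # parameters by the keep probability (1 - dropout_rate) is the usual
    # test-time rescaling. The final layer (W5, b5) is left unscaled.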
    W0 = theano.shared(value=W0 * (1 - dropout_rate), name='W0', borrow=True)
    W1 = theano.shared(value=W1 * (1 - dropout_rate), name='W1', borrow=True)
    W2 = theano.shared(value=W2 * (1 - dropout_rate), name='W2', borrow=True)
    W3 = theano.shared(value=W3 * (1 - dropout_rate), name='W3', borrow=True)
    W4 = theano.shared(value=W4 * (1 - dropout_rate), name='W4', borrow=True)
    W5 = theano.shared(value=W5, name='W5', borrow=True)
    b0 = theano.shared(value=b0 * (1 - dropout_rate), name='b0', borrow=True)
    b1 = theano.shared(value=b1 * (1 - dropout_rate), name='b1', borrow=True)
    b2 = theano.shared(value=b2 * (1 - dropout_rate), name='b2', borrow=True)
    b3 = theano.shared(value=b3 * (1 - dropout_rate), name='b3', borrow=True)
    b4 = theano.shared(value=b4 * (1 - dropout_rate), name='b4', borrow=True)
    b5 = theano.shared(value=b5, name='b5', borrow=True)
    return [W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5]


def dump_weights_pickle(Weights, Bias, file_name):
    [W0, W1, W2, W_hid1, W_poc_self_0, W_poc_self_1, W_log,
     W_atom_0, W_atom_1, W_atom_2,
     W_poc_degrees_0, W_poc_degrees_1] = Weights
    [b0, b1, b2, b_hid1, b_poc_layer_0, b_poc_layer_1, b_log,
     b_atom_0, b_atom_1, b_atom_2] = Bias

    w_poc_degree_0_tuple = ()
    for i in range(0, max_poc_degrees):
        w_poc_degree_0_tuple = w_poc_degree_0_tuple + (W_poc_degrees_0[i],)
    w_poc_degree_1_tuple = ()
    for i in range(0, max_poc_degrees):
        w_poc_degree_1_tuple = w_poc_degree_1_tuple + (W_poc_degrees_1[i],)

    all_var_tuple = (W0, W1, W2, W_hid1, W_poc_self_0, W_poc_self_1, W_log,
                     W_atom_0, W_atom_1, W_atom_2,
                     b0, b1, b2, b_hid1, b_poc_layer_0, b_poc_layer_1, b_log,
                     b_atom_0, b_atom_1, b_atom_2) + w_poc_degree_0_tuple + w_poc_degree_1_tuple
    with open(file_name, 'wb') as f:
        dump(all_var_tuple, f)


class Conv_3d_Layer(object):
    """3D convolution (theano conv3d2d) followed by a ReLU."""

    def __init__(self, rng, input, filter_shape, image_shape, W=None, b=None):
        self.input = input
        assert image_shape[2] == filter_shape[2]

        fan_in = numpy.prod(filter_shape[2:])
        fan_out = filter_shape[0] * numpy.prod(filter_shape[3:])
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        if W is None:
            self.W = theano.shared(
                numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                              dtype=theano.config.floatX),
                borrow=True, name='W')
        else:
            self.W = W
        if b is None:
            b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, borrow=True, name='b')
        else:
            self.b = b

        conv_out5D = conv3d2d.conv3d(signals=input, filters=self.W,
                                     signals_shape=image_shape, filters_shape=filter_shape)
        # conv_out5D is (batch, time, out_channels, height, width); the bias is
        # broadcast over the batch, time and spatial axes.
        self.output = relu(conv_out5D + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.W, self.b]


class Conv_Pool(object):
    """Conv_3d_Layer followed by 2x2x2 pooling (PoolLayer3D from layers)."""

    def __init__(self, rng, input, filter_shape, image_shape, W=None, b=None):
        (batchsize, in_depth, in_channels, in_height, in_width) = image_shape
        fan_in = numpy.prod(filter_shape[2:])
        fan_out = filter_shape[0] * numpy.prod(filter_shape[3:])
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        if W is None:
            self.W = theano.shared(
                numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                              dtype=theano.config.floatX),
                borrow=True)
        else:
            self.W = W
        if b is None:
            b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, borrow=True)
        else:
            self.b = b
        self.input = input

        conv_layer = Conv_3d_Layer(rng=rng, input=input, filter_shape=filter_shape,
                                   image_shape=image_shape, W=self.W, b=self.b)
        # Channels are moved in front of the (time, height, width) axes for
        # PoolLayer3D's 2x2x2 pooling, then moved back.
        pool_layer = PoolLayer3D(input=conv_layer.output.dimshuffle(0, 2, 1, 3, 4),
                                 pool_shape=(2, 2, 2))
        output = pool_layer.output.dimshuffle(0, 2, 1, 3, 4)
        self.output = output
        self.params = [self.W, self.b]


class LogisticRegression(object):
    def __init__(self, input, n_in, n_out, W=None, b=None):
        if W is None:
            self.W = theano.shared(value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX), name='W')
        else:
            self.W = W
        if b is None:
            self.b = theano.shared(value=numpy.zeros((n_out,), dtype=theano.config.floatX), name='b')
        else:
            self.b = b
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.y_prob = self.p_y_given_x
        self.score = T.dot(input, self.W) + self.b
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y, mask):
        # Masked mean negative log-likelihood over the valid nodes.
        return -T.sum(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y] * mask) / T.sum(mask)

    def class_score(self, y):
        return (self.score)[0, y]

    def neq(self, y):
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))
        return T.neq(self.y_pred, y)

    def neq_mask(self, y, mask):
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))
        return T.neq(self.y_pred, y) * mask

    def sum_neq_mask(self, y, mask):
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))
        return T.sum(T.neq(self.y_pred, y) * mask)

    def errors(self, y, mask):
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))
        if y.dtype.startswith('int'):
            # Masked mean error rate over the valid nodes.
            return T.sum(T.neq(self.y_pred, y) * mask) / T.sum(mask)
        else:
            raise NotImplementedError()


class Design_Graph(object):
    """Per-node 3D-CNN encoder, two graph-convolution layers over the pocket
    graph, and a skip-style aggregation feeding a 20-way softmax classifier."""

    def __init__(self, rng, x_input, y_input, Weights, Bias, batch_size,
                 env_neighbors, env_mask, env_degrees,
                 num_poc_hidden_features=[100, 100], dim_x=10800, use_bias=True):
        use_bias = True
        filter_w = 3
        num_3d_pixel = 20
        in_channels = 4

        layer0_w = num_3d_pixel
        layer0_h = num_3d_pixel
        layer0_d = num_3d_pixel
        layer1_w = (layer0_w - 3 + 1)
        layer1_h = (layer0_h - 3 + 1)
        layer1_d = (layer0_d - 3 + 1)
        layer2_w = (layer1_w - 3 + 1) / 2
        layer2_h = (layer1_h - 3 + 1) / 2
        layer2_d = (layer1_d - 3 + 1) / 2
        layer3_w = (layer2_w - 3 + 1) / 2
        layer3_h = (layer2_h - 3 + 1) / 2
        layer3_d = (layer2_d - 3 + 1) / 2

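        # Spatial sizes of the 3D-CNN stages for a 20^3 input box:
        #   layer0: 3^3 valid conv       -> 18^3 (100 channels)
        #   layer1: 3^3 conv + 2^3 pool  ->  8^3 (200 channels)
        #   layer2: 3^3 conv + 2^3 pool  ->  3^3 (400 channels)
        # Flattened, the last stage yields 400 * 3^3 = 10800 features = dim_x.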

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'

        # image sizes
        batchsize = batch_size
        in_time = num_3d_pixel
        in_width = num_3d_pixel
        in_height = num_3d_pixel
        # filter sizes
        flt_channels = 100
        flt_time = filter_w
        flt_width = filter_w
        flt_height = filter_w

        signals_shape0 = (max_nodes_in_poc * batchsize, in_time, in_channels, in_height, in_width)
        filters_shape0 = (flt_channels, 3, in_channels, 3, 3)
        signals_shape1 = (max_nodes_in_poc * batchsize, layer1_d, flt_channels, layer1_h, layer1_w)
        filters_shape1 = (flt_channels * 2, 3, flt_channels, 3, 3)
        signals_shape2 = (max_nodes_in_poc * batchsize, layer2_d, flt_channels * 2, layer2_h, layer2_w)
        filters_shape2 = (flt_channels * 4, 3, flt_channels * 2, 3, 3)

        layer0_input = x_input.reshape(signals_shape0)

        [W0, W1, W2, W_hid1, W_poc_self_0, W_poc_self_1, W_log,
         W_atom_0, W_atom_1, W_atom_2,
         W_poc_degrees_0, W_poc_degrees_1] = Weights
        [b0, b1, b2, b_hid1, b_poc_layer_0, b_poc_layer_1, b_log,
         b_atom_0, b_atom_1, b_atom_2] = Bias

        if y_input is None:
            self.y = T.imatrix('y')
        else:
            self.y = y_input
        self.mask = env_mask
        self.tparams = OrderedDict()

        self.layer0 = Conv_3d_Layer(rng=rng, input=layer0_input,
                                    image_shape=signals_shape0,
                                    filter_shape=filters_shape0,
                                    W=W0, b=b0)
        self.tparams['W0'] = self.layer0.W
        self.tparams['b0'] = self.layer0.b
        next_layer_input = self.layer0.output

        self.layer1 = Conv_Pool(rng=rng, input=next_layer_input,
                                image_shape=signals_shape1,
                                filter_shape=filters_shape1,
                                W=W1, b=b1)
        self.tparams['W1'] = self.layer1.W
        self.tparams['b1'] = self.layer1.b
        next_layer_input = self.layer1.output

        self.layer2 = Conv_Pool(rng=rng, input=next_layer_input,
                                image_shape=signals_shape2,
                                filter_shape=filters_shape2,
                                W=W2, b=b2)
        next_layer_input = self.layer2.output.flatten(2)
        self.tparams['W2'] = self.layer2.W
        self.tparams['b2'] = self.layer2.b

        self.HiddenLayer1 = HiddenLayer(rng=rng,
                                        input=next_layer_input,
                                        n_in=dim_x, n_out=1000,
                                        W=W_hid1, b=b_hid1,
                                        activation=relu)
        self.tparams['W_hid1'] = self.HiddenLayer1.W
        self.tparams['b_hid1'] = self.HiddenLayer1.b

        ######################### Poc Graph ##########################
        self.Graph_poc_layer_0 = Graph_Conv(rng=rng,
                                            b_layer=b_poc_layer_0,
                                            W_self=W_poc_self_0,
                                            W_degrees=W_poc_degrees_0,
                                            num_in_features=1000,
                                            num_hidden_features=100,
                                            num_input=batch_size,
                                            env_features=self.HiddenLayer1.output.flatten(2),
                                            env_neighbors=env_neighbors,
                                            env_degrees=env_degrees,
                                            mask=env_mask,
                                            max_nodes_in_poc=max_nodes_in_poc,
                                            max_poc_degrees=max_poc_degrees)
        self.tparams['W_poc_self_0'] = self.Graph_poc_layer_0.W_self
        self.tparams['b_poc_layer_0'] = self.Graph_poc_layer_0.b_layer
        for d in range(0, max_poc_degrees):
            self.tparams['W_poc_l_0_d_' + str(d)] = self.Graph_poc_layer_0.W_degrees[d]

        ####################################################
        self.Graph_poc_layer_1 = Graph_Conv(rng=rng,
                                            b_layer=b_poc_layer_1,
                                            W_self=W_poc_self_1,
                                            W_degrees=W_poc_degrees_1,
                                            num_in_features=100,
                                            num_hidden_features=100,
                                            num_input=batch_size,
                                            env_features=self.Graph_poc_layer_0.layer_conv,
                                            env_neighbors=env_neighbors,
                                            env_degrees=env_degrees,
                                            mask=env_mask,
                                            max_nodes_in_poc=max_nodes_in_poc,
                                            max_poc_degrees=max_poc_degrees)
        self.tparams['W_poc_self_1'] = self.Graph_poc_layer_1.W_self
        self.tparams['b_poc_layer_1'] = self.Graph_poc_layer_1.b_layer
        for d in range(0, max_poc_degrees):
            self.tparams['W_poc_l_1_d_' + str(d)] = self.Graph_poc_layer_1.W_degrees[d]

        self.atom_0_features = self.HiddenLayer1.output
        self.atom_1_features = self.Graph_poc_layer_0.layer_conv
        self.atom_2_features = self.Graph_poc_layer_1.layer_conv

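        # Skip-style feature aggregation: the per-node 3D-CNN features
        # (atom_0, 1000-d), the first graph-convolution output (atom_1, 100-d)
        # and the second graph-convolution output (atom_2, 100-d) are each
        # projected to 100 dimensions below and summed before the softmax
        # classifier.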
        self.trans_atom_0 = HiddenLayer(rng=rng,
                                        input=self.atom_0_features,
                                        n_in=1000, n_out=100,
                                        W=W_atom_0, b=b_atom_0,
                                        activation=relu)
        self.tparams['W_atom_0'] = self.trans_atom_0.W
        self.tparams['b_atom_0'] = self.trans_atom_0.b

        self.trans_atom_1 = HiddenLayer(rng=rng,
                                        input=self.atom_1_features,
                                        n_in=100, n_out=100,
                                        W=W_atom_1, b=b_atom_1,
                                        activation=relu)
        self.tparams['W_atom_1'] = self.trans_atom_1.W
        self.tparams['b_atom_1'] = self.trans_atom_1.b

        self.trans_atom_2 = HiddenLayer(rng=rng,
                                        input=self.atom_2_features,
                                        n_in=100, n_out=100,
                                        W=W_atom_2, b=b_atom_2,
                                        activation=relu)
        self.tparams['W_atom_2'] = self.trans_atom_2.W
        self.tparams['b_atom_2'] = self.trans_atom_2.b

        self.integrate_feature = (self.trans_atom_0.output +
                                  self.trans_atom_1.output +
                                  self.trans_atom_2.output)

        self.logLayer = LogisticRegression(input=self.integrate_feature,
                                           n_in=100, n_out=20,
                                           W=W_log, b=b_log)
        self.tparams['W_log'] = self.logLayer.W
        self.tparams['b_log'] = self.logLayer.b

        self.pred = self.logLayer.y_pred
        self.prob = self.logLayer.y_prob
        self.errors = self.logLayer.errors(self.y, self.mask)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y, self.mask)


# def load_data():
#     dat_dir = '../data/PROTEIN_SEQ_PTS/train/'
#     train_data = [f for f in os.listdir(dat_dir) if os.path.isfile(os.path.join(dat_dir, f))]
#     return train_data

# def load_test_data():
#     dat_dir = '../data/PROTEIN_SEQ_PTS/test/'
#     test_data = [f for f in os.listdir(dat_dir) if os.path.isfile(os.path.join(dat_dir, f))]
#     return test_data


def load_data():
    dat_dir = '../data/Protein_backbone_patch/train/'
    train_data = [f for f in os.listdir(dat_dir) if os.path.isfile(os.path.join(dat_dir, f))]
    return train_data


def load_test_data():
    dat_dir = '../data/Protein_backbone_patch/test/'
    test_data = [f for f in os.listdir(dat_dir) if os.path.isfile(os.path.join(dat_dir, f))]
    return test_data


def train_Design_Graph(finetune_lr=0.003, training_epochs=10, batch_size=1, reg=5e-9):
    [W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5] = load_backbone_weights()

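    # Transfer the pre-trained 3D-CNN backbone weights where the shapes match:
    # the three conv layers (W0-W2), the 1000-d hidden layer (W3 -> W_hid1),
    # one skip branch (W4 -> W_atom_0) and the softmax layer (W5 -> W_log).
    # The graph-convolution parameters and the remaining skip branches are
    # passed as None; they are presumably initialized randomly inside
    # Graph_Conv / HiddenLayer, as Conv_3d_Layer does when W is None.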
    W_hid1 = W3
    b_hid1 = b3
    W_atom_0 = W4
    b_atom_0 = b4
    W_log = W5
    b_log = b5
    W_poc_self_0 = None
    W_poc_self_1 = None
    W_atom_1 = None
    W_atom_2 = None
    W_poc_degrees_0 = None
    W_poc_degrees_1 = None
    b_poc_layer_0 = None
    b_poc_layer_1 = None
    b_atom_1 = None
    b_atom_2 = None
    Weights = [W0, W1, W2, W_hid1, W_poc_self_0, W_poc_self_1, W_log,
               W_atom_0, W_atom_1, W_atom_2,
               W_poc_degrees_0, W_poc_degrees_1]
    Bias = [b0, b1, b2, b_hid1, b_poc_layer_0, b_poc_layer_1, b_log,
            b_atom_0, b_atom_1, b_atom_2]

    train_data = load_data()
    test_data = load_test_data()
    back_mean = get_backbone_mean()
    n_train_batches = int(len(train_data) / batch_size)
    n_test_batches = int(len(test_data) / batch_size)

    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'

    dtensor5 = T.TensorType('float32', (False,) * 5)
    x = dtensor5('x')
    labels = T.ivector('labels')
    env_neighbors = T.matrix('env_neighbors', dtype=theano.config.floatX)
    env_mask = T.fvector('env_mask')
    env_degrees = T.matrix('env_degrees', dtype=theano.config.floatX)

    s_cnn_da = Design_Graph(rng=numpy_rng,
                            x_input=x,
                            y_input=labels,
                            Weights=Weights,
                            Bias=Bias,
                            batch_size=batch_size,
                            env_neighbors=env_neighbors,
                            env_mask=env_mask,
                            env_degrees=env_degrees,
                            num_poc_hidden_features=[100, 100],
                            dim_x=10800,
                            use_bias=True)

    index = T.lscalar('index')  # index to a [mini]batch
    lr = T.scalar(name='lr')
    tparams = s_cnn_da.tparams
    cost = s_cnn_da.finetune_cost  # + (reg * s_cnn_da.L2_sqr)
    grads = T.grad(cost, tparams.values())

    # RMSProp-style running averages of the gradients.
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad' % k)
                     for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]
    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]
    train_fn = theano.function([x, env_neighbors, env_degrees, env_mask, labels], cost,
                               updates=zgup + rgup + rg2up,
                               name='rmsprop_f_grad_shared')

    updir = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_updir' % k)
             for k, p in tparams.iteritems()]
    adam_lr = 1e-6
    eps = 1e-6
    # Note: the parameter update uses the hard-coded adam_lr step size; the lr
    # argument passed to f_update is ignored (hence on_unused_input='ignore').
    updir_new = [(ud, 0.9 * ud - adam_lr * zg / T.sqrt(rg2 - rg ** 2 + eps))
                 for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, running_grads2)]
    param_up = [(p, p + udn[1]) for p, udn in zip(tparams.values(), updir_new)]
    f_update = theano.function([lr], [], updates=updir_new + param_up,
                               on_unused_input='ignore', name='rmsprop_f_update')

    train_score_i = theano.function([x, env_neighbors, env_degrees, env_mask, labels],
                                    s_cnn_da.errors, name='train')

    print '... fine-tuning the model'
    patience = 100000        # look at this many examples regardless
    patience_increase = 2.   # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    best_test_score = numpy.inf
    test_score = 0.

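    # Training loop (one pocket per minibatch when batch_size=1): build the
    # pocket tensors with get_pocket_attributes, wrap them in shared variables,
    # subtract the backbone training mean, reorder the axes, and take one
    # rmsprop-style step (train_fn accumulates the gradient statistics,
    # f_update applies the parameter update). Every 5000 minibatches the
    # current weights are dumped to disk; every 20000 minibatches the test set
    # is scored and the result appended to the result file.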
    start_time = time.clock()
    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        train_losses = []
        for minibatch_index in xrange(0, n_train_batches):
            mod_test = minibatch_index % 20000
            mod = minibatch_index % 5000
            env_valid, env_features_train, env_neighbors_train, env_degrees_train, env_mask_train, env_labels_train = get_pocket_attributes(
                pts_list=train_data[minibatch_index * batch_size: (minibatch_index + 1) * batch_size],
                input_dir='../data/Protein_backbone_patch/train/',
                pdb_dir='../../3DCNN_data_backbone/data/PDB_family_train')
            if env_valid:
                list_file = open('../progress_Design_Graph_skip_finetune.txt', 'a')
                list_file.write("mod:" + str(mod) + '\n')
                list_file.close()
                train_BOX, train_EN, train_ED, train_env_mask, train_labels = shared_dataset(
                    env_features_train, env_neighbors_train, env_degrees_train, env_mask_train, env_labels_train)
                train_BOX -= back_mean
                train_BOX = train_BOX.dimshuffle(0, 4, 1, 2, 3)
                minibatch_avg_cost = train_fn(train_BOX.eval(), train_EN.eval(), train_ED.eval(),
                                              train_env_mask.eval(), train_labels.eval())
                train_err = train_score_i(train_BOX.eval(), train_EN.eval(), train_ED.eval(),
                                          train_env_mask.eval(), train_labels.eval())
                train_losses.append(train_err)
                list_file = open('../progress_Design_Graph_skip_finetune.txt', 'a')
                list_file.write('epoch %i, minibatch_index %i, n_train_batches %i, train error %f %%'
                                % (epoch, minibatch_index, n_train_batches, train_err * 100.))
                list_file.write('\n')
                list_file.close()
                f_update(finetune_lr)
                iter = (epoch - 1) * n_train_batches + minibatch_index

            if mod == 0 and minibatch_index != 0:
                train_losses_ = numpy.array(train_losses)
                this_train_loss = numpy.mean(train_losses_)
                result_file = open('../result_Design_Graph_skip_finetune.txt', 'a')
                result_file.write('epoch %i, minibatch_index %i, train error %f %%'
                                  % (epoch, minibatch_index, this_train_loss * 100.))
                result_file.write('\n')
                result_file.close()

                # Snapshot the current parameters in the order expected by
                # load_weights_pickle().
                W0 = s_cnn_da.layer0.W
                b0 = s_cnn_da.layer0.b
                W1 = s_cnn_da.layer1.W
                b1 = s_cnn_da.layer1.b
                W2 = s_cnn_da.layer2.W
                b2 = s_cnn_da.layer2.b
                W_hid1 = s_cnn_da.HiddenLayer1.W
                b_hid1 = s_cnn_da.HiddenLayer1.b
                W_poc_self_0 = s_cnn_da.Graph_poc_layer_0.W_self
                b_poc_layer_0 = s_cnn_da.Graph_poc_layer_0.b_layer
                W_poc_self_1 = s_cnn_da.Graph_poc_layer_1.W_self
                b_poc_layer_1 = s_cnn_da.Graph_poc_layer_1.b_layer
                W_atom_0 = s_cnn_da.trans_atom_0.W
                W_atom_1 = s_cnn_da.trans_atom_1.W
                W_atom_2 = s_cnn_da.trans_atom_2.W
                b_atom_0 = s_cnn_da.trans_atom_0.b
                b_atom_1 = s_cnn_da.trans_atom_1.b
                b_atom_2 = s_cnn_da.trans_atom_2.b
                W_log = s_cnn_da.logLayer.W
                b_log = s_cnn_da.logLayer.b
                W_poc_degrees_0 = []
                for d in range(0, max_poc_degrees):
                    W_poc_degrees_0.append(s_cnn_da.Graph_poc_layer_0.W_degrees[d])
                W_poc_degrees_1 = []
                for d in range(0, max_poc_degrees):
                    W_poc_degrees_1.append(s_cnn_da.Graph_poc_layer_1.W_degrees[d])
                Weights = [W0, W1, W2, W_hid1, W_poc_self_0, W_poc_self_1, W_log,
                           W_atom_0, W_atom_1, W_atom_2,
                           W_poc_degrees_0, W_poc_degrees_1]
                Bias = [b0, b1, b2, b_hid1, b_poc_layer_0, b_poc_layer_1, b_log,
                        b_atom_0, b_atom_1, b_atom_2]
                dump_weights_pickle(Weights, Bias,
                                    file_name='../weights/weight_Design_Graph_skip_finetune.zip')

            if mod_test == 0 and minibatch_index != 0:
                test_losses = []
                for i in xrange(n_test_batches):
                    # The other call sites unpack env_valid as the first return
                    # value of get_pocket_attributes, so it is unpacked (and
                    # checked) here as well.
                    env_valid, env_features_test, env_neighbors_test, env_degrees_test, env_mask_test, env_labels_test = get_pocket_attributes(
                        pts_list=test_data[i * batch_size: (i + 1) * batch_size],
                        input_dir='../data/Protein_backbone_patch/test/',
                        pdb_dir='../../3DCNN_data_backbone/data/PDB_family_test')
                    if not env_valid:
                        continue
                    test_BOX, test_EN, test_ED, test_env_mask, test_labels = shared_dataset(
                        env_features_test, env_neighbors_test, env_degrees_test, env_mask_test, env_labels_test)
                    test_BOX -= back_mean
                    test_BOX = test_BOX.dimshuffle(0, 4, 1, 2, 3)
                    test_err = train_score_i(test_BOX.eval(), test_EN.eval(), test_ED.eval(),
                                             test_env_mask.eval(), test_labels.eval())
                    test_losses.append(test_err)
                test_losses = numpy.array(test_losses)
                this_test_loss = numpy.mean(test_losses)
                result_file = open('../result_Design_Graph_skip_finetune.txt', 'a')
                result_file.write('epoch %i, test error %f %%' % (epoch, this_test_loss * 100.))
                result_file.write('\n')
                result_file.close()


def eval_Design_Graph(batch_size=1):
    [W0, W1, W2, W_hid1, W_poc_self_0, W_poc_self_1, W_log,
     W_atom_0, W_atom_1, W_atom_2,
     b0, b1, b2, b_hid1, b_poc_layer_0, b_poc_layer_1, b_log,
     b_atom_0, b_atom_1, b_atom_2,
     W_poc_degrees_0, W_poc_degrees_1] = load_weights_pickle()
    Weights = [W0, W1, W2, W_hid1, W_poc_self_0, W_poc_self_1, W_log,
               W_atom_0, W_atom_1, W_atom_2,
               W_poc_degrees_0, W_poc_degrees_1]
    Bias = [b0, b1, b2, b_hid1, b_poc_layer_0, b_poc_layer_1, b_log,
            b_atom_0, b_atom_1, b_atom_2]

    print ("loading data")
    test_data = load_test_data()
    back_mean = get_backbone_mean()
    n_test_batches = int(len(test_data) / batch_size)

    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'

    dtensor5 = T.TensorType('float32', (False,) * 5)
    x = dtensor5('x')
    labels = T.ivector('labels')
    env_neighbors = T.matrix('env_neighbors', dtype=theano.config.floatX)
    env_mask = T.fvector('env_mask')
    env_degrees = T.matrix('env_degrees', dtype=theano.config.floatX)

    s_cnn_da = Design_Graph(rng=numpy_rng,
                            x_input=x,
                            y_input=labels,
                            Weights=Weights,
                            Bias=Bias,
                            batch_size=batch_size,
                            env_neighbors=env_neighbors,
                            env_mask=env_mask,
                            env_degrees=env_degrees,
                            num_poc_hidden_features=[100, 100],
                            dim_x=10800,
                            use_bias=True)

    index = T.lscalar('index')  # index to a [mini]batch
    lr = T.scalar(name='lr')
    tparams = s_cnn_da.tparams
    cost = s_cnn_da.finetune_cost  # + (reg * s_cnn_da.L2_sqr)

    train_score_i = theano.function([x, env_neighbors, env_degrees, env_mask, labels],
                                    s_cnn_da.errors, name='train')
    pred_fn = theano.function([x, env_neighbors, env_degrees],
                              [s_cnn_da.pred, s_cnn_da.prob], name='pred')

    test_losses = []
    for i in xrange(n_test_batches):
        test_name = test_data[i * batch_size: (i + 1) * batch_size][0]
        f = open('../eval_Design_Graph_skip_finetune.txt', 'a')
        f.write(test_name + ': test error ')
        f.close()
        env_valid, env_features_test, env_neighbors_test, env_degrees_test, env_mask_test, env_labels_test = get_pocket_attributes(
            pts_list=test_data[i * batch_size: (i + 1) * batch_size],
            input_dir='../data/Protein_backbone_patch/test/',
            pdb_dir='../../3DCNN_data_backbone/data/PDB_family_test')
        if env_valid == True:
            test_BOX, test_EN, test_ED, test_env_mask, test_labels = shared_dataset(
                env_features_test, env_neighbors_test, env_degrees_test, env_mask_test, env_labels_test)
            test_BOX -= back_mean
            test_BOX = test_BOX.dimshuffle(0, 4, 1, 2, 3)
            test_err = train_score_i(test_BOX.eval(), test_EN.eval(), test_ED.eval(),
                                     test_env_mask.eval(), test_labels.eval())
            # The reported test error is the average error over the whole pocket.
            # If only the pocket-center residue prediction is of interest, retrieve
            # the error of node 0 (the first node in each pocket); this would need
            # a redefined error function in LogisticRegression.
            [test_pred, test_prob] = pred_fn(test_BOX.eval(), test_EN.eval(), test_ED.eval())
            true = env_labels_test
            mask = env_mask_test
            pred = test_pred
            prob = test_prob
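            # A minimal sketch (assumption, not part of the original code):
            # with batch_size=1, node 0 of the pocket is the center residue,
            # so its prediction could be scored directly from the arrays above:
            #   center_err = float(pred[0] != true[0])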
            print (test_name + ': test error ' + str(test_err))
            f = open('../eval_Design_Graph_skip_finetune.txt', 'a')
            f.write(str(test_err) + '\n')
            f.close()
            test_losses.append(test_err)

    f = open('../eval_Design_Graph_skip_finetune.txt', 'a')
    f.write("test accuracy:" + '\n')
    f.write(str(1 - numpy.mean(test_losses)) + '\n')
    f.close()


if __name__ == '__main__':
    mode = sys.argv[1]
    if mode == 'train':
        train_Design_Graph()
    else:
        eval_Design_Graph()

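# Command-line usage (per the __main__ dispatch above):
#   python <this_script>.py train   # fine-tune from the 3D-CNN backbone weights
#   python <this_script>.py eval    # evaluate with the saved fine-tuned weights
# Any argument other than 'train' runs evaluation.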