import os
import sys
import time
import random  # needed below for random.sample (was missing from the original imports)
import re
import math

import numpy
import tables  # needed below for tables.openFile (was missing from the original imports)
import theano
import theano.tensor as T
from theano import shared
from theano.tensor.shared_randomstreams import RandomStreams
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from theano.tensor.nnet import conv3d2d
from theano.misc.pkl_utils import dump
from scipy.io import matlab
from pprint import pprint
from collections import OrderedDict

from store_pytable import *
from layers import *


def dump_weights_pickle(classifier, file_name):
    # params is laid out as [W0, b0, W1, b1, ...]; pickle the six weight/bias pairs.
    W0 = classifier.params[0]
    W1 = classifier.params[2]
    W2 = classifier.params[4]
    W3 = classifier.params[6]
    W4 = classifier.params[8]
    W5 = classifier.params[10]
    b0 = classifier.params[1]
    b1 = classifier.params[3]
    b2 = classifier.params[5]
    b3 = classifier.params[7]
    b4 = classifier.params[9]
    b5 = classifier.params[11]
    with open(file_name, 'wb') as f:
        dump((W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5), f)


def shared_dataset(data_x, data_y, borrow=True):
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')


def numpy_floatX(data):
    return numpy.asarray(data, dtype=theano.config.floatX)


def load_ATP_data(data_ratio):
    dataName = "data"
    labelName = "label"
    labelShape = []

    # positive training examples
    pos_or_neg = 'pos'
    ID = 'train' + '_' + 'pos'
    filename_train_pos = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_pos, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_train_pos = dataColumn_train[:]
    y_train_pos = labelColumn_train[:]

    # negative training examples
    pos_or_neg = 'neg'
    ID = 'train' + '_' + 'neg'
    filename_train_neg = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_neg, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_train_neg = dataColumn_train[:]
    y_train_neg = labelColumn_train[:]

    # positive test examples
    pos_or_neg = 'pos'
    ID = 'test' + '_' + 'pos'
    filename_train_pos = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_pos, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_test_pos = dataColumn_train[:]
    y_test_pos = labelColumn_train[:]

    # negative test examples
    pos_or_neg = 'neg'
    ID = 'test' + '_' + 'neg'
    filename_train_neg = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_neg, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_test_neg = dataColumn_train[:]
    y_test_neg = labelColumn_train[:]

    num_of_pos_test = X_test_pos.shape[0]
    num_of_neg_test = X_test_neg.shape[0]
    Xt = numpy.concatenate((X_test_pos, X_test_neg), axis=0)
    yt = numpy.concatenate((y_test_pos, y_test_neg), axis=0)

    # Balance the training set: slice the larger class into chunks the size of
    # the smaller class (up to data_ratio chunks), pairing each chunk with a
    # full copy of the smaller class.
    s = [X_train_pos.shape[0], X_train_neg.shape[0]]
    data_size = min(s)
    min_index = s.index(data_size)
    print "pos:" + str(s[0])
    print "neg:" + str(s[1])

    X_pos_all = []
    y_pos_all = []
    X_neg_all = []
    y_neg_all = []
    if min_index == 0:
        ratio = float(s[1]) / s[0]
        r = int(numpy.floor(ratio))
        r = min(r, data_ratio)
        print "more neg, ratio=" + str(ratio)
        for i in range(0, r):
            X_pos_all.append(X_train_pos)
            y_pos_all.append(y_train_pos)
            X_neg_all.append(X_train_neg[i * data_size:(i + 1) * data_size])
            y_neg_all.append(y_train_neg[i * data_size:(i + 1) * data_size])
    elif min_index == 1:
        ratio = float(s[0]) / s[1]
        r = int(numpy.floor(ratio))
        r = min(r, data_ratio)
        print "more pos, ratio=" + str(ratio)
        for i in range(0, r):
            X_pos_all.append(X_train_pos[i * data_size:(i + 1) * data_size])
            y_pos_all.append(y_train_pos[i * data_size:(i + 1) * data_size])
            X_neg_all.append(X_train_neg)
            y_neg_all.append(y_train_neg)

    equal_examples = []
    equal_labels = []
    for i in range(0, len(X_pos_all)):
        equal_examples.append(numpy.concatenate((X_pos_all[i], X_neg_all[i]), axis=0))
        equal_labels.append(numpy.concatenate((y_pos_all[i], y_neg_all[i]), axis=0))
    equal_examples = numpy.array(equal_examples)
    equal_labels = numpy.array(equal_labels)
    # each example is a 4-channel 20x20x20 voxel grid
    equal_examples = numpy.reshape(equal_examples,
                                   (equal_examples.shape[0] * equal_examples.shape[1], 4, 20, 20, 20))
    equal_labels = equal_labels.flatten()

    # 19/20 of the balanced set for training, 1/20 for validation
    num_of_train = int(19 * float(equal_examples.shape[0]) / 20)
    num_of_val = int(1 * float(equal_examples.shape[0]) / 20)
    print num_of_train
    print num_of_val

    mask_train = random.sample(xrange(equal_examples.shape[0]), num_of_train)
    X = equal_examples[mask_train]
    y = equal_labels[mask_train]
    equal_examples = numpy.delete(equal_examples, mask_train, 0)
    equal_labels = numpy.delete(equal_labels, mask_train, 0)
    mask_val = random.sample(xrange(equal_examples.shape[0]), num_of_val)
    Xv = equal_examples[mask_val]
    yv = equal_labels[mask_val]
    equal_examples = numpy.delete(equal_examples, mask_val, 0)
    equal_labels = numpy.delete(equal_labels, mask_val, 0)

    print "equal_examples size in the end:"
    print equal_examples.shape
    print "equal_labels size in the end:"
    print equal_labels.shape
    print "train X size: "
    print X.shape
    print "val X size: "
    print Xv.shape
    print "train y size: "
    print y.shape
    print "val y size: "
    print yv.shape

    # split the training set into 20 parts so each part fits in memory
    all_train_x = []
    all_train_y = []
    all_train_sizes = []
    train_size = X.shape[0]
    part = train_size / 20
    for i in range(0, 20):
        mask = range(part * i, part * (i + 1))
        Xtr = X[mask]
        ytr = y[mask]
        all_train_x.append(Xtr)
        all_train_y.append(ytr)
        all_train_sizes.append(Xtr.shape[0])

    all_examples = [all_train_x, Xt, Xv]
    all_labels = [all_train_y, yt, yv]
    # datasets = [(all_train_x, all_train_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
    return [all_examples, all_labels, 4, all_train_sizes, Xt.shape[0], Xv.shape[0]]


def _dropout_from_layer(rng, layer, p):
    """p is the probability of dropping a unit."""
    srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
    # p=1-p because 1's indicate keep and p is prob of dropping
    mask = srng.binomial(n=1, p=1 - p, size=layer.shape)
    # The cast is important because int * float32 = float64, which pulls things off the gpu
    output = layer * T.cast(mask, theano.config.floatX)
    return output


class ConvDropNet(object):
    """A multilayer perceptron with all the trappings required to do dropout training."""

    def __init__(self, rng, input, in_channels, dropout_rates, batch_size, use_bias=True):
        # rectified_linear_activation = lambda x: T.maximum(0.0, x)
        # Set up all the hidden layers
        # weight_matrix_sizes = zip(layer_sizes, layer_sizes[1:])
        # Two parallel stacks: dropout_layers are used for training; layers
        # share the same weights, scaled by (1 - p), for the test-time path.
        self.layers = []
        self.dropout_layers = []

        filter_w = 3
        num_3d_pixel = 20

        layer0_w = num_3d_pixel
        layer0_h = num_3d_pixel
        layer0_d = num_3d_pixel
        layer1_w = (layer0_w - 3 + 1)      # 18
        layer1_h = (layer0_h - 3 + 1)
        layer1_d = (layer0_d - 3 + 1)
        layer2_w = (layer1_w - 3 + 1) / 2  # 8
        layer2_h = (layer1_h - 3 + 1) / 2
        layer2_d = (layer1_d - 3 + 1) / 2
        layer3_w = (layer2_w - 3 + 1) / 2  # 3
        layer3_h = (layer2_h - 3 + 1) / 2
        layer3_d = (layer2_d - 3 + 1) / 2

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'

        # image sizes
        batchsize = batch_size
        in_time = num_3d_pixel
        in_width = num_3d_pixel
        in_height = num_3d_pixel
        # filter sizes
        flt_channels = 100
        flt_time = filter_w
        flt_width = filter_w
        flt_height = filter_w

        # conv3d2d layout: (batch, depth, channels, height, width)
        signals_shape0 = (batchsize, in_time, in_channels, in_height, in_width)
        filters_shape0 = (flt_channels, 3, in_channels, 3, 3)
        signals_shape1 = (batchsize, layer1_d, flt_channels, layer1_h, layer1_w)
        filters_shape1 = (flt_channels * 2, 3, flt_channels, 3, 3)
        signals_shape2 = (batchsize, layer2_d, flt_channels * 2, layer2_h, layer2_w)
        filters_shape2 = (flt_channels * 4, 3, flt_channels * 2, 3, 3)

        # next_layer_input = input
        # first_layer = True
        # dropout the input
        layer0_input = input.reshape(signals_shape0)  # N x 20 x 4 x 20 x 20
        layer0_dropout_input = _dropout_from_layer(rng, layer0_input, p=dropout_rates[0])

        self.tparams = OrderedDict()

        # W0: flt_channels, flt_time, in_channels, flt_height, flt_width
        ###################################
        # layer 0: N x 20 x 4 x 20 x 20 => N x 18 x 100 x 18 x 18
        Dropout_layer0 = Dropout_Conv_3d_Layer(rng=rng, input=layer0_dropout_input,
                                               image_shape=signals_shape0,
                                               filter_shape=filters_shape0,
                                               dropout_rate=dropout_rates[1])
        self.dropout_layers.append(Dropout_layer0)
        next_dropout_input = Dropout_layer0.output
        self.tparams['W_0'] = Dropout_layer0.W
        self.tparams['b_0'] = Dropout_layer0.b

        layer0 = Conv_3d_Layer(rng=rng, input=layer0_input,
                               image_shape=signals_shape0,
                               filter_shape=filters_shape0,
                               W=Dropout_layer0.W * (1 - dropout_rates[1]),
                               b=Dropout_layer0.b * (1 - dropout_rates[1]))
        self.layers.append(layer0)
        next_layer_input = layer0.output
        ##################################

        ###################################
        # layer 1: N x 18 x 100 x 18 x 18 => conv => N x 16 x 200 x 16 x 16 => pool => N x 8 x 200 x 8 x 8
        Dropout_layer1 = Dropout_Conv_3d_Layer(rng=rng, input=next_dropout_input,
                                               image_shape=signals_shape1,
                                               filter_shape=filters_shape1,
                                               dropout_rate=dropout_rates[2])
        self.tparams['W_1'] = Dropout_layer1.W
        self.tparams['b_1'] = Dropout_layer1.b
        Dropout_layer1_pool = PoolLayer3D(input=Dropout_layer1.output.dimshuffle(0, 2, 1, 3, 4),
                                          pool_shape=(2, 2, 2))
        self.dropout_layers.append(Dropout_layer1)
        # self.dropout_layers.append(Dropout_layer1_pool)
        next_dropout_input = Dropout_layer1_pool.output.dimshuffle(0, 2, 1, 3, 4)

        layer1 = Conv_3d_Layer(rng=rng, input=next_layer_input,
                               image_shape=signals_shape1,
                               filter_shape=filters_shape1,
                               W=Dropout_layer1.W * (1 - dropout_rates[2]),
                               b=Dropout_layer1.b * (1 - dropout_rates[2]))
        layer1_pool = PoolLayer3D(input=layer1.output.dimshuffle(0, 2, 1, 3, 4), pool_shape=(2, 2, 2))
        self.layers.append(layer1)
        # self.layers.append(layer1_pool)
        next_layer_input = layer1_pool.output.dimshuffle(0, 2, 1, 3, 4)
        ##################################

        ###################################
        # layer 2: N x 8 x 200 x 8 x 8 => conv => N x 6 x 400 x 6 x 6 => pool => N x 3 x 400 x 3 x 3
        Dropout_layer2 = Dropout_Conv_3d_Layer(rng=rng, input=next_dropout_input,
                                               image_shape=signals_shape2,
                                               filter_shape=filters_shape2,
                                               dropout_rate=dropout_rates[3])
        Dropout_layer2_pool = PoolLayer3D(input=Dropout_layer2.output.dimshuffle(0, 2, 1, 3, 4),
                                          pool_shape=(2, 2, 2))
        self.tparams['W_2'] = Dropout_layer2.W
        self.tparams['b_2'] = Dropout_layer2.b
        self.dropout_layers.append(Dropout_layer2)
        # self.dropout_layers.append(Dropout_layer2_pool)
        next_dropout_input = Dropout_layer2_pool.output.dimshuffle(0, 2, 1, 3, 4).flatten(2)

        layer2 = Conv_3d_Layer(rng=rng, input=next_layer_input,
                               image_shape=signals_shape2,
                               filter_shape=filters_shape2,
                               W=Dropout_layer2.W * (1 - dropout_rates[3]),
                               b=Dropout_layer2.b * (1 - dropout_rates[3]))
        layer2_pool = PoolLayer3D(input=layer2.output.dimshuffle(0, 2, 1, 3, 4), pool_shape=(2, 2, 2))
        self.layers.append(layer2)
        # self.layers.append(layer2_pool)
        next_layer_input = layer2_pool.output.dimshuffle(0, 2, 1, 3, 4).flatten(2)
        ##################################

        # layer 3: fully connected, 400*3*3*3 => 1000
        Dropout_layer3 = DropoutHiddenLayer(rng=rng, input=next_dropout_input,
                                            activation=relu,
                                            n_in=(flt_channels * 4 * layer3_d * layer3_w * layer3_h),
                                            n_out=1000,
                                            dropout_rate=dropout_rates[4])
        self.dropout_layers.append(Dropout_layer3)
        next_dropout_input = Dropout_layer3.output
        self.tparams['W_3'] = Dropout_layer3.W
        self.tparams['b_3'] = Dropout_layer3.b

        # Reuse the parameters from the dropout layer here, in a different
        # path through the graph.
        layer3 = HiddenLayer(rng=rng, input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1-p)
                             W=Dropout_layer3.W * (1 - dropout_rates[4]),
                             b=Dropout_layer3.b * (1 - dropout_rates[4]),
                             n_in=(flt_channels * 4 * layer3_d * layer3_w * layer3_h),
                             n_out=1000)
        self.layers.append(layer3)
        next_layer_input = layer3.output
        ##################################

        # layer 4: fully connected, 1000 => 100
        Dropout_layer4 = DropoutHiddenLayer(rng=rng, input=next_dropout_input,
                                            activation=relu,
                                            n_in=1000, n_out=100,
                                            dropout_rate=dropout_rates[5])
        self.dropout_layers.append(Dropout_layer4)
        next_dropout_input = Dropout_layer4.output
        self.tparams['W_4'] = Dropout_layer4.W
        self.tparams['b_4'] = Dropout_layer4.b

        # Reuse the parameters from the dropout layer here, in a different
        # path through the graph.
        layer4 = HiddenLayer(rng=rng, input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1-p)
                             W=Dropout_layer4.W * (1 - dropout_rates[5]),
                             b=Dropout_layer4.b * (1 - dropout_rates[5]),
                             n_in=1000, n_out=100)
        self.layers.append(layer4)
        next_layer_input = layer4.output

        ##################### TODO #######################
        # layer 5: softmax output, 100 => 20
        Dropout_layer5 = LogisticRegression(input=next_dropout_input, n_in=100, n_out=20)
        self.dropout_layers.append(Dropout_layer5)
        self.tparams['W_5'] = Dropout_layer5.W
        self.tparams['b_5'] = Dropout_layer5.b

        # Again, reuse parameters in the dropout output.
        layer5 = LogisticRegression(input=next_layer_input,
                                    W=Dropout_layer5.W, b=Dropout_layer5.b,
                                    n_in=100, n_out=20)
        self.layers.append(layer5)

        # Use the negative log likelihood of the logistic regression layer as
        # the objective.
        self.dropout_negative_log_likelihood = self.dropout_layers[-1].negative_log_likelihood
        self.dropout_errors = self.dropout_layers[-1].errors
        self.negative_log_likelihood = self.layers[-1].negative_log_likelihood
        self.errors = self.layers[-1].errors

        self.L2_sqr = (T.sum(Dropout_layer0.W ** 2) + T.sum(Dropout_layer1.W ** 2)
                       + T.sum(Dropout_layer2.W ** 2) + T.sum(Dropout_layer3.W ** 2)
                       + T.sum(Dropout_layer4.W ** 2) + T.sum(Dropout_layer5.W ** 2))

        # Grab all the parameters together.
        self.params = [param for layer in self.dropout_layers for param in layer.params]


def test_fine_S_CNN_dA(finetune_lr=0.003, training_epochs=7, drop_rate=0.25, batch_size=5, reg=1e-7):
    data_ratio = 26
    [all_examples, all_labels, in_channels, all_train_sizes, test_size, val_size] = load_ATP_data(data_ratio)
    Xtr = all_examples[0]
    Xt = all_examples[1]
    Xv = all_examples[2]
    ytr = all_labels[0]
    yt = all_labels[1]
    yv = all_labels[2]

    n_train_batches = [a / batch_size for a in all_train_sizes]
    n_valid_batches = val_size
    n_test_batches = test_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'

    flt_channels = 64
    flt_time = 3
    in_channels = 4
    flt_height = 3
    flt_width = 3
    batchsize = batch_size
    in_time = 20
    in_height = 20
    in_width = 20

    signals_shape = (batchsize, in_time, in_channels, in_height, in_width)
    filters_shape = (flt_channels, flt_time, in_channels, flt_height, flt_width)

    # allocate symbolic variables for the data and build the dropout 3D CNN
    dtensor5 = T.TensorType('float32', (False,) * 5)
    x = dtensor5('x')
    y = T.ivector('y')

    classifier = ConvDropNet(rng=numpy_rng, input=x, in_channels=in_channels,
                             batch_size=batch_size,
                             dropout_rates=[drop_rate, drop_rate, drop_rate,
                                            drop_rate, drop_rate, drop_rate])

    L2_sqr = classifier.L2_sqr
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y) + reg * L2_sqr
    finetune_cost = dropout_cost
    errors = classifier.errors(y)

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'

    # compute number of minibatches for training, validation and testing
    index = T.lscalar('index')  # index to a [mini]batch
    lr = T.scalar(name='lr')
    tparams = classifier.tparams
    # note: reg * L2_sqr is already included in dropout_cost above, so the
    # L2 penalty is effectively applied twice here (kept as in the original)
    cost = finetune_cost + (reg * L2_sqr)

    grads = T.grad(cost, tparams.values())

    # RMSProp-style updates: accumulate gradients and squared gradients as shared variables
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad' % k)
                     for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    train_fn = theano.function([x, y], cost, updates=zgup + rgup + rg2up,
                               name='rmsprop_f_grad_shared')

    updir = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_updir' % k)
             for k, p in tparams.iteritems()]
    updir_new = [(ud, 0.9 * ud - 1e-4 * zg / T.sqrt(rg2 - rg ** 2 + 1e-4))
                 for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, running_grads2)]
    param_up = [(p, p + udn[1]) for p, udn in zip(tparams.values(), updir_new)]
    f_update = theano.function([lr], [], updates=updir_new + param_up,
                               on_unused_input='ignore', name='rmsprop_f_update')

    index = T.lscalar('index')  # index to a [mini]batch

    train_score_i = theano.function([x, y], errors, name='train')
    test_score_i = theano.function([x, y], errors, name='test')
    valid_score_i = theano.function([x, y], errors, name='valid')

    print '... finetuning the model'
    # early-stopping parameters
    patience = 100000  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches[0], patience / 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_test_score = numpy.inf
    test_score = 0.
    start_time = time.clock()
    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        list_file = open('../progress/progress_3DCNN_ATP_dropout_' + str(drop_rate) + '.txt', 'a')
        epoch = epoch + 1
        train_losses = []
        # the training set is stored as 20 parts; iterate over every minibatch of every part
        for part_index in xrange(20):
            for minibatch_index in xrange(n_train_batches[part_index]):
                X_train = Xtr[part_index][minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
                y_train = ytr[part_index][minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
                train_set_x, train_set_y = shared_dataset(X_train, y_train)
                # reorder to (batch, depth, channels, height, width) as expected by the network
                train_set_x = train_set_x.dimshuffle(0, 4, 1, 2, 3)
                minibatch_avg_cost = train_fn(train_set_x.eval(), train_set_y.eval())
                train_err = train_score_i(train_set_x.eval(), train_set_y.eval())
                train_losses.append(train_err)
                f_update(finetune_lr)
                iter = (epoch - 1) * n_train_batches[part_index] + minibatch_index

        train_losses = numpy.array(train_losses)
        this_train_loss = numpy.mean(train_losses)
        list_file.write('epoch %i, train error %f %%' % (epoch, this_train_loss * 100.))
        list_file.write('\n')

        validation_losses = []
        for i in xrange(n_valid_batches):
            valid_set_x, valid_set_y = shared_dataset(Xv[i * batch_size:(i + 1) * batch_size],
                                                      yv[i * batch_size:(i + 1) * batch_size])
            valid_set_x = valid_set_x.dimshuffle(0, 4, 1, 2, 3)
            validation_losses.append(valid_score_i(valid_set_x.eval(), valid_set_y.eval()))
        this_validation_loss = numpy.mean(validation_losses)
        print('epoch %i, validation error %f %%' % (epoch, this_validation_loss * 100.))
        list_file.write('epoch %i, validation error %f %%' % (epoch, this_validation_loss * 100.))
        list_file.write('\n')
        list_file.close()  # close the progress log; it was left open in the original

        dump_weights_pickle(classifier,
                            file_name='../results/weights/weight_3DCNN_ATP_epoch_' + str(epoch) + '.zip')

        # if we got the best validation score until now
        if this_validation_loss <= best_validation_loss:
            # improve patience if loss improvement is good enough
            if this_validation_loss < best_validation_loss * improvement_threshold:
                patience = max(patience, iter * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            best_iter = iter
            best_test_score = test_score * 100.

    end_time = time.clock()
    dump_weights_pickle(classifier,
                        file_name='../results/weights/weight_3DCNN_ATP_epoch_' + str(epoch) + '.zip')


if __name__ == '__main__':
    # THEANO_FLAGS=device=gpu,floatX=float32 python 3DCNN_atp_drop.py
    lr = 0.003
    epoch = 10
    drop_rate = 0.2
    test_fine_S_CNN_dA(finetune_lr=lr, training_epochs=epoch, drop_rate=drop_rate)