# Author: Wen Torng and Russ B. Altman (2018)
# Framework for training 1DCNN-FEATURE functional site models

import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from pprint import pprint
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from theano.tensor.nnet import conv3d2d
from scipy.io import matlab
import re
import math
from theano import shared
from collections import OrderedDict
from layers import *
from theano.misc.pkl_utils import dump
import random


def shared_dataset(data_x, data_y, borrow=True):
    # Load the data into Theano shared variables; labels are cast to int32.
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')


def numpy_floatX(data):
    return numpy.asarray(data, dtype=theano.config.floatX)


def load_FEATURE_data(fold):
    # Parse the positive and negative FEATURE vector files for the current site.
    pos_ff_file = open('../data/PROSITE_TP_TN/site_FF/' + site + '.' + target_RES + '.' + target_ATOM + '.' + 'pos' + '.ff')
    neg_ff_file = open('../data/PROSITE_TP_TN/site_FF/' + site + '.' + target_RES + '.' + target_ATOM + '.' + 'neg' + '.ff')

    pos_FF = []
    for line in pos_ff_file:
        if line[0:4] == 'Env_':
            ele = line.split()
            FV = numpy.zeros((480,))
            for i in range(1, 481):
                FV[i - 1] = ele[i]
            pos_FF.append(FV)

    neg_FF = []
    for line in neg_ff_file:
        if line[0:4] == 'Env_':
            ele = line.split()
            FV = numpy.zeros((480,))
            for i in range(1, 481):
                FV[i - 1] = ele[i]
            neg_FF.append(FV)

    X_pos = numpy.array(pos_FF)
    X_neg = numpy.array(neg_FF)
    y_pos = numpy.ones((X_pos.shape[0],))
    y_neg = numpy.zeros((X_neg.shape[0],))

    # Hold out one fold of positives and negatives as the test set.
    num_of_pos_test = X_pos.shape[0] / total_fold
    num_of_neg_test = X_neg.shape[0] / total_fold

    mask_pos_test = range(num_of_pos_test * fold, num_of_pos_test * (fold + 1))
    mask_neg_test = range(num_of_neg_test * fold, num_of_neg_test * (fold + 1))

    Xt_pos = X_pos[mask_pos_test]
    yt_pos = y_pos[mask_pos_test]
    X_pos = numpy.delete(X_pos, mask_pos_test, 0)
    y_pos = numpy.delete(y_pos, mask_pos_test, 0)

    Xt_neg = X_neg[mask_neg_test]
    yt_neg = y_neg[mask_neg_test]
    X_neg = numpy.delete(X_neg, mask_neg_test, 0)
    y_neg = numpy.delete(y_neg, mask_neg_test, 0)

    Xt = numpy.concatenate((Xt_pos, Xt_neg), axis=0)
    yt = numpy.concatenate((yt_pos, yt_neg), axis=0)

    # Balance the remaining examples by pairing the smaller class with
    # equally sized chunks of the larger class.
    s = [X_pos.shape[0], X_neg.shape[0]]
    data_size = min(s)
    min_index = s.index(data_size)

    print "pos:" + str(s[0])
    print "neg:" + str(s[1])

    X_pos_all = []
    y_pos_all = []
    X_neg_all = []
    y_neg_all = []

    if min_index == 0:
        ratio = float(s[1]) / s[0]
        r = int(numpy.floor(ratio))
        print "more neg, ratio=" + str(ratio)
        for i in range(0, r):
            X_pos_all.append(X_pos)
            y_pos_all.append(y_pos)
            X_neg_all.append(X_neg[i * data_size:(i + 1) * data_size])
            y_neg_all.append(y_neg[i * data_size:(i + 1) * data_size])
    elif min_index == 1:
        ratio = float(s[0]) / s[1]
        r = int(numpy.floor(ratio))
        print "more pos, ratio=" + str(ratio)
        for i in range(0, r):
            X_pos_all.append(X_pos[i * data_size:(i + 1) * data_size])
            y_pos_all.append(y_pos[i * data_size:(i + 1) * data_size])
            X_neg_all.append(X_neg)
            y_neg_all.append(y_neg)

    equal_examples = []
    equal_labels = []
    for i in range(0, len(X_pos_all)):
        equal_examples.append(numpy.concatenate((X_pos_all[i], X_neg_all[i]), axis=0))
        equal_labels.append(numpy.concatenate((y_pos_all[i], y_neg_all[i]), axis=0))

    equal_examples = numpy.array(equal_examples)
    equal_labels = numpy.array(equal_labels)
    print equal_examples.shape

    equal_examples = numpy.reshape(equal_examples, (equal_examples.shape[0] * equal_examples.shape[1], 480))
    equal_labels = equal_labels.flatten()

    num_of_train = int(19 * float(equal_examples.shape[0]) / 20)
    num_of_val = int(1 * float(equal_examples.shape[0]) / 20)
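
    # Split the class-balanced pool into training (19/20) and validation (1/20)
    # subsets by sampling row indices at random without replacement; any
    # remaining rows are discarded.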
    print num_of_train
    print num_of_val

    mask_train = random.sample(xrange(equal_examples.shape[0]), num_of_train)
    X = equal_examples[mask_train]
    y = equal_labels[mask_train]
    equal_examples = numpy.delete(equal_examples, mask_train, 0)
    equal_labels = numpy.delete(equal_labels, mask_train, 0)

    mask_val = random.sample(xrange(equal_examples.shape[0]), num_of_val)
    Xv = equal_examples[mask_val]
    yv = equal_labels[mask_val]
    equal_examples = numpy.delete(equal_examples, mask_val, 0)
    equal_labels = numpy.delete(equal_labels, mask_val, 0)

    all_train_x = []
    all_train_y = []
    all_train_sizes = []

    all_examples = [X, Xt, Xv]
    all_labels = [y, yt, yv]

    return [all_examples, all_labels, X.shape[0], Xt.shape[0], Xv.shape[0]]


class CNN_1D(object):

    def __init__(self, rng, input, dropout_rates, batch_size, use_bias=True):

        self.layers = []
        self.dropout_layers = []

        print '... building the model'

        batchsize = batch_size
        filter_size = 10
        num_chs = [32, 64, 128]

        # Input: one 1 x 480 FEATURE vector per example.
        image_shape = (batchsize, 1, 1, 480)
        filter_shape = (num_chs[0], 1, 1, filter_size)

        layer0_input = input.reshape(image_shape)

        next_dropout_input = layer0_input
        next_layer_input = layer0_input

        ##################################
        # First 1D convolutional layer: the dropout path is used for training;
        # the parallel non-dropout path shares the same weights scaled by (1 - p).
        Dropout_layer3 = DropoutCNNLayer(rng=rng,
                                         input=next_dropout_input,
                                         filter_shape=filter_shape,
                                         image_shape=image_shape,
                                         dropout_rate=dropout_rates[0])
        self.dropout_layers.append(Dropout_layer3)
        next_dropout_input = Dropout_layer3.output

        layer3 = Conv_1D(rng=rng,
                         input=next_layer_input,
                         filter_shape=filter_shape,
                         image_shape=image_shape,
                         W=Dropout_layer3.W * (1 - dropout_rates[0]),
                         b=Dropout_layer3.b * (1 - dropout_rates[0]))
        self.layers.append(layer3)
        next_layer_input = layer3.output

        # Second 1D convolutional layer.
        image_shape = (batchsize, num_chs[0], 1, (480 - filter_size) + 1)
        filter_shape = (num_chs[1], num_chs[0], 1, filter_size)

        Dropout_layer4 = DropoutCNNLayer(rng=rng,
                                         input=next_dropout_input,
                                         filter_shape=filter_shape,
                                         image_shape=image_shape,
                                         dropout_rate=dropout_rates[1])
        self.dropout_layers.append(Dropout_layer4)
        next_dropout_input = Dropout_layer4.output.flatten(2)

        layer4 = Conv_1D(rng=rng,
                         input=next_layer_input,
                         filter_shape=filter_shape,
                         image_shape=image_shape,
                         # scale the weight matrix W with (1-p)
                         W=Dropout_layer4.W * (1 - dropout_rates[1]),
                         b=Dropout_layer4.b * (1 - dropout_rates[1]))
        self.layers.append(layer4)
        next_layer_input = layer4.output.flatten(2)

        # Softmax output layer over the flattened feature maps.
        image_shape = (batchsize, num_chs[1], 1, (480 - 2 * filter_size) + 2)
        n_in = num_chs[1] * (482 - 2 * filter_size)

        Dropout_layer5 = LogisticRegression(input=next_dropout_input,
                                            n_in=n_in, n_out=2)
        self.dropout_layers.append(Dropout_layer5)

        layer5 = LogisticRegression(input=next_layer_input,
                                    W=Dropout_layer5.W,
                                    b=Dropout_layer5.b,
                                    n_in=n_in, n_out=2)
        self.layers.append(layer5)

        self.dropout_negative_log_likelihood = self.dropout_layers[-1].negative_log_likelihood
        self.dropout_errors = self.dropout_layers[-1].errors

        self.negative_log_likelihood = self.layers[-1].negative_log_likelihood
        self.errors = self.layers[-1].errors

        self.L2_sqr = T.sum(Dropout_layer3.W ** 2) + T.sum(Dropout_layer4.W ** 2) + T.sum(Dropout_layer5.W ** 2)

        self.params = [param for layer in self.dropout_layers for param in layer.params]


def train_CNN_1D(fold, learning_rate=0.002, n_epochs=10, batch_size=20, reg=5e-6,
                 dropout=True, dropout_rates=[0.3, 0.3]):

    rng = numpy.random.RandomState(23455)

    [all_examples, all_labels, train_size, test_size, val_size] = load_FEATURE_data(fold)

    Xtr = all_examples[0]
    Xt = all_examples[1]
    Xv = all_examples[2]
    ytr = all_labels[0]
    yt = all_labels[1]
    yv = all_labels[2]

    n_train_batches = train_size / batch_size
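
    # Batch counts use Python 2 integer division, so examples beyond the last
    # full minibatch in each split are simply not used.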
    n_valid_batches = val_size
    n_test_batches = test_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # Add singleton channel/height axes so each example is a 1 x 1 x 480 "image".
    Xtr = Xtr[:, numpy.newaxis, numpy.newaxis, :]
    Xt = Xt[:, numpy.newaxis, numpy.newaxis, :]
    Xv = Xv[:, numpy.newaxis, numpy.newaxis, :]

    test_set_x, test_set_y = shared_dataset(Xt, yt)
    valid_set_x, valid_set_y = shared_dataset(Xv, yv)

    index = T.lscalar()  # index to a [mini]batch
    dtensor4 = T.TensorType('float32', (False,) * 4)
    x = dtensor4('x')
    y = T.ivector('y')

    classifier = CNN_1D(rng=rng, input=x, batch_size=batch_size, dropout_rates=dropout_rates)

    L2_sqr = classifier.L2_sqr
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y) + reg * L2_sqr

    print '... building the model'

    # Compile theano function for testing.
    test_model = theano.function(inputs=[index],
                                 outputs=classifier.errors(y),
                                 givens={
                                     x: test_set_x[index * batch_size:(index + 1) * batch_size],
                                     y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
                                     outputs=classifier.errors(y),
                                     givens={
                                         x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                         y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    output = dropout_cost if dropout else cost

    grads = []
    for param in classifier.params:
        # Use the right cost function here to train with or without dropout.
        gparam = T.grad(output, param)
        grads.append(gparam)

    # Plain SGD updates.
    updates = []
    for param_i, grad_i in zip(classifier.params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([x, y], cost, updates=updates)

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 100000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
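
    # Training loop: each minibatch is copied into fresh shared variables and
    # fed to train_model; every validation_frequency iterations the model is
    # scored on the validation set, and whenever a new best validation error
    # is reached the test error is reported and the weights are dumped.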
    start_time = time.clock()

    cost_ij = 0
    epoch = 0
    done_looping = False
    iter = 0
    startc = time.clock()

    cost_history = []
    train_history = []
    valid_history = []

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            X_train = Xtr[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            y_train = ytr[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]

            print "X_train:"
            print X_train.shape
            print "y_train:"
            print y_train.shape

            train_set_x, train_set_y = shared_dataset(X_train, y_train)

            iter = (epoch - 1) * n_train_batches + minibatch_index

            cost_ij = train_model(train_set_x.eval(), train_set_y.eval())
            cost_history.append(cost_ij)

            if (iter + 1) % validation_frequency == 0:

                list_file = open('../progress/progress_FSCNN_FEATURE_CNN_1D_' + site + '_' + str(fold) + '.txt', 'a')

                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                valid_history.append(100 * (1 - this_validation_loss))

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                list_file.write('epoch %i, minibatch %i/%i, validation error %f %%' %
                                (epoch, minibatch_index + 1, n_train_batches,
                                 this_validation_loss * 100.))
                list_file.write('\n')

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # evaluate the new best model on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('    epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    list_file.write(('    epoch %i, minibatch %i/%i, test error of '
                                     'best model %f %%') %
                                    (epoch, minibatch_index + 1, n_train_batches,
                                     test_score * 100.))
                    list_file.write('\n')

                    # checkpoint the weights of the current best model
                    dump_weights_pickle(classifier,
                                        '../results/weights/weight_FEATURE_1DCNN_' + site + '_' + str(fold) + '.zip')

                list_file.close()


def dump_weights_pickle(classifier, file_name):
    # Pickle the weights and biases of the three trained layers.
    W3 = classifier.params[0]
    W4 = classifier.params[2]
    W5 = classifier.params[4]

    b3 = classifier.params[1]
    b4 = classifier.params[3]
    b5 = classifier.params[5]

    with open(file_name, 'wb') as f:
        dump((W3, W4, W5, b3, b4, b5), f)


if __name__ == '__main__':

    target_RES = sys.argv[1]
    target_ATOM = sys.argv[2]
    site = sys.argv[3]
    fold = int(sys.argv[4])
    total_fold = 5
    result_weights_ID = target_RES + "_" + target_ATOM + "_" + site

    train_CNN_1D(fold=fold, learning_rate=0.002, n_epochs=20, batch_size=20, reg=5e-6)
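

# Example invocation (a sketch; the script name and the residue/atom/site
# values below are illustrative placeholders, not values from the original study):
#
#   python train_1DCNN_FEATURE.py ASP OD1 EF_HAND_1 0
#
# i.e. sys.argv supplies target_RES, target_ATOM, the PROSITE site name, and
# the held-out test fold (0-4 of the 5-fold split).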