import os
import sys
import time

import numpy
import theano
import theano.tensor as T
from layers import *
import random
from data_utils import *
from theano.misc.pkl_utils import dump
import argparse


class MLP(object):
    """A multilayer perceptron with all the trappings required to do
    dropout training.
    """
    def __init__(self, rng, input, in_channels, dropout_rates, batch_size, use_bias=True):

        #rectified_linear_activation = lambda x: T.maximum(0.0, x)

        # Set up all the hidden layers
        #weight_matrix_sizes = zip(layer_sizes, layer_sizes[1:])
        self.layers = []
        self.dropout_layers = []

        # each example is a 20 x 20 x 20 voxel grid with `in_channels` channels
        num_3d_pixel = 20

        layer0_w = num_3d_pixel
        layer0_h = num_3d_pixel
        layer0_d = num_3d_pixel

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'

        # image sizes
        batchsize = batch_size
        in_time = num_3d_pixel
        in_width = num_3d_pixel
        in_height = num_3d_pixel

        # reshape the flat input into a 5D tensor, then flatten it back to
        # (batch_size, in_channels * 20 * 20 * 20) for the fully connected layers
        signals_shape0 = (batchsize, in_time, in_channels, in_height, in_width)
        layer0_input = input.reshape(signals_shape0)

        next_dropout_input = layer0_input.flatten(2)
        next_layer_input = layer0_input.flatten(2)

        # First hidden layer: 4*20*20*20 -> 10000, with dropout on the training path
        Dropout_layer3 = DropoutHiddenLayer(rng=rng,
                                            input=next_dropout_input,
                                            activation=relu,
                                            n_in=(4*20*20*20),
                                            n_out=10000,
                                            dropout_rate=dropout_rates[0])
        self.dropout_layers.append(Dropout_layer3)
        next_dropout_input = Dropout_layer3.output

        # Reuse the parameters from the dropout layer here, in a different
        # path through the graph.
        layer3 = HiddenLayer(rng=rng,
                             input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1-p)
                             W=Dropout_layer3.W * (1 - dropout_rates[0]),
                             b=Dropout_layer3.b * (1 - dropout_rates[0]),
                             n_in=(4*20*20*20),
                             n_out=10000)
        self.layers.append(layer3)
        next_layer_input = layer3.output

        # Second hidden layer: 10000 -> 100
        Dropout_layer4 = DropoutHiddenLayer(rng=rng,
                                            input=next_dropout_input,
                                            activation=relu,
                                            n_in=10000,
                                            n_out=100,
                                            dropout_rate=dropout_rates[1])
        self.dropout_layers.append(Dropout_layer4)
        next_dropout_input = Dropout_layer4.output

        # Reuse the parameters from the dropout layer here, in a different
        # path through the graph.
        layer4 = HiddenLayer(rng=rng,
                             input=next_layer_input,
                             activation=relu,
                             # scale the weight matrix W with (1-p)
                             W=Dropout_layer4.W * (1 - dropout_rates[1]),
                             b=Dropout_layer4.b * (1 - dropout_rates[1]),
                             n_in=10000,
                             n_out=100)
        self.layers.append(layer4)
        next_layer_input = layer4.output

        # Output layer: softmax over 20 classes
        Dropout_layer5 = LogisticRegression(
            input=next_dropout_input,
            n_in=100, n_out=20)
        self.dropout_layers.append(Dropout_layer5)

        # Again, reuse parameters in the dropout output.
        layer5 = LogisticRegression(
            input=next_layer_input,
            W=Dropout_layer5.W,
            b=Dropout_layer5.b,
            n_in=100, n_out=20)
        self.layers.append(layer5)

        # Use the negative log likelihood of the logistic regression layer as
        # the objective.
        self.dropout_negative_log_likelihood = self.dropout_layers[-1].negative_log_likelihood
        self.dropout_errors = self.dropout_layers[-1].errors

        self.negative_log_likelihood = self.layers[-1].negative_log_likelihood
        self.errors = self.layers[-1].errors

        # L2 penalty on the weight matrices (added to the dropout cost in train_MLP)
        self.L2_sqr = T.sum(Dropout_layer3.W**2) + T.sum(Dropout_layer4.W**2) + T.sum(Dropout_layer5.W**2)

        # Grab all the parameters together.
        self.params = [param for layer in self.dropout_layers for param in layer.params]
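
# Note: DropoutHiddenLayer, HiddenLayer, LogisticRegression and relu come from
# the project's layers module, which is not shown here. As an illustration only
# (an assumption about that module, not its actual code), a dropout hidden layer
# is conventionally a hidden layer whose output is multiplied by a fresh binary
# mask every minibatch; the test-time path above compensates by scaling W with
# (1 - p) instead of sampling a mask. The helper below is a sketch of that
# masking step and is not used by the model above.
from theano.tensor.shared_randomstreams import RandomStreams


def _dropout_mask_sketch(rng, layer_output, p):
    """Illustrative only: zero each unit with probability p at training time."""
    srng = RandomStreams(rng.randint(999999))
    mask = srng.binomial(n=1, p=1 - p, size=layer_output.shape,
                         dtype=theano.config.floatX)
    return layer_output * mask
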

def train_MLP(learning_rate=0.002, n_epochs=10, batch_size=20, reg=5e-6,
              dropout=True, dropout_rates=[0.3, 0.3]):

    rng = numpy.random.RandomState(23455)

    [all_examples, all_labels, all_train_sizes, test_size, val_size] = load_ATOM_BOX()

    Xtr = all_examples[0]
    Xt = all_examples[1]
    Xv = all_examples[2]
    ytr = all_labels[0]
    yt = all_labels[1]
    yv = all_labels[2]

    test_set_x, test_set_y = shared_dataset(Xt, yt)
    valid_set_x, valid_set_y = shared_dataset(Xv, yv)

    # move the channel axis next to the batch axis: (batch, channel, d, h, w)
    test_set_x = test_set_x.dimshuffle(0, 4, 1, 2, 3)
    valid_set_x = valid_set_x.dimshuffle(0, 4, 1, 2, 3)

    n_train_batches = [a / batch_size for a in all_train_sizes]
    n_valid_batches = val_size / batch_size
    n_test_batches = test_size / batch_size

    index = T.lscalar()  # index to a [mini]batch
    dtensor5 = T.TensorType('float32', (False,) * 5)
    x = dtensor5('x')
    y = T.ivector('y')

    classifier = MLP(rng=rng, input=x, in_channels=4,
                     batch_size=batch_size, dropout_rates=dropout_rates)

    L2_sqr = classifier.L2_sqr
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y) + reg * L2_sqr

    print '... building the model'

    # Compile theano functions for testing and validation.
    test_model = theano.function(inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # Use the right cost function here to train with or without dropout.
    output = dropout_cost if dropout else cost

    grads = []
    for param in classifier.params:
        gparam = T.grad(output, param)
        grads.append(gparam)

    # plain SGD updates
    updates = []
    for param_i, grad_i in zip(classifier.params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([x, y], cost, updates=updates)

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 100000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches[0], patience / 2)

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    cost_ij = 0
    epoch = 0
    done_looping = False
    iter = 0

    startc = time.clock()
    cost_history = []
    train_history = []
    valid_history = []

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for part_index in range(6):
            for minibatch_index in range(n_train_batches[part_index]):

                X_train = Xtr[part_index][minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
                y_train = ytr[part_index][minibatch_index * batch_size: (minibatch_index + 1) * batch_size]

                train_set_x, train_set_y = shared_dataset(X_train, y_train)
                train_set_x = train_set_x.dimshuffle(0, 4, 1, 2, 3)

                iter = (epoch - 1) * n_train_batches[part_index] + minibatch_index
                cost_ij = train_model(train_set_x.eval(), train_set_y.eval())
                cost_history.append(cost_ij)

                if (iter + 1) % validation_frequency == 0:
                    list_file = open('../progress_MLP.txt', 'a')

                    validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    valid_history.append(100 * (1 - this_validation_loss))

                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches[part_index],
                           this_validation_loss * 100.))
                    list_file.write('epoch %i, minibatch %i/%i, validation error %f %%' %
                                    (epoch, minibatch_index + 1, n_train_batches[part_index],
                                     this_validation_loss * 100.))
                    list_file.write('\n')

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                        # test it on the test set
                        test_losses = [test_model(i) for i in range(n_test_batches)]
                        test_score = numpy.mean(test_losses)

                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches[part_index],
                               test_score * 100.))
                        list_file.write(('     epoch %i, minibatch %i/%i, test error of '
                                         'best model %f %%') %
                                        (epoch, minibatch_index + 1, n_train_batches[part_index],
                                         test_score * 100.))
                        list_file.write('\n')

                    list_file.close()

        # checkpoint the weights at the end of every epoch
        list_file = open('../progress_MLP.txt', 'a')
        list_file.write('getting weights from classifier ...' + '\n')
        list_file.close()
        dump_weights_pickle(classifier, '../weights/weight_MLP.zip')

    end_time = time.clock()

    dump_weights_pickle(classifier, '../weights/weight_MLP.zip')

    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))


def dump_weights_pickle(classifier, file_name):

    W3 = classifier.params[0]
    W4 = classifier.params[2]
    W5 = classifier.params[4]

    b3 = classifier.params[1]
    b4 = classifier.params[3]
    b5 = classifier.params[5]

    with open(file_name, 'wb') as f:
        dump((W3, W4, W5, b3, b4, b5), f)


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("epoch", help="number of training epochs")
    args = parser.parse_args()
    num_epoch = int(args.epoch)

    train_MLP(learning_rate=0.002, n_epochs=num_epoch, batch_size=20, reg=5e-6)
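
# Example usage (assumes the dataset expected by data_utils.load_ATOM_BOX is
# available and that ../progress_MLP.txt and ../weights/ are writable):
#
#     python <this_script>.py 10
#
# which trains for 10 epochs with the defaults above
# (learning_rate=0.002, batch_size=20, reg=5e-6, dropout rates 0.3/0.3).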