import os
import sys
import time
import random
import numpy
import tables
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from pprint import pprint
from scipy.io import matlab
import re
import math
from collections import OrderedDict
from store_pytable import *
from layers import *
from theano.misc.pkl_utils import dump


def numpy_floatX(data):
    return numpy.asarray(data, dtype=theano.config.floatX)


def load_ATP_data():
    """Load the positive/negative ATP train and test sets from pytables files,
    hold out 1/20 of the training data (per class) for validation, and return
    the shuffled training set together with the test and validation sets."""
    dataName = "data"
    labelName = "label"

    # Positive training examples
    ID = 'train' + '_' + 'pos'
    filename_train_pos = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_pos, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_train_pos = dataColumn_train[:]
    y_train_pos = labelColumn_train[:]

    # Negative training examples
    ID = 'train' + '_' + 'neg'
    filename_train_neg = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_neg, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_train_neg = dataColumn_train[:]
    y_train_neg = labelColumn_train[:]

    # Positive test examples
    ID = 'test' + '_' + 'pos'
    filename_test_pos = "../data/pytables_atp/" + ID + ".pytables"
    h5file_test = tables.openFile(filename_test_pos, mode="r")
    dataColumn_test = getH5column(h5file_test, dataName)
    labelColumn_test = getH5column(h5file_test, labelName)
    X_test_pos = dataColumn_test[:]
    y_test_pos = labelColumn_test[:]

    # Negative test examples
    ID = 'test' + '_' + 'neg'
    filename_test_neg = "../data/pytables_atp/" + ID + ".pytables"
    h5file_test = tables.openFile(filename_test_neg, mode="r")
    dataColumn_test = getH5column(h5file_test, dataName)
    labelColumn_test = getH5column(h5file_test, labelName)
    X_test_neg = dataColumn_test[:]
    y_test_neg = labelColumn_test[:]

    # Full test set: positives followed by negatives
    Xt = numpy.concatenate((X_test_pos, X_test_neg), axis=0)
    yt = numpy.concatenate((y_test_pos, y_test_neg), axis=0)

    # Sample 19/20 of each class for training; the remainder is validation
    num_of_train_pos = int(19 * float(X_train_pos.shape[0]) / 20)
    num_of_train_neg = int(19 * float(X_train_neg.shape[0]) / 20)

    mask_train_pos = random.sample(xrange(X_train_pos.shape[0]), num_of_train_pos)
    X_tr_pos = X_train_pos[mask_train_pos]
    y_tr_pos = y_train_pos[mask_train_pos]

    mask_train_neg = random.sample(xrange(X_train_neg.shape[0]), num_of_train_neg)
    X_tr_neg = X_train_neg[mask_train_neg]
    y_tr_neg = y_train_neg[mask_train_neg]

    # Remove the sampled training rows; what is left becomes the validation set
    X_train_pos = numpy.delete(X_train_pos, mask_train_pos, 0)
    y_train_pos = numpy.delete(y_train_pos, mask_train_pos, 0)
    X_train_neg = numpy.delete(X_train_neg, mask_train_neg, 0)
    y_train_neg = numpy.delete(y_train_neg, mask_train_neg, 0)
    Xv_pos = X_train_pos
    yv_pos = y_train_pos
    Xv_neg = X_train_neg
    yv_neg = y_train_neg

    X = numpy.concatenate((X_tr_pos, X_tr_neg), axis=0)
    y = numpy.concatenate((y_tr_pos, y_tr_neg), axis=0)
    Xv = numpy.concatenate((Xv_pos, Xv_neg), axis=0)
    yv = numpy.concatenate((yv_pos, yv_neg), axis=0)

    # Shuffle the training set so positives and negatives are interleaved
    from sklearn.utils import shuffle
    X, y = shuffle(X, y)

    all_train_sizes = []
    all_examples = [X, Xt, Xv]
    all_labels = [y, yt, yv]
    return [all_examples, all_labels, 4, all_train_sizes, Xt.shape[0], Xv.shape[0]]


def test_fine_S_CNN_dA():
    """Train a class-balanced SVM on the flattened, rescaled voxel features and
    save the predicted probabilities, true test labels, and fitted model."""
    [all_examples, all_labels, in_channels, all_train_sizes,
     test_size, val_size] = load_ATP_data()
    Xtr = all_examples[0]
    Xt = all_examples[1]
    Xv = all_examples[2]

    # Flatten each example to a 1-D feature vector
    Xtr = numpy.reshape(Xtr, (Xtr.shape[0], -1))
    Xt = numpy.reshape(Xt, (Xt.shape[0], -1))
    Xv = numpy.reshape(Xv, (Xv.shape[0], -1))

    # Scale every feature to [-1, 1] using statistics from the training set only
    from sklearn.preprocessing import MinMaxScaler
    scaling = MinMaxScaler(feature_range=(-1, 1)).fit(Xtr)
    Xtr = scaling.transform(Xtr)
    Xt = scaling.transform(Xt)
    Xv = scaling.transform(Xv)

    ytr = all_labels[0]
    yt = all_labels[1]
    yv = all_labels[2]

    # Class-balanced SVM with probability estimates enabled
    from sklearn import svm
    import cPickle
    clf = svm.SVC(class_weight='balanced', probability=True)
    clf.fit(Xtr, ytr)

    pred_y = clf.predict(Xt)
    prob_y = clf.predict_proba(Xt)

    # Persist probability scores, true test labels, and the fitted model
    prob_y.dump('../results/prob_scores/Voxel_SVM/' + 'prob_y.dat')
    yt.dump('../results/prob_scores/Voxel_SVM/' + 'true_y.dat')
    with open('../results/weights/Voxel_SVM_ATP.pkl', 'wb') as fid:
        cPickle.dump(clf, fid)


if __name__ == '__main__':
    test_fine_S_CNN_dA()
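# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original pipeline): one way the
# artifacts written by test_fine_S_CNN_dA() could be reloaded for evaluation.
# The function name, the use of roc_auc_score, and the assumption that the
# positive class is labelled 1 are assumptions added for illustration only.
def evaluate_saved_scores():
    import cPickle
    from sklearn.metrics import roc_auc_score

    # Reload the fitted SVM and the dumped probability/label arrays
    with open('../results/weights/Voxel_SVM_ATP.pkl', 'rb') as fid:
        clf = cPickle.load(fid)
    prob_y = numpy.load('../results/prob_scores/Voxel_SVM/prob_y.dat')
    true_y = numpy.load('../results/prob_scores/Voxel_SVM/true_y.dat')

    # predict_proba columns follow clf.classes_; pick the positive-class column
    pos_col = list(clf.classes_).index(1)  # assumes binary 0/1 labels
    print('test ROC AUC: %.4f' % roc_auc_score(true_y, prob_y[:, pos_col]))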