import os
import sys
import time
import random
import numpy
import tables
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from pprint import pprint
from scipy.io import matlab
import re
import math
from collections import OrderedDict
from store_pytable import *
from layers import *
from theano.misc.pkl_utils import dump


def numpy_floatX(data):
    return numpy.asarray(data, dtype=theano.config.floatX)


def load_ATP_data():
    """Load the positive/negative ATP train and test sets from pytables files,
    hold out 1/20 of the training data (per class) for validation, and return
    the shuffled training set together with the test and validation sets."""
    dataName = "data"
    labelName = "label"

    # Positive training examples
    ID = 'train' + '_' + 'pos'
    filename_train_pos = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_pos, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_train_pos = dataColumn_train[:]
    y_train_pos = labelColumn_train[:]

    # Negative training examples
    ID = 'train' + '_' + 'neg'
    filename_train_neg = "../data/pytables_atp/" + ID + ".pytables"
    h5file_train = tables.openFile(filename_train_neg, mode="r")
    dataColumn_train = getH5column(h5file_train, dataName)
    labelColumn_train = getH5column(h5file_train, labelName)
    X_train_neg = dataColumn_train[:]
    y_train_neg = labelColumn_train[:]

    # Positive test examples
    ID = 'test' + '_' + 'pos'
    filename_test_pos = "../data/pytables_atp/" + ID + ".pytables"
    h5file_test = tables.openFile(filename_test_pos, mode="r")
    dataColumn_test = getH5column(h5file_test, dataName)
    labelColumn_test = getH5column(h5file_test, labelName)
    X_test_pos = dataColumn_test[:]
    y_test_pos = labelColumn_test[:]

    # Negative test examples
    ID = 'test' + '_' + 'neg'
    filename_test_neg = "../data/pytables_atp/" + ID + ".pytables"
    h5file_test = tables.openFile(filename_test_neg, mode="r")
    dataColumn_test = getH5column(h5file_test, dataName)
    labelColumn_test = getH5column(h5file_test, labelName)
    X_test_neg = dataColumn_test[:]
    y_test_neg = labelColumn_test[:]

    # Full test set: positives followed by negatives
    Xt = numpy.concatenate((X_test_pos, X_test_neg), axis=0)
    yt = numpy.concatenate((y_test_pos, y_test_neg), axis=0)

    # Sample 19/20 of each class for training; the remainder is validation
    num_of_train_pos = int(19 * float(X_train_pos.shape[0]) / 20)
    num_of_train_neg = int(19 * float(X_train_neg.shape[0]) / 20)

    mask_train_pos = random.sample(xrange(X_train_pos.shape[0]), num_of_train_pos)
    X_tr_pos = X_train_pos[mask_train_pos]
    y_tr_pos = y_train_pos[mask_train_pos]

    mask_train_neg = random.sample(xrange(X_train_neg.shape[0]), num_of_train_neg)
    X_tr_neg = X_train_neg[mask_train_neg]
    y_tr_neg = y_train_neg[mask_train_neg]

    # Remove the sampled training rows; what is left becomes the validation set
    X_train_pos = numpy.delete(X_train_pos, mask_train_pos, 0)
    y_train_pos = numpy.delete(y_train_pos, mask_train_pos, 0)
    X_train_neg = numpy.delete(X_train_neg, mask_train_neg, 0)
    y_train_neg = numpy.delete(y_train_neg, mask_train_neg, 0)
    Xv_pos = X_train_pos
    yv_pos = y_train_pos
    Xv_neg = X_train_neg
    yv_neg = y_train_neg

    X = numpy.concatenate((X_tr_pos, X_tr_neg), axis=0)
    y = numpy.concatenate((y_tr_pos, y_tr_neg), axis=0)
    Xv = numpy.concatenate((Xv_pos, Xv_neg), axis=0)
    yv = numpy.concatenate((yv_pos, yv_neg), axis=0)

    # Shuffle the training set so positives and negatives are interleaved
    from sklearn.utils import shuffle
    X, y = shuffle(X, y)

    all_train_sizes = []
    all_examples = [X, Xt, Xv]
    all_labels = [y, yt, yv]
    return [all_examples, all_labels, 4, all_train_sizes, Xt.shape[0], Xv.shape[0]]


def test_fine_S_CNN_dA():
    """Train a class-balanced SVM on the flattened, rescaled voxel features and
    save the predicted probabilities, true test labels, and fitted model."""
    [all_examples, all_labels, in_channels, all_train_sizes,
     test_size, val_size] = load_ATP_data()
    Xtr = all_examples[0]
    Xt = all_examples[1]
    Xv = all_examples[2]

    # Flatten each example to a 1-D feature vector
    Xtr = numpy.reshape(Xtr, (Xtr.shape[0], -1))
    Xt = numpy.reshape(Xt, (Xt.shape[0], -1))
    Xv = numpy.reshape(Xv, (Xv.shape[0], -1))

    # Scale every feature to [-1, 1] using statistics from the training set only
    from sklearn.preprocessing import MinMaxScaler
    scaling = MinMaxScaler(feature_range=(-1, 1)).fit(Xtr)
    Xtr = scaling.transform(Xtr)
    Xt = scaling.transform(Xt)
    Xv = scaling.transform(Xv)

    ytr = all_labels[0]
    yt = all_labels[1]
    yv = all_labels[2]

    # Class-balanced SVM with probability estimates enabled
    from sklearn import svm
    import cPickle
    clf = svm.SVC(class_weight='balanced', probability=True)
    clf.fit(Xtr, ytr)

    pred_y = clf.predict(Xt)
    prob_y = clf.predict_proba(Xt)

    # Persist probability scores, true test labels, and the fitted model
    prob_y.dump('../results/prob_scores/Voxel_SVM/' + 'prob_y.dat')
    yt.dump('../results/prob_scores/Voxel_SVM/' + 'true_y.dat')
    with open('../results/weights/Voxel_SVM_ATP.pkl', 'wb') as fid:
        cPickle.dump(clf, fid)


if __name__ == '__main__':
    test_fine_S_CNN_dA()
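# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original pipeline): one way the
# artifacts written by test_fine_S_CNN_dA() could be reloaded for evaluation.
# The function name, the use of roc_auc_score, and the assumption that the
# positive class is labelled 1 are assumptions added for illustration only.
def evaluate_saved_scores():
    import cPickle
    from sklearn.metrics import roc_auc_score

    # Reload the fitted SVM and the dumped probability/label arrays
    with open('../results/weights/Voxel_SVM_ATP.pkl', 'rb') as fid:
        clf = cPickle.load(fid)
    prob_y = numpy.load('../results/prob_scores/Voxel_SVM/prob_y.dat')
    true_y = numpy.load('../results/prob_scores/Voxel_SVM/true_y.dat')

    # predict_proba columns follow clf.classes_; pick the positive-class column
    pos_col = list(clf.classes_).index(1)  # assumes binary 0/1 labels
    print('test ROC AUC: %.4f' % roc_auc_score(true_y, prob_y[:, pos_col]))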