import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from pprint import pprint
from scipy.io import matlab
import re
import math
from theano import shared
from collections import OrderedDict
from layers import *
from theano.misc.pkl_utils import dump
import numpy as np
from io_utils_MUV import load_MUV_data_shuffle_fold, load_MUV_dataset_CV, test_60
from mol_graph import *
from process_poc_pretrain import *

# Fixed sizes used to pad per-graph arrays to a common shape.
# Width of the one-hot degree rows built for pocket 'env' nodes; the weight
# loaders in this file also read this many per-degree pocket matrices per layer.
max_poc_degrees = 20
# Every pocket graph is padded with dummy 'env' nodes up to this count, and the
# per-pocket mask has this length.
max_nodes_in_poc = 50
# Width of the one-hot degree rows built for molecule 'atom' nodes; the weight
# loaders also read this many per-degree molecule matrices per layer.
max_mol_degrees = 6
# Every molecule graph is padded with dummy 'atom' nodes up to this count, and
# the per-molecule mask has this length.
max_nodes_in_mol = 60

# Directory that pocket_ff_to_numpy joins with each pocket file name.
input_dir = '../data/ALL_ff/'
# NOTE(review): not referenced anywhere in the visible part of this file.
input_ext = '.ff'

def pocket_ff_to_numpy(ff_list,max_ff, min_ff, mean_ff):
	"""Parse pocket ``.ff`` files into per-pocket arrays of (features, position).

	For every name in ``ff_list`` the file ``<first 4 chars lowercased>_<ligand>.ff``
	is read from ``input_dir``, where ``<ligand>`` is the text between the first
	underscore and the following dot. Each non-empty line whose first token is
	longer than three characters and starts with ``"Env"`` contributes one
	entry: tokens 1..480 are the feature values and tokens 482..484 are the
	x, y, z coordinates.

	Features are min-max scaled per column with ``min_ff`` / ``max_ff``; a
	column whose max equals its min is copied through unscaled.

	Args:
		ff_list: iterable of file names shaped like ``"<PDB>_<ligand>.ff"``.
		max_ff: per-feature maxima, indexable for 0..479.
		min_ff: per-feature minima, indexable for 0..479.
		mean_ff: unused; kept so existing callers keep working.

	Returns:
		A list with one ``[fn, vectors]`` pair per input name. ``vectors`` is
		an object array of shape ``(n_env, 2)``; column 0 holds the scaled
		feature vector (480,), column 1 the position (3,).
	"""
	num_features = 480
	print ("ff_list")
	all_pocket=[]
	for fn in ff_list:
		pdb_id = fn[0:4]
		ligand = fn.split('_')[1].split('.')[0]
		correct_fn = pdb_id.lower()+'_'+ligand+'.ff'

		entries = []
		# 'with' releases the handle even when a line fails to parse.
		with open(os.path.join(input_dir,correct_fn)) as f:
			for line in f:
				S = line.split()
				if not S or len(S[0])<=3 or S[0][0:3]!="Env":
					continue
				feature_vec = numpy.zeros((num_features,))
				for i in range(num_features): #S[1]-S[480]
					value = float(S[i+1])
					span = max_ff[i]-min_ff[i]
					if span!=0:
						feature_vec[i]=(value-min_ff[i])/span
					else:
						feature_vec[i]=value
				pos = numpy.array([float(S[482]),float(S[483]),float(S[484])])
				entries.append((feature_vec,pos))

		# The two columns have different lengths (480 vs 3), so the container
		# must be an explicit object array; relying on implicit ragged-array
		# creation is rejected by newer NumPy releases.
		vectors = numpy.empty((len(entries),2), dtype=object)
		for row,(feature_vec,pos) in enumerate(entries):
			vectors[row,0]=feature_vec
			vectors[row,1]=pos
		all_pocket.append([fn,vectors])

	return all_pocket

def shared_dataset_mol(mol_atom_features, mol_bond_features, mol_atom_neighbors, mol_bond_neighbors, mol_degrees, mol_mask, borrow=True):
	"""Wrap the six molecule arrays in Theano shared variables.

	Each input is converted with ``numpy.asarray`` to ``theano.config.floatX``
	and passed to ``theano.shared`` with the given ``borrow`` flag.

	Returns:
		A 6-tuple of shared variables in argument order: atom features, bond
		features, atom neighbors, bond neighbors, degrees, mask.
	"""
	def as_shared(values):
		# One conversion path for every array keeps dtype handling uniform.
		return theano.shared(numpy.asarray(values, dtype=theano.config.floatX),borrow=borrow)

	return (
		as_shared(mol_atom_features),
		as_shared(mol_bond_features),
		as_shared(mol_atom_neighbors),
		as_shared(mol_bond_neighbors),
		as_shared(mol_degrees),
		as_shared(mol_mask),
	)


def shared_dataset(env_features, env_neighbors, env_degrees, mask, labels, borrow=True):
	"""Wrap the pocket arrays and labels in Theano shared variables.

	All inputs are stored as ``theano.config.floatX`` shared variables; the
	labels are additionally returned through ``T.cast(..., 'int32')``.

	Returns:
		``(features, neighbors, degrees, mask, int32-cast labels)``.
	"""
	def as_shared(values):
		return theano.shared(numpy.asarray(values,dtype=theano.config.floatX),borrow=borrow)

	features_var = as_shared(env_features)
	neighbors_var = as_shared(env_neighbors)
	degrees_var = as_shared(env_degrees)
	mask_var = as_shared(mask)
	# Labels are stored as floatX like the rest and exposed as an int32 view.
	labels_var = T.cast(as_shared(labels), 'int32')

	return features_var, neighbors_var, degrees_var, mask_var, labels_var

def sum_and_stack(features, mask, num_envs, fp_length, max_nodes):
	"""Mask node features, regroup them per graph and sum over the nodes.

	``features`` is multiplied row-wise by ``mask``, reshaped to
	``(num_envs/max_nodes, max_nodes, fp_length)`` and summed over axis 1.
	The original comments give ``features: 250, 512`` and ``mask: 250`` as
	example shapes.
	"""
	masked = features * mask.dimshuffle(0,'x')
	grouped_shape = (int(num_envs/max_nodes), int(max_nodes), int(fp_length))
	per_graph = T.reshape(masked, grouped_shape, ndim=3)
	return T.sum(per_graph,axis=1)

def sum_and_stack_atoms(features, mask, num_input, max_nodes_in_mol, fp_length):
	"""Mask atom features, regroup them per molecule and sum over the atoms.

	``features`` is multiplied row-wise by ``mask``, reshaped to
	``(num_input, max_nodes_in_mol, fp_length)`` and summed over axis 1.
	The original comments give ``features: 250, 512`` and ``mask: 250`` as
	example shapes.
	"""
	masked = features * mask.dimshuffle(0,'x')
	grouped_shape = (int(num_input), int(max_nodes_in_mol), int(fp_length))
	per_molecule = T.reshape(masked, grouped_shape, ndim=3)
	return T.sum(per_molecule,axis=1)


def relu(X):
	"""Rectified linear unit: element-wise maximum of 0 and ``X``."""
	rectified = T.maximum(0,X)
	return rectified

class PocketGraph(object):
	"""Container of pocket graph nodes grouped by node type.

	``nodes`` maps a node-type string (this file uses 'env' and 'pocket') to
	the list of nodes of that type, in insertion order.
	"""

	def __init__(self):
		self.nodes = {} # dict of lists of nodes, keyed by node type

	def new_node(self, ntype, pos, features=None, env_ix=None):
		"""Create an ``Env`` node, register it under ``ntype`` and return it."""
		new_node = Env(ntype, pos, features, env_ix)
		self.nodes.setdefault(ntype, []).append(new_node)
		return new_node

	def add_subgraph(self, subgraph):
		"""Append every node of ``subgraph`` to this graph, type by type.

		The node objects themselves are shared with ``subgraph``, not copied.
		"""
		old_nodes = self.nodes
		new_nodes = subgraph.nodes
		for ntype in set(old_nodes.keys()) | set(new_nodes.keys()):
			old_nodes.setdefault(ntype, []).extend(new_nodes.get(ntype, []))

	def get_degree(self, ntype):
		"""Return, for each node of ``ntype``, how many neighbors share that type."""
		return numpy.array([len(node.get_neighbors(ntype))
							for node in self.nodes[ntype]])

	def feature_array(self, ntype):
		"""Return the ``features`` of all ``ntype`` nodes stacked into an array."""
		assert ntype in self.nodes
		return np.array([node.features for node in self.nodes[ntype]])

	def pos_array(self, ntype):
		"""Return the ``pos`` of all ``ntype`` nodes stacked into an array."""
		assert ntype in self.nodes
		return np.array([node.pos for node in self.nodes[ntype]])

	def neighbor_list(self, self_ntype, neighbor_ntype):
		"""Return, per ``self_ntype`` node, the indices of its neighbors.

		Indices refer to positions in ``self.nodes[neighbor_ntype]``; only
		neighbors of type ``neighbor_ntype`` are listed.
		"""
		assert self_ntype in self.nodes and neighbor_ntype in self.nodes
		# Map each candidate neighbor object to its position once, so the
		# lookups below are O(1) instead of list searches.
		neighbor_idxs = {n : i for i, n in enumerate(self.nodes[neighbor_ntype])}
		return [[neighbor_idxs[neighbor]
				 for neighbor in self_node.get_neighbors(neighbor_ntype)]
				for self_node in self.nodes[self_ntype]]

	def env_ix_array(self):
		"""Return the ``env_ix`` of every 'env' node as an array."""
		return np.array([node.env_ix for node in self.nodes['env']])

class Env(object):
	"""A graph node carrying a type tag, position, features and neighbor links."""
	__slots__ = ['ntype', 'features', '_neighbors', 'pos', 'env_ix']

	def __init__(self, ntype, pos, features, env_ix):
		self.ntype, self.pos = ntype, pos
		self.features, self.env_ix = features, env_ix
		self._neighbors = []

	def add_neighbors(self, neighbor_list):
		"""Link this node with each node in ``neighbor_list``, in both directions."""
		for other in neighbor_list:
			self._neighbors.append(other)
			other._neighbors.append(self)

	def get_neighbors(self, ntype):
		"""Return the linked nodes whose ``ntype`` equals ``ntype``, in link order."""
		matching = []
		for candidate in self._neighbors:
			if candidate.ntype == ntype:
				matching.append(candidate)
		return matching


def dist(env_1,env_2):
	"""Return the Euclidean distance between the ``pos`` of two nodes."""
	offset = env_1.pos-env_2.pos
	squared_length = np.sum(offset**2)
	return np.sqrt(squared_length)

def pad_neighbors(neighbors):
	"""Turn per-node neighbor index lists into a square 0/1 adjacency matrix.

	Row ``i`` gets a 1 in every column listed in ``neighbors[i]``; the matrix
	has ``len(neighbors)`` rows and columns.
	"""
	size = len(neighbors)
	adjacency = numpy.zeros((size,size))
	for row, columns in enumerate(neighbors):
		for column in columns:
			adjacency[row][column]=1
	return adjacency

def pad_neighbors_bond(neighbors,num_atoms,num_bonds):
	"""Turn per-atom bond index lists into a ``(num_atoms, num_bonds)`` 0/1 matrix.

	Row ``i`` gets a 1 in every column listed in ``neighbors[i]``.
	"""
	incidence = numpy.zeros((num_atoms,num_bonds))
	for row, bond_indices in enumerate(neighbors):
		for bond_index in bond_indices:
			incidence[row][bond_index]=1
	return incidence


def pad_degree(degrees,max_degrees):
	"""One-hot encode node degrees into a ``(len(degrees), max_degrees)`` matrix.

	Row ``i`` has a single 1 at column ``degrees[i]``. A degree that is not a
	valid column index raises ``IndexError``.
	"""
	one_hot = numpy.zeros((len(degrees),max_degrees))
	for row, degree in enumerate(degrees):
		one_hot[row][degree]=1
	return one_hot
def get_pocket_attributes(ff_list,max_ff, min_ff, mean_ff):
	"""Build the model inputs for a batch of pockets.

	Parses the pocket files, merges them into one graph and derives the arrays
	for its 'env' nodes.

	Returns:
		``(env_features, env_neighbors, env_degrees, mask)`` where
		``env_neighbors`` is the 0/1 adjacency matrix from ``pad_neighbors``
		and ``env_degrees`` the one-hot matrix from ``pad_degree`` with
		``max_poc_degrees`` columns.
	"""
	parsed_pockets = pocket_ff_to_numpy(ff_list,max_ff, min_ff, mean_ff)
	big_graph, mask = graph_from_pocket_tuple(parsed_pockets)

	env_features = big_graph.feature_array('env')
	env_neighbors = pad_neighbors(big_graph.neighbor_list('env','env'))
	env_degrees = pad_degree(big_graph.get_degree('env'), max_poc_degrees)

	return env_features, env_neighbors, env_degrees, mask


def get_atom_bond_dim(smiles_tuple):
	"""Return ``(num_atom_features, num_bond_features)`` for the given SMILES.

	The sizes are read from the second dimension of the atom and bond feature
	arrays of the merged molecule graph.
	"""
	big_graph, _ = graph_from_smiles_tuple(smiles_tuple)
	atom_width = big_graph.feature_array('atom').shape[1]
	bond_width = big_graph.feature_array('bond').shape[1]
	return atom_width, bond_width


def get_mol_attributes(smiles_tuple):
	"""Build the model inputs for a batch of molecules given as SMILES.

	Returns:
		``(atom_features, bond_features, atom_neighbors, bond_neighbors,
		degrees, mask)``. ``atom_neighbors`` is the atom/atom 0/1 matrix,
		``bond_neighbors`` the atom/bond 0/1 matrix, and ``degrees`` the
		one-hot atom degree matrix with ``max_mol_degrees`` columns.
	"""
	big_graph, mask = graph_from_smiles_tuple(smiles_tuple)
	atom_count = len(big_graph.nodes['atom'])
	bond_count = len(big_graph.nodes['bond'])

	mol_atom_features = big_graph.feature_array('atom')
	mol_bond_features = big_graph.feature_array('bond')

	atom_links = big_graph.neighbor_list('atom','atom')
	bond_links = big_graph.neighbor_list('atom','bond')
	mol_atom_neighbors = pad_neighbors(atom_links)
	mol_bond_neighbors = pad_neighbors_bond(bond_links,atom_count,bond_count)

	mol_degrees = pad_degree(big_graph.get_degree('atom'), max_mol_degrees)

	return mol_atom_features, mol_bond_features, mol_atom_neighbors, mol_bond_neighbors, mol_degrees, mask

def graph_from_pocket_tuple(pocket_Env_list):
	"""Merge the graphs of several pockets into one graph plus one mask.

	Args:
		pocket_Env_list: iterable of ``[file_name, vectors]`` entries as
			accepted by ``graph_from_Env``.

	Returns:
		``(big_graph, big_graph_mask)``: a ``PocketGraph`` holding the nodes
		of every pocket in input order, and the per-pocket masks concatenated
		along axis 0. For empty input the graph is empty and the mask is a
		zero-length array (previously this raised ``UnboundLocalError``).
	"""
	graph_list = [graph_from_Env(s) for s in pocket_Env_list]
	big_graph = PocketGraph()
	masks = []
	for graph, mask in graph_list:
		big_graph.add_subgraph(graph)
		masks.append(mask)

	if not masks:
		return big_graph, numpy.zeros((0,))
	# A single concatenate avoids re-copying the growing mask per pocket.
	return big_graph, numpy.concatenate(masks, axis=0)

def graph_from_smiles_tuple(smiles_tuple):
	"""Merge the graphs of several SMILES strings into one graph plus one mask.

	Entries for which ``graph_from_smiles`` returns ``None`` are reported on
	stdout and skipped, so the result can cover fewer molecules than
	``smiles_tuple`` has entries.

	Returns:
		``(big_graph, big_graph_mask)``: a ``MolGraph`` with the nodes of every
		usable molecule in input order, and their masks concatenated along
		axis 0. When no molecule is usable the mask is a zero-length array
		(previously this raised ``UnboundLocalError``).
	"""
	graph_list = [graph_from_smiles(s) for s in smiles_tuple]
	big_graph = MolGraph()
	masks = []
	for graph, mask in graph_list:
		if graph is None:
			print ("I found a bug!!!!")
			continue
		big_graph.add_subgraph(graph)
		masks.append(mask)

	if not masks:
		return big_graph, numpy.zeros((0,))
	# A single concatenate avoids re-copying the growing mask per molecule.
	return big_graph, numpy.concatenate(masks, axis=0)

def graph_from_smiles(smiles):
	"""Build a padded molecule graph and its atom mask from a SMILES string.

	Atoms become 'atom' nodes and bonds become 'bond' nodes linked to their two
	atoms; bonded atoms are also linked to each other. The graph is padded
	with zero-feature dummy atoms (``rdkit_ix=-1``) up to ``max_nodes_in_mol``
	and a 'molecule' node is linked to every atom node, dummies included.

	Returns:
		``(graph, mask)`` where ``mask`` has length ``max_nodes_in_mol`` with
		ones for the real atoms, or ``(None, None)`` when the SMILES cannot be
		parsed or ``atom_features`` flags an atom with ``False`` in slot 0.

	NOTE(review): a molecule with more than ``max_nodes_in_mol`` atoms raises
	IndexError while filling the mask, and a molecule with zero atoms fails
	when padding because no feature shape was recorded — confirm the intended
	handling.
	"""
	graph = MolGraph()
	mol = MolFromSmiles(smiles)
	if not mol:
		print ("Could not parse SMILES string:")
		print (smiles)
		return None, None

	node_by_rdkit_index = {}
	for atom in mol.GetAtoms():
		features = atom_features(atom)
		if features[0]==False:
			return None, None
		rdkit_index = atom.GetIdx()
		node_by_rdkit_index[rdkit_index] = graph.new_node('atom', features=features[1], rdkit_ix=rdkit_index)
		# Remembered so that dummy atoms get feature vectors of the same shape.
		feature_shape = features[1].shape

	for bond in mol.GetBonds():
		begin_node = node_by_rdkit_index[bond.GetBeginAtom().GetIdx()]
		end_node = node_by_rdkit_index[bond.GetEndAtom().GetIdx()]
		bond_node = graph.new_node('bond', features=bond_features(bond))
		bond_node.add_neighbors((begin_node, end_node))
		begin_node.add_neighbors((end_node,))

	atom_count = len(mol.GetAtoms())
	mask = numpy.zeros((max_nodes_in_mol,))
	for slot in range(atom_count):
		mask[slot]=1

	# Pad with dummy atoms; the range is empty when the molecule is already full.
	for _ in range(atom_count,max_nodes_in_mol):
		graph.new_node('atom', features=numpy.zeros(feature_shape), rdkit_ix=-1)

	mol_node = graph.new_node('molecule')
	mol_node.add_neighbors(graph.nodes['atom'])
	return graph, mask


def graph_from_Env(all_ffs):
	"""Build the padded graph and node mask for one parsed pocket.

	Args:
		all_ffs: a ``[file_name, vectors]`` pair; only ``vectors`` (index 1)
			is used here.

	Returns:
		``(graph, mask)``: the connectivity graph from ``Connectivity_from_ff``
		with an extra 'pocket' node linked to every 'env' node, and a mask of
		length ``max_nodes_in_poc`` with ones for the real environments.

	NOTE(review): more than ``max_nodes_in_poc`` environments raises
	IndexError while filling the mask — confirm inputs are bounded upstream.
	"""
	env_vectors = all_ffs[1]

	num_of_Env = len(env_vectors)
	mask = numpy.zeros((max_nodes_in_poc,))
	for i in range(num_of_Env):
		mask[i]=1

	graph = Connectivity_from_ff(env_vectors)
	pocket_node = graph.new_node('pocket', pos=[0,0,0])
	if 'env' in graph.nodes:
		pocket_node.add_neighbors(graph.nodes['env'])

	return graph, mask

def Connectivity_from_ff(all_ffs):
	"""Build the pocket graph and link environments closer than 7 apart.

	Only the first ``len(all_ffs)`` 'env' nodes are compared pairwise; two are
	linked when ``dist`` between them is below 7.
	"""
	graph = parse_ff_to_Env(all_ffs)
	if 'env' not in graph.nodes.keys():
		return graph

	env_nodes = graph.nodes['env']
	env_count = len(all_ffs)
	for first_index in range (0,env_count):
		for second_index in range (first_index+1,env_count):
			first = env_nodes[first_index]
			second = env_nodes[second_index]
			if dist(first,second)<7:
				first.add_neighbors([second])

	return graph

def parse_ff_to_Env(all_ffs):
	"""Create a ``PocketGraph`` with one 'env' node per entry, padded with dummies.

	Entry ``i`` supplies the features (index 0) and position (index 1) of node
	``i``. Zero-feature dummy nodes at position ``[0,0,0]`` are appended until
	the graph holds ``max_nodes_in_poc`` 'env' nodes.

	NOTE(review): with no entries and padding required, the dummy feature
	shape is undefined and this raises a NameError-family exception —
	confirm empty pockets cannot reach here.
	"""
	graph = PocketGraph()
	for index, entry in enumerate(all_ffs):
		# Kept after the loop so dummy nodes copy the last real feature shape.
		last_features = entry[0]
		graph.new_node('env', features=last_features, pos=entry[1], env_ix=index)

	for index in range(all_ffs.shape[0],max_nodes_in_poc):
		graph.new_node('env', features=numpy.zeros(last_features.shape), pos=[0,0,0], env_ix=index)

	return graph

def numpy_floatX(data):
	"""Return ``data`` as a NumPy array of dtype ``theano.config.floatX``."""
	target_dtype = theano.config.floatX
	return numpy.asarray(data, dtype=target_dtype)

def dump_weights_pickle(Weights, Bias, file_name):
	"""Serialize all model parameters to ``file_name`` with Theano's ``dump``.

	Args:
		Weights: sequence of exactly 12 items — eight weight variables followed
			by four indexable collections of per-degree weights (pocket layer 1,
			pocket layer 2, molecule layer 1, molecule layer 2).
		Bias: sequence of exactly 8 bias variables.
		file_name: path of the output file, opened in binary write mode.

	The dumped tuple is ordered: the 8 weights, the 8 biases, then the
	per-degree weights for pocket layer 1, molecule layer 1, pocket layer 2
	and molecule layer 2.
	"""
	(W_poc_self_1, W_poc_self_2, W_poc_out,
	 W_mol_self_1, W_mol_self_2, W_mol_out,
	 W_hid, W_log,
	 W_poc_degrees_1, W_poc_degrees_2,
	 W_mol_degrees_1, W_mol_degrees_2) = Weights
	(b_poc_layer_1, b_poc_layer_2, b_poc_out,
	 b_mol_layer_1, b_mol_layer_2, b_mol_out,
	 b_hid, b_log) = Bias

	# Exactly max_*_degrees entries are taken from each per-degree collection.
	poc_layer_1 = tuple(W_poc_degrees_1[d] for d in range(0,max_poc_degrees))
	mol_layer_1 = tuple(W_mol_degrees_1[d] for d in range(0,max_mol_degrees))
	poc_layer_2 = tuple(W_poc_degrees_2[d] for d in range(0,max_poc_degrees))
	mol_layer_2 = tuple(W_mol_degrees_2[d] for d in range(0,max_mol_degrees))

	dense_params = (
		W_poc_self_1, W_poc_self_2, W_poc_out,
		W_mol_self_1, W_mol_self_2, W_mol_out,
		W_hid, W_log,
		b_poc_layer_1, b_poc_layer_2, b_poc_out,
		b_mol_layer_1, b_mol_layer_2, b_mol_out,
		b_hid, b_log,
	)
	all_var_tuple = dense_params + poc_layer_1 + mol_layer_1 + poc_layer_2 + mol_layer_2

	with open(file_name, 'wb') as f:
		dump(all_var_tuple, f)


def load_poc_W():
	"""Load the pretrained pocket weights as Theano shared variables.

	The archive entries are addressed by position. Each of the two hidden
	layers occupies ``max_poc_degrees + 4`` consecutive entries: the self
	weight, ``max_poc_degrees`` per-degree weights, then three biases. Six
	output entries follow (W_out, b_out, b_prime_out for layer 0, then the
	same for layer 1). The shape of every entry is printed as it is read.

	The archive is opened once and closed on exit (the previous code re-opened
	it for every entry), and the key listing is materialized as a list so
	positional indexing also works where ``keys()`` returns a view.

	Returns:
		``[W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0,
		W_out_0, b_out_0, b_prime_out_0, W_self_1, W_degrees_1, b_prime_1,
		b_prime_self_1, b_layer_1, W_out_1, b_out_1, b_prime_out_1]`` where
		the ``W_degrees_*`` items are lists of shared variables.
	"""
	file_name = '../weights_sda/weights_poc_sda_by_deg_W_out_relu_softmax.zip'
	floatX = theano.config.floatX
	num_degrees = max_poc_degrees

	with numpy.load(file_name) as archive:
		pickle_keys = list(archive.keys())

		def load_shared(index, name, label=None):
			# Read one entry, report its shape, and wrap it as a shared variable.
			raw = archive[pickle_keys[index]]
			print (label if label is not None else name+".shape")
			print (raw.shape)
			return theano.shared(value=numpy.asarray(raw, dtype=floatX), name=name, borrow=True)

		def load_layer(layer, base):
			# Read the self weight, per-degree weights and three biases of one layer.
			tag = str(layer)
			W_self = load_shared(base, 'W_self_'+tag)
			W_degrees = []
			for i in range(0,num_degrees):
				W_degrees.append(load_shared(base+1+i, 'W_degree_'+tag+'_d_'+str(i), "W_d.shape"))
			b_prime = load_shared(base+1+num_degrees, 'b_prime_'+tag)
			b_prime_self = load_shared(base+2+num_degrees, 'b_prime_self_'+tag)
			b_layer = load_shared(base+3+num_degrees, 'b_layer_'+tag)
			return W_self, W_degrees, b_prime, b_prime_self, b_layer

		begin_layer_1 = num_degrees+4
		begin_W_out = begin_layer_1+num_degrees+4

		W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0 = load_layer(0, 0)
		W_self_1, W_degrees_1, b_prime_1, b_prime_self_1, b_layer_1 = load_layer(1, begin_layer_1)

		W_out_0 = load_shared(begin_W_out+0, 'W_out_0')
		b_out_0 = load_shared(begin_W_out+1, 'b_out_0')
		b_prime_out_0 = load_shared(begin_W_out+2, 'b_prime_out_0')
		W_out_1 = load_shared(begin_W_out+3, 'W_out_1')
		b_out_1 = load_shared(begin_W_out+4, 'b_out_1')
		b_prime_out_1 = load_shared(begin_W_out+5, 'b_prime_out_1')

	return [W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0, W_out_0, b_out_0, b_prime_out_0, W_self_1, W_degrees_1, b_prime_1, b_prime_self_1, b_layer_1, W_out_1, b_out_1, b_prime_out_1]


def load_mol_W_layer0():
	"""Load the pretrained molecule weights and return the layer-0 parameters.

	The archive entries are addressed by position. Each of the two hidden
	layers occupies ``max_mol_degrees + 4`` consecutive entries: the self
	weight, ``max_mol_degrees`` per-degree weights, then three biases. Six
	output entries follow (W_out, b_out, b_prime_out for layer 0, then the
	same for layer 1). Every entry is read and its shape printed, although
	only the layer-0 values are returned.

	The archive is opened once and closed on exit (the previous code re-opened
	it for every entry), and the key listing is materialized as a list so
	positional indexing also works where ``keys()`` returns a view.

	Returns:
		``[W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0,
		W_out_0, b_out_0, b_prime_out_0]`` where ``W_degrees_0`` is a list of
		shared variables.
	"""
	file_name = '../weights_sda/weights_mol_sda_by_deg_W_out_relu_soft_assay.zip'
	floatX = theano.config.floatX
	num_degrees = max_mol_degrees

	with numpy.load(file_name) as archive:
		pickle_keys = list(archive.keys())

		def load_shared(index, name, label=None):
			# Read one entry, report its shape, and wrap it as a shared variable.
			raw = archive[pickle_keys[index]]
			print (label if label is not None else name+".shape")
			print (raw.shape)
			return theano.shared(value=numpy.asarray(raw, dtype=floatX), name=name, borrow=True)

		def load_layer(layer, base):
			# Read the self weight, per-degree weights and three biases of one layer.
			tag = str(layer)
			W_self = load_shared(base, 'W_self_'+tag)
			W_degrees = []
			for i in range(0,num_degrees):
				W_degrees.append(load_shared(base+1+i, 'W_degree_'+tag+'_d_'+str(i), "W_d.shape"))
			b_prime = load_shared(base+1+num_degrees, 'b_prime_'+tag)
			b_prime_self = load_shared(base+2+num_degrees, 'b_prime_self_'+tag)
			b_layer = load_shared(base+3+num_degrees, 'b_layer_'+tag)
			return W_self, W_degrees, b_prime, b_prime_self, b_layer

		begin_layer_1 = num_degrees+4
		begin_W_out = begin_layer_1+num_degrees+4

		W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0 = load_layer(0, 0)
		# Layer 1 is still read so the printed shape report stays complete.
		load_layer(1, begin_layer_1)

		W_out_0 = load_shared(begin_W_out+0, 'W_out_0')
		b_out_0 = load_shared(begin_W_out+1, 'b_out_0')
		b_prime_out_0 = load_shared(begin_W_out+2, 'b_prime_out_0')
		load_shared(begin_W_out+3, 'W_out_1')
		load_shared(begin_W_out+4, 'b_out_1')
		load_shared(begin_W_out+5, 'b_prime_out_1')

	return [W_self_0, W_degrees_0, b_prime_0, b_prime_self_0, b_layer_0, W_out_0, b_out_0, b_prime_out_0]


def load_mol_W_layer1():
	"""Load the pretrained molecule weights and return the layer-1 parameters.

	The archive entries are addressed by position. Each of the two hidden
	layers occupies ``max_mol_degrees + 4`` consecutive entries: the self
	weight, ``max_mol_degrees`` per-degree weights, then three biases. Six
	output entries follow (W_out, b_out, b_prime_out for layer 0, then the
	same for layer 1). Every entry is read and its shape printed, although
	only the layer-1 values are returned.

	The archive is opened once and closed on exit (the previous code re-opened
	it for every entry), and the key listing is materialized as a list so
	positional indexing also works where ``keys()`` returns a view.

	Returns:
		``[W_self_1, W_degrees_1, b_prime_1, b_prime_self_1, b_layer_1,
		W_out_1, b_out_1, b_prime_out_1]`` where ``W_degrees_1`` is a list of
		shared variables.
	"""
	file_name = '../weights_sda/weights_mol_sda_by_deg_W_out_relu_soft_assay.zip'
	floatX = theano.config.floatX
	num_degrees = max_mol_degrees

	with numpy.load(file_name) as archive:
		pickle_keys = list(archive.keys())

		def load_shared(index, name, label=None):
			# Read one entry, report its shape, and wrap it as a shared variable.
			raw = archive[pickle_keys[index]]
			print (label if label is not None else name+".shape")
			print (raw.shape)
			return theano.shared(value=numpy.asarray(raw, dtype=floatX), name=name, borrow=True)

		def load_layer(layer, base):
			# Read the self weight, per-degree weights and three biases of one layer.
			tag = str(layer)
			W_self = load_shared(base, 'W_self_'+tag)
			W_degrees = []
			for i in range(0,num_degrees):
				W_degrees.append(load_shared(base+1+i, 'W_degree_'+tag+'_d_'+str(i), "W_d.shape"))
			b_prime = load_shared(base+1+num_degrees, 'b_prime_'+tag)
			b_prime_self = load_shared(base+2+num_degrees, 'b_prime_self_'+tag)
			b_layer = load_shared(base+3+num_degrees, 'b_layer_'+tag)
			return W_self, W_degrees, b_prime, b_prime_self, b_layer

		begin_layer_1 = num_degrees+4
		begin_W_out = begin_layer_1+num_degrees+4

		# Layer 0 is still read so the printed shape report stays complete.
		load_layer(0, 0)
		W_self_1, W_degrees_1, b_prime_1, b_prime_self_1, b_layer_1 = load_layer(1, begin_layer_1)

		load_shared(begin_W_out+0, 'W_out_0')
		load_shared(begin_W_out+1, 'b_out_0')
		load_shared(begin_W_out+2, 'b_prime_out_0')
		W_out_1 = load_shared(begin_W_out+3, 'W_out_1')
		b_out_1 = load_shared(begin_W_out+4, 'b_out_1')
		b_prime_out_1 = load_shared(begin_W_out+5, 'b_prime_out_1')

	return [W_self_1, W_degrees_1, b_prime_1, b_prime_self_1, b_layer_1, W_out_1, b_out_1, b_prime_out_1]


def load_weights_pickle(Weights_file_name):
	"""Load a saved model from ``Weights_file_name`` as Theano shared variables.

	Archive entries are addressed by position: entries 0-15 are the eight
	weight matrices followed by the eight biases, and the per-degree weights
	are read from the positions computed below. The file name, loop indices
	and the shape of every entry are printed as they are read.

	The archive is opened once and closed on exit, the key listing is
	materialized as a list so positional indexing also works where ``keys()``
	returns a view, and the loop-index prints use the call form so the module
	parses under both Python 2 and Python 3.

	NOTE(review): the tuple written by ``dump_weights_pickle`` in this file
	places the pocket layer-1 degree weights at consecutive positions starting
	at 16, whereas the reads below use 16 and then ``17+i`` for ``i >= 1``
	(skipping 17), with the later groups shifted accordingly. The positions
	are kept exactly as they were because the on-disk key order cannot be
	verified from here — confirm against a real weights file.

	Returns:
		A list of 20 items: the 16 shared variables in archive order, then
		four lists of per-degree shared variables — ``W_poc_degrees_1``,
		``W_mol_degrees_1``, ``W_poc_degrees_2``, ``W_mol_degrees_2``.
	"""
	# W_poc_degrees_1, W_poc_degrees_2, W_mol_degrees_1, W_mol_degrees_2

	print (Weights_file_name)
	file_name = Weights_file_name
	floatX = theano.config.floatX

	# Names of the first 16 entries, in archive order.
	dense_names = [
		'W_poc_self_1', 'W_poc_self_2', 'W_poc_out',
		'W_mol_self_1', 'W_mol_self_2', 'W_mol_out',
		'W_hid', 'W_log',
		'b_poc_layer_1', 'b_poc_layer_2', 'b_poc_out',
		'b_mol_layer_1', 'b_mol_layer_2', 'b_mol_out',
		'b_hid', 'b_log',
	]

	with numpy.load(file_name) as archive:
		pickle_keys = list(archive.keys())

		def load_shared(index, name):
			# Read one entry, report its shape, and wrap it as a shared variable.
			value = numpy.asarray(archive[pickle_keys[index]], dtype=floatX)
			print (value.shape)
			return theano.shared(value=value, name=name, borrow=True)

		dense_params = [load_shared(index, name) for index, name in enumerate(dense_names)]

		W_poc_degrees_1 = [load_shared(16, 'W_poc_l_1_d_'+str(0))]
		for i in range(1,max_poc_degrees):
			print (i)
			W_poc_degrees_1.append(load_shared(17+i, 'W_poc_l_1_d_'+str(i)))

		W_mol_degrees_1 = []
		for i in range(0,max_mol_degrees):
			W_mol_degrees_1.append(load_shared(17+max_poc_degrees+i, 'W_mol_l_1_d_'+str(i)))

		W_poc_degrees_2 = []
		for i in range(0,max_poc_degrees):
			print (i)
			W_poc_degrees_2.append(load_shared(17+max_poc_degrees+max_mol_degrees+i, 'W_poc_l_2_d_'+str(i)))

		W_mol_degrees_2 = []
		for i in range(0,max_mol_degrees):
			print (i)
			W_mol_degrees_2.append(load_shared(17+2*max_poc_degrees+max_mol_degrees+i, 'W_mol_l_2_d_'+str(i)))

	return dense_params + [W_poc_degrees_1, W_mol_degrees_1, W_poc_degrees_2, W_mol_degrees_2]


# class Graph_Conv_mol(object):
# 	def __init__(self, rng, b_layer, W_self, W_degrees, num_atom_features, num_bond_features, num_hidden_features, num_input, mol_atom_features, mol_bond_features, mol_atom_neighbors, mol_bond_neighbors, mol_degrees, mask, max_nodes_in_mol=20, max_mol_degrees=6):
# 		"""Sets up functions to compute convnets over all molecules in a minibatch together."""
		
# 		self.num_hidden_features = num_hidden_features
# 		self.num_atom_features = num_atom_features
# 		self.num_bond_features = num_bond_features
# 		self.mol_atom_features = mol_atom_features
# 		self.mol_bond_features = mol_bond_features
# 		self.mol_atom_neighbors = mol_atom_neighbors
# 		self.mol_bond_neighbors = mol_bond_neighbors
	   

# 		if b_layer is None:
# 			bl_values = numpy.zeros((num_hidden_features,), dtype=theano.config.floatX)
# 			self.b_layer = theano.shared(value=bl_values, borrow=True)
# 		else:
# 			self.b_layer = b_layer

# 		if W_self is None:
# 			W_self_bound = numpy.sqrt(6. / (num_atom_features + num_hidden_features))
# 			self.W_self = theano.shared(numpy.asarray(rng.uniform(low=-W_self_bound, high=W_self_bound, size=(num_atom_features,num_hidden_features)),dtype=theano.config.floatX),borrow=True)
# 		else:
# 			self.W_self = W_self

# 		self.W_degrees = []
# 		if W_degrees is None:
# 			for degree in range(max_mol_degrees):
# 				W_d = theano.shared(numpy.asarray(rng.uniform(low=-W_self_bound, high=W_self_bound, size=(num_atom_features+num_bond_features,num_hidden_features)),dtype=theano.config.floatX),borrow=True)
# 				self.W_degrees.append(W_d)
# 		else:
# 			for degree in range(max_mol_degrees):
# 				self.W_degrees.append(W_degrees[degree])


# 		self.self_activations = T.dot(mol_atom_features, self.W_self)

# 		summed_atom_neighbors = T.dot(mol_atom_neighbors, mol_atom_features)
# 		summed_bond_neighbors = T.dot(mol_bond_neighbors, mol_bond_features)
# 		summed_neighbors = T.concatenate([summed_atom_neighbors, summed_bond_neighbors], axis=1)

# 		for d in range(max_mol_degrees):
# 			if d == 0:
# 				activations = T.dot(summed_neighbors, self.W_degrees[d]).dimshuffle(0,1,'x')
# 			else:
# 				tmp = T.dot(summed_neighbors, self.W_degrees[d]).dimshuffle(0,1,'x')
# 				activations = T.concatenate([activations, tmp], axis=2)

# 		mol_degrees_newaixs=mol_degrees.dimshuffle(0,'x',1)
# 		neighbour_activations = T.sum(activations*mol_degrees_newaixs,axis=2)

		
# 		total_activations = self.self_activations + neighbour_activations + self.b_layer.dimshuffle('x',0)
# 		self.layer_conv=relu(total_activations)

class Graph_Conv_mol(object):
	"""One graph-convolution layer over the atoms of a minibatch of molecules.

	The symbolic result is exposed as ``self.layer_conv``: ``relu`` applied to
	the sum of a self term, a degree-weighted neighbour term and the bias.
	"""
	def __init__(self, rng, b_layer, W_self, W_degrees, num_atom_features, num_bond_features, num_hidden_features, num_input, mol_atom_features, mol_bond_features, mol_atom_neighbors, mol_bond_neighbors, mol_degrees, mask, max_nodes_in_mol=20, max_mol_degrees=6):
		"""Sets up functions to compute convnets over all molecules in a minibatch together.

		Parameters:
			rng: random generator exposing ``uniform(low, high, size)``; only
				consulted when ``W_self`` or ``W_degrees`` is None.
			b_layer: bias shared variable, or None for a zero vector of
				length ``num_hidden_features``.
			W_self: self-connection weights, or None for a random
				(num_atom_features, num_hidden_features) matrix.
			W_degrees: sequence holding at least ``max_mol_degrees`` weight
				matrices, or None to draw that many random
				(num_atom_features+num_bond_features, num_hidden_features)
				matrices.
			num_input, max_nodes_in_mol: their product is the number of rows
				the neighbour row-sums are reshaped to.
			mol_atom_features, mol_bond_features: symbolic feature matrices.
			mol_atom_neighbors, mol_bond_neighbors: symbolic matrices that are
				multiplied with the corresponding feature matrix.
			mol_degrees: 2-D symbolic tensor; its second axis is broadcast
				against the per-degree activations.
			mask: not used by this layer.
		"""

		self.num_hidden_features = num_hidden_features
		self.num_atom_features = num_atom_features
		self.num_bond_features = num_bond_features
		self.mol_atom_features = mol_atom_features
		self.mol_bond_features = mol_bond_features
		self.mol_atom_neighbors = mol_atom_neighbors
		self.mol_bond_neighbors = mol_bond_neighbors

		# Uniform initialisation bound sqrt(6 / (fan_in + fan_out)) of W_self.
		# Computed unconditionally: it is also needed for the degree weights
		# when W_self is supplied but W_degrees is not (previously a NameError).
		W_self_bound = numpy.sqrt(6. / (num_atom_features + num_hidden_features))

		if b_layer is None:
			bl_values = numpy.zeros((num_hidden_features,), dtype=theano.config.floatX)
			self.b_layer = theano.shared(value=bl_values, borrow=True)
		else:
			self.b_layer = b_layer

		if W_self is None:
			self.W_self = theano.shared(numpy.asarray(rng.uniform(low=-W_self_bound, high=W_self_bound, size=(num_atom_features,num_hidden_features)),dtype=theano.config.floatX),borrow=True)
		else:
			self.W_self = W_self

		self.W_degrees = []
		if W_degrees is None:
			# The degree weights deliberately reuse the W_self bound, as before.
			for degree in range(max_mol_degrees):
				W_d = theano.shared(numpy.asarray(rng.uniform(low=-W_self_bound, high=W_self_bound, size=(num_atom_features+num_bond_features,num_hidden_features)),dtype=theano.config.floatX),borrow=True)
				self.W_degrees.append(W_d)
		else:
			for degree in range(max_mol_degrees):
				self.W_degrees.append(W_degrees[degree])

		self.self_activations = T.dot(mol_atom_features, self.W_self)

		# Row sums of the neighbour matrices, as column vectors for broadcasting.
		atom_deg = T.sum(mol_atom_neighbors,axis=1).reshape((num_input*max_nodes_in_mol,1))
		bond_deg = T.sum(mol_bond_neighbors,axis=1).reshape((num_input*max_nodes_in_mol,1))

		# Aggregate neighbour features and normalise by the row sum; the small
		# epsilon avoids division by zero for rows without neighbours.
		summed_atom_neighbors = T.dot(mol_atom_neighbors, mol_atom_features) / (atom_deg+1e-8)
		summed_bond_neighbors = T.dot(mol_bond_neighbors, mol_bond_features) / (bond_deg+1e-8)
		summed_neighbors = T.concatenate([summed_atom_neighbors, summed_bond_neighbors], axis=1)

		# One activation slab per degree, joined along a new trailing axis.
		per_degree = [T.dot(summed_neighbors, W_d).dimshuffle(0,1,'x') for W_d in self.W_degrees]
		activations = T.concatenate(per_degree, axis=2)

		# Weight each degree's slab by mol_degrees and collapse the degree axis.
		mol_degrees_newaixs=mol_degrees.dimshuffle(0,'x',1)
		neighbour_activations = T.sum(activations*mol_degrees_newaixs,axis=2)

		total_activations = self.self_activations + neighbour_activations + self.b_layer.dimshuffle('x',0)
		self.layer_conv=relu(total_activations)

class MOL_FP_Output(object):
	"""Fingerprint read-out for the molecule branch.

	Per-atom features go through an affine map and a softmax, then
	``sum_and_stack_atoms`` combines them using ``mask``; the result is stored
	in ``self.layer_fp``.
	"""
	def __init__(self, rng, W_out, b_out, num_input, num_atom_features, mol_atom_features, mask, fp_length=216, max_nodes_in_mol=20):
		"""Build the read-out graph.

		``W_out`` / ``b_out`` may be None, in which case a random
		(num_atom_features, fp_length) matrix and a zero vector of length
		``fp_length`` are created; otherwise the given variables are used as-is.
		"""
		floatX = theano.config.floatX

		if W_out is not None:
			self.W_out = W_out
		else:
			# Uniform initialisation in +/- sqrt(6 / (fan_in + fan_out)).
			limit = numpy.sqrt(6. / (num_atom_features + fp_length))
			init_W = rng.uniform(low=-limit, high=limit, size=(num_atom_features,fp_length))
			self.W_out = theano.shared(numpy.asarray(init_W, dtype=floatX), borrow=True)

		if b_out is not None:
			self.b_out = b_out
		else:
			self.b_out = theano.shared(value=numpy.zeros((fp_length,), dtype=floatX), borrow=True)

		# Force the features into (num_input*max_nodes_in_mol, num_atom_features).
		n_rows = int(num_input*max_nodes_in_mol)
		n_cols = int(num_atom_features)
		flat_features = T.reshape(mol_atom_features, (n_rows, n_cols), ndim=2)

		pre_softmax = self.b_out.dimshuffle('x', 0) + T.dot(flat_features, self.W_out)
		atom_outputs = T.nnet.softmax(pre_softmax)
		self.layer_fp = sum_and_stack_atoms(atom_outputs, mask, num_input, max_nodes_in_mol, fp_length)


class POC_FP_Output(object):
	"""Fingerprint read-out for the pocket branch.

	Per-node features go through an affine map and a softmax, then
	``sum_and_stack`` combines them using ``mask``; the result is stored in
	``self.layer_fp``.  The reshaped input and the softmax output are kept as
	``self.env_features_reshaped`` and ``self.env_outputs``.
	"""
	def __init__(self, rng, W_out, b_out,  num_input, num_in_features, env_features, mask, fp_length=512, max_nodes_in_poc=50):
		"""Build the read-out graph.

		``W_out`` / ``b_out`` may be None, in which case a random
		(num_in_features, fp_length) matrix and a zero vector of length
		``fp_length`` are created; otherwise the given variables are used as-is.
		"""
		floatX = theano.config.floatX

		if W_out is not None:
			self.W_out = W_out
		else:
			# Uniform initialisation in +/- sqrt(6 / (fan_in + fan_out)).
			limit = numpy.sqrt(6. / (num_in_features + fp_length))
			init_W = rng.uniform(low=-limit, high=limit, size=(num_in_features,fp_length))
			self.W_out = theano.shared(numpy.asarray(init_W, dtype=floatX), borrow=True)

		if b_out is not None:
			self.b_out = b_out
		else:
			self.b_out = theano.shared(value=numpy.zeros((fp_length,), dtype=floatX), borrow=True)

		# Force the features into (num_input*max_nodes_in_poc, num_in_features).
		target_shape = (int(num_input*max_nodes_in_poc), int(num_in_features))
		self.env_features_reshaped = T.reshape(env_features, target_shape, ndim=2)

		pre_softmax = self.b_out.dimshuffle('x', 0) + T.dot(self.env_features_reshaped, self.W_out)
		self.env_outputs = T.nnet.softmax(pre_softmax)
		self.layer_fp = sum_and_stack(self.env_outputs, mask, num_input*max_nodes_in_poc, fp_length, max_nodes_in_poc)


class Graph_Conv(object):
	"""One graph-convolution layer over the nodes of a minibatch of graphs.

	The symbolic result is exposed as ``self.layer_conv``: ``relu`` applied to
	the sum of a self term, a degree-weighted neighbour term and the bias.
	"""
	def __init__(self, rng, b_layer, W_self, W_degrees, num_in_features, num_hidden_features, num_input, env_features, env_neighbors, env_degrees, mask, max_nodes_in_poc=50, max_poc_degrees=20):
		"""Sets up functions to compute convnets over all molecules in a minibatch together.

		Parameters:
			rng: random generator exposing ``uniform(low, high, size)``; only
				consulted when ``W_self`` or ``W_degrees`` is None.
			b_layer: bias shared variable, or None for a zero vector of
				length ``num_hidden_features``.
			W_self: self-connection weights, or None for a random
				(num_in_features, num_hidden_features) matrix.
			W_degrees: sequence holding at least ``max_poc_degrees`` weight
				matrices, or None to draw that many random
				(num_in_features, num_hidden_features) matrices.
			num_input, max_nodes_in_poc: their product is the number of rows
				the neighbour row-sums are reshaped to.
			env_features: symbolic node-feature matrix.
			env_neighbors: symbolic matrix multiplied with ``env_features``.
			env_degrees: 2-D symbolic tensor; its second axis is broadcast
				against the per-degree activations.
			mask: not used by this layer.
		"""

		self.num_hidden_features = num_hidden_features
		self.num_in_features = num_in_features
		self.env_features = env_features

		# Uniform initialisation bound sqrt(6 / (fan_in + fan_out)).
		# Computed unconditionally: it is also needed for the degree weights
		# when W_self is supplied but W_degrees is not (previously a NameError).
		W_self_bound = numpy.sqrt(6. / (num_in_features + num_hidden_features))

		if b_layer is None:
			bl_values = numpy.zeros((num_hidden_features,), dtype=theano.config.floatX)
			self.b_layer = theano.shared(value=bl_values, borrow=True)
		else:
			self.b_layer = b_layer

		if W_self is None:
			self.W_self = theano.shared(numpy.asarray(rng.uniform(low=-W_self_bound, high=W_self_bound, size=(num_in_features,num_hidden_features)),dtype=theano.config.floatX),borrow=True)
		else:
			self.W_self = W_self

		self.W_degrees = []
		if W_degrees is None:
			for degree in range(max_poc_degrees):
				W_d = theano.shared(numpy.asarray(rng.uniform(low=-W_self_bound, high=W_self_bound, size=(num_in_features,num_hidden_features)),dtype=theano.config.floatX),borrow=True)
				self.W_degrees.append(W_d)
		else:
			for degree in range(max_poc_degrees):
				self.W_degrees.append(W_degrees[degree])

		self.self_activations = T.dot(env_features, self.W_self)

		# Aggregate neighbour features and normalise by the row sum of the
		# neighbour matrix; the epsilon avoids division by zero for empty rows.
		deg = T.sum(env_neighbors,axis=1).reshape((num_input*max_nodes_in_poc,1))
		self.summed_neighbors = T.dot(env_neighbors, env_features) / (deg+1e-8)

		# One activation slab per degree, joined along a new trailing axis.
		per_degree = [T.dot(self.summed_neighbors, W_d).dimshuffle(0,1,'x') for W_d in self.W_degrees]
		activations = T.concatenate(per_degree, axis=2)

		# Weight each degree's slab by env_degrees and collapse the degree axis.
		env_degrees_newaixs=env_degrees.dimshuffle(0,'x',1)
		neighbour_activations = T.sum(activations*env_degrees_newaixs,axis=2)

		total_activations = self.self_activations + neighbour_activations + self.b_layer.dimshuffle('x',0)
		self.layer_conv=relu(total_activations)


def _dropout_from_layer(rng, layer, p):
	"""Return ``layer`` multiplied element-wise by a random 0/1 keep mask.

	p is the probability of dropping a unit.  The random stream is seeded
	from ``rng.randint(999999)``.  Kept activations are not rescaled here.
	"""
	seed = rng.randint(999999)
	stream = theano.tensor.shared_randomstreams.RandomStreams(seed)
	# A 1 in the mask means "keep", so the binomial success rate is 1-p.
	keep_mask = stream.binomial(n=1, p=1-p, size=layer.shape)
	# Cast the mask to floatX: int * float32 would give float64, which
	# pulls the computation off the gpu.
	return layer * T.cast(keep_mask, theano.config.floatX)

class DropoutHiddenLayer(HiddenLayer):
	"""HiddenLayer whose ``output`` is replaced by a dropout-masked version."""
	def __init__(self, rng, input, n_in, n_out,
				 activation, dropout_rate, use_bias=True, W=None, b=None):
		"""Build the parent layer, then mask its output.

		``use_bias`` is accepted but not forwarded to the parent constructor.
		"""
		parent_kwargs = dict(
			rng=rng, input=input, n_in=n_in, n_out=n_out, W=W, b=b,
			activation=activation)
		super(DropoutHiddenLayer, self).__init__(**parent_kwargs)

		dropped = _dropout_from_layer(rng, self.output, p=dropout_rate)
		self.output = dropped

class DropoutGraphConvo(Graph_Conv):
	"""Graph_Conv whose ``layer_conv`` is replaced by a dropout-masked version."""

	def __init__(self, rng, b_layer, W_self, W_degrees, num_in_features, num_hidden_features, num_input, env_features, env_neighbors, env_degrees, mask, dropout_rate=0.3, max_nodes_in_poc=50, max_poc_degrees=20):
		"""Build the parent convolution, then mask its output with ``dropout_rate``."""
		parent_kwargs = dict(
			rng=rng,
			W_self=W_self,
			b_layer=b_layer,
			W_degrees=W_degrees,
			num_in_features=num_in_features,
			num_hidden_features=num_hidden_features,
			num_input=num_input,
			env_features=env_features,
			env_neighbors=env_neighbors,
			env_degrees=env_degrees,
			mask=mask,
			max_nodes_in_poc=max_nodes_in_poc,
			max_poc_degrees=max_poc_degrees)
		super(DropoutGraphConvo, self).__init__(**parent_kwargs)

		dropped = _dropout_from_layer(rng, self.layer_conv, p=dropout_rate)
		self.layer_conv = dropped
	
class DropoutGraphConvMol(Graph_Conv_mol):
	"""Graph_Conv_mol whose ``layer_conv`` is replaced by a dropout-masked version."""
	def __init__(self, rng, W_self, b_layer, W_degrees, num_atom_features, num_bond_features, num_hidden_features, num_input, mol_atom_features, mol_bond_features, mol_atom_neighbors, mol_bond_neighbors, mol_degrees, mask, dropout_rate, max_nodes_in_mol=80, max_mol_degrees=6):
		"""Build the parent convolution, then mask its output with ``dropout_rate``.

		Note that ``W_self`` comes before ``b_layer`` in this signature; the
		parent is called with keywords, so the differing order is harmless.
		"""
		parent_kwargs = dict(
			rng=rng,
			W_self=W_self,
			b_layer=b_layer,
			W_degrees=W_degrees,
			num_atom_features=num_atom_features,
			num_bond_features=num_bond_features,
			num_hidden_features=num_hidden_features,
			num_input=num_input,
			mol_atom_features=mol_atom_features,
			mol_bond_features=mol_bond_features,
			mol_atom_neighbors=mol_atom_neighbors,
			mol_bond_neighbors=mol_bond_neighbors,
			mol_degrees=mol_degrees,
			mask=mask,
			max_nodes_in_mol=max_nodes_in_mol,
			max_mol_degrees=max_mol_degrees)
		super(DropoutGraphConvMol, self).__init__(**parent_kwargs)

		dropped = _dropout_from_layer(rng, self.layer_conv, p=dropout_rate)
		self.layer_conv = dropped

class DropoutPocOutput(POC_FP_Output):
	"""POC_FP_Output whose ``layer_fp`` is replaced by a dropout-masked version."""

	def __init__(self, rng, W_out, b_out,  num_input, num_in_features, env_features, mask, dropout_rate=0.3, fp_length=512, max_nodes_in_poc=50):
		"""Build the parent read-out, then mask its fingerprint with ``dropout_rate``."""
		parent_kwargs = dict(
			rng=rng,
			W_out=W_out,
			b_out=b_out,
			num_input=num_input,
			num_in_features=num_in_features,
			env_features=env_features,
			mask=mask,
			fp_length=fp_length,
			max_nodes_in_poc=max_nodes_in_poc)
		super(DropoutPocOutput, self).__init__(**parent_kwargs)

		dropped = _dropout_from_layer(rng, self.layer_fp, p=dropout_rate)
		self.layer_fp = dropped


class DropoutMolOutput(MOL_FP_Output):
	"""MOL_FP_Output whose ``layer_fp`` is replaced by a dropout-masked version."""
	def __init__(self, rng, W_out, b_out, num_input, num_atom_features, mol_atom_features, mask, dropout_rate=0.3, fp_length=216, max_nodes_in_mol=20):
		"""Build the parent read-out, then mask its fingerprint with ``dropout_rate``."""
		parent_kwargs = dict(
			rng=rng,
			W_out=W_out,
			b_out=b_out,
			num_input=num_input,
			num_atom_features=num_atom_features,
			mol_atom_features=mol_atom_features,
			mask=mask,
			fp_length=fp_length,
			max_nodes_in_mol=max_nodes_in_mol)
		super(DropoutMolOutput, self).__init__(**parent_kwargs)

		dropped = _dropout_from_layer(rng, self.layer_fp, p=dropout_rate)
		self.layer_fp = dropped


class Conv3D_Graph_Conv(object):
	"""Two-branch (pocket + molecule) graph-convolution classifier.

	Every stage is built twice: a ``Drop_*`` layer with dropout, which holds
	the parameters and feeds ``finetune_cost``, and a twin without dropout
	that reuses those parameters multiplied by ``1 - dropout_rate`` and feeds
	``errors`` / ``y_pred`` / ``y_prob``.  The final logistic layer is the
	exception: its twin shares ``W`` and ``b`` unscaled.

	Trainable parameters are collected in ``self.tparams`` (an OrderedDict)
	in the order the layers are built.
	"""

	def __init__(
		self,
		numpy_rng,
		Weights,
		Bias,
		env_features,
		env_neighbors,
		env_mask,
		env_degrees,
		mol_atom_features,
		mol_bond_features,
		mol_atom_neighbors,
		mol_bond_neighbors,
		mol_mask,
		mol_degrees,
		labels,
		num_input,
		num_poc_in_features=480,
		num_mol_atom_features=62,
		num_mol_bond_features=6,
		poc_fp_length=512,
		mol_fp_length=216,
		dropout_rate=0.3,
		theano_rng=None
	):
		"""Build the symbolic training and evaluation graphs.

		Parameters:
			numpy_rng: numpy random state handed to every layer for weight
				initialisation and dropout seeding.
			Weights: 12-item sequence, in order: W_poc_self_1, W_poc_self_2,
				W_poc_out, W_poc_degrees_1, W_poc_degrees_2, W_mol_self_1,
				W_mol_self_2, W_mol_out, W_mol_degrees_1, W_mol_degrees_2,
				W_hid, W_log.  Items are forwarded to the layer constructors
				(the graph layers create fresh parameters for None).
			Bias: 8-item sequence, in order: b_poc_layer_1, b_poc_layer_2,
				b_poc_out, b_mol_layer_1, b_mol_layer_2, b_mol_out, b_hid,
				b_log.
			env_*, mol_*: symbolic inputs of the pocket and molecule branches.
			labels: symbolic label vector, or None to create ``T.ivector('y')``.
			num_input: number of (pocket, molecule) pairs per minibatch.
			dropout_rate: drop probability used by every ``Drop_*`` layer.
			theano_rng: optional random stream; see the note in the body.

		The node and degree limits are read from the module-level
		``max_nodes_in_poc``, ``max_poc_degrees``, ``max_nodes_in_mol`` and
		``max_mol_degrees``.
		"""

		self.tparams = OrderedDict()

		# theano_rng is not referenced again below.  The statement is kept
		# because the randint() draw advances numpy_rng, and dropping it would
		# change every random initialisation that follows.
		if not theano_rng:
			theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

		if labels is None:
			self.y = T.ivector('y')
		else:
			self.y = labels

		(W_poc_self_1, W_poc_self_2, W_poc_out, W_poc_degrees_1, W_poc_degrees_2,
		 W_mol_self_1, W_mol_self_2, W_mol_out, W_mol_degrees_1, W_mol_degrees_2,
		 W_hid, W_log) = Weights
		(b_poc_layer_1, b_poc_layer_2, b_poc_out,
		 b_mol_layer_1, b_mol_layer_2, b_mol_out, b_hid, b_log) = Bias

		# Factor applied to the parameters of the dropout-free twins.
		keep_prob = 1 - dropout_rate

		# Layer widths shared by both branches.
		hidden_1 = 200
		hidden_2 = 100
		fc_units = 100

		######################### Poc Graph #################################

		self.Drop_Graph_poc_layer_1 = DropoutGraphConvo(rng=numpy_rng,
			b_layer=b_poc_layer_1,
			W_self=W_poc_self_1,
			W_degrees=W_poc_degrees_1,
			num_in_features=num_poc_in_features,
			num_hidden_features=hidden_1,
			num_input=num_input,
			env_features=env_features,
			env_neighbors=env_neighbors,
			env_degrees=env_degrees,
			mask=env_mask,
			max_nodes_in_poc=max_nodes_in_poc,
			max_poc_degrees=max_poc_degrees,
			dropout_rate=dropout_rate
			)

		# NOTE(review): unlike every other layer, the first pocket layer's
		# W_self / b_layer / W_degrees are not registered in self.tparams.
		# Confirm that keeping this layer out of the parameter set is intended.

		corrected_poc_degree_1 = [
			self.Drop_Graph_poc_layer_1.W_degrees[d] * keep_prob
			for d in range(0, max_poc_degrees)]

		self.Graph_poc_layer_1 = Graph_Conv(rng=numpy_rng,
			b_layer=self.Drop_Graph_poc_layer_1.b_layer * keep_prob,
			W_self=self.Drop_Graph_poc_layer_1.W_self * keep_prob,
			W_degrees=corrected_poc_degree_1,
			num_in_features=num_poc_in_features,
			num_hidden_features=hidden_1,
			num_input=num_input,
			env_features=env_features,
			env_neighbors=env_neighbors,
			env_degrees=env_degrees,
			mask=env_mask,
			max_nodes_in_poc=max_nodes_in_poc,
			max_poc_degrees=max_poc_degrees
			)

		#########################

		self.Drop_Graph_poc_layer_2 = DropoutGraphConvo(rng=numpy_rng,
			b_layer=b_poc_layer_2,
			W_self=W_poc_self_2,
			W_degrees=W_poc_degrees_2,
			num_in_features=hidden_1,
			num_hidden_features=hidden_2,
			num_input=num_input,
			env_features=self.Drop_Graph_poc_layer_1.layer_conv,
			env_neighbors=env_neighbors,
			env_degrees=env_degrees,
			mask=env_mask,
			max_nodes_in_poc=max_nodes_in_poc,
			max_poc_degrees=max_poc_degrees,
			dropout_rate=dropout_rate
			)

		self.tparams['W_poc_self_2'] = self.Drop_Graph_poc_layer_2.W_self
		self.tparams['b_poc_layer_2'] = self.Drop_Graph_poc_layer_2.b_layer

		for d in range(0,max_poc_degrees):
			self.tparams['W_poc_l_2_d_'+str(d)] = self.Drop_Graph_poc_layer_2.W_degrees[d]

		corrected_poc_degree_2 = [
			self.Drop_Graph_poc_layer_2.W_degrees[d] * keep_prob
			for d in range(0, max_poc_degrees)]

		self.Graph_poc_layer_2 = Graph_Conv(rng=numpy_rng,
			b_layer=self.Drop_Graph_poc_layer_2.b_layer * keep_prob,
			W_self=self.Drop_Graph_poc_layer_2.W_self * keep_prob,
			W_degrees=corrected_poc_degree_2,
			num_in_features=hidden_1,
			num_hidden_features=hidden_2,
			num_input=num_input,
			env_features=self.Graph_poc_layer_1.layer_conv,
			env_neighbors=env_neighbors,
			env_degrees=env_degrees,
			mask=env_mask,
			max_nodes_in_poc=max_nodes_in_poc,
			max_poc_degrees=max_poc_degrees
			)

		#########################

		self.Drop_POC_FP_layer = DropoutPocOutput(
			rng=numpy_rng,
			W_out=W_poc_out,
			b_out=b_poc_out,
			num_in_features=hidden_2,
			num_input=num_input,
			env_features=self.Drop_Graph_poc_layer_2.layer_conv,
			mask=env_mask,
			fp_length=poc_fp_length,
			max_nodes_in_poc=max_nodes_in_poc,
			dropout_rate=dropout_rate
			)

		self.tparams['W_poc_out'] = self.Drop_POC_FP_layer.W_out
		self.tparams['b_poc_out'] = self.Drop_POC_FP_layer.b_out

		self.POC_FP_layer = POC_FP_Output(rng=numpy_rng,
			W_out=self.Drop_POC_FP_layer.W_out * keep_prob,
			b_out=self.Drop_POC_FP_layer.b_out * keep_prob,
			num_in_features=hidden_2,
			num_input=num_input,
			env_features=self.Graph_poc_layer_2.layer_conv,
			mask=env_mask,
			fp_length=poc_fp_length,
			max_nodes_in_poc=max_nodes_in_poc
			)

		######################### Mol Graph #################################

		self.Drop_Graph_mol_layer_1 = DropoutGraphConvMol(rng=numpy_rng,
			b_layer=b_mol_layer_1,
			W_self=W_mol_self_1,
			W_degrees=W_mol_degrees_1,
			num_atom_features=num_mol_atom_features,
			num_bond_features=num_mol_bond_features,
			num_hidden_features=hidden_1,
			num_input=num_input,
			mol_atom_features=mol_atom_features,
			mol_bond_features=mol_bond_features,
			mol_atom_neighbors=mol_atom_neighbors,
			mol_bond_neighbors=mol_bond_neighbors,
			mol_degrees=mol_degrees,
			mask=mol_mask,
			max_nodes_in_mol=max_nodes_in_mol,
			max_mol_degrees=max_mol_degrees,
			dropout_rate=dropout_rate
			)

		self.tparams['W_mol_self_1'] = self.Drop_Graph_mol_layer_1.W_self
		self.tparams['b_mol_layer_1'] = self.Drop_Graph_mol_layer_1.b_layer

		for d in range(0,max_mol_degrees):
			self.tparams['W_mol_l_1_d_'+str(d)] = self.Drop_Graph_mol_layer_1.W_degrees[d]

		corrected_mol_degree_1 = [
			self.Drop_Graph_mol_layer_1.W_degrees[d] * keep_prob
			for d in range(0, max_mol_degrees)]

		self.Graph_mol_layer_1 = Graph_Conv_mol(rng=numpy_rng,
			b_layer=self.Drop_Graph_mol_layer_1.b_layer * keep_prob,
			W_self=self.Drop_Graph_mol_layer_1.W_self * keep_prob,
			W_degrees=corrected_mol_degree_1,
			num_atom_features=num_mol_atom_features,
			num_bond_features=num_mol_bond_features,
			num_hidden_features=hidden_1,
			num_input=num_input,
			mol_atom_features=mol_atom_features,
			mol_bond_features=mol_bond_features,
			mol_atom_neighbors=mol_atom_neighbors,
			mol_bond_neighbors=mol_bond_neighbors,
			mol_degrees=mol_degrees,
			mask=mol_mask,
			max_nodes_in_mol=max_nodes_in_mol,
			max_mol_degrees=max_mol_degrees
			)

		#########################

		# The second molecule layer reuses the generic Graph_Conv classes, so
		# the molecule limits are passed through the *_poc keyword names.
		self.Drop_Graph_mol_layer_2 = DropoutGraphConvo(rng=numpy_rng,
			b_layer=b_mol_layer_2,
			W_self=W_mol_self_2,
			W_degrees=W_mol_degrees_2,
			num_in_features=hidden_1,
			num_hidden_features=hidden_2,
			num_input=num_input,
			env_features=self.Drop_Graph_mol_layer_1.layer_conv,
			env_neighbors=mol_atom_neighbors,
			env_degrees=mol_degrees,
			mask=mol_mask,
			max_nodes_in_poc=max_nodes_in_mol,
			max_poc_degrees=max_mol_degrees,
			dropout_rate=dropout_rate
			)

		self.tparams['W_mol_self_2'] = self.Drop_Graph_mol_layer_2.W_self
		self.tparams['b_mol_layer_2'] = self.Drop_Graph_mol_layer_2.b_layer

		for d in range(0,max_mol_degrees):
			self.tparams['W_mol_l_2_d_'+str(d)] = self.Drop_Graph_mol_layer_2.W_degrees[d]

		corrected_mol_degree_2 = [
			self.Drop_Graph_mol_layer_2.W_degrees[d] * keep_prob
			for d in range(0, max_mol_degrees)]

		self.Graph_mol_layer_2 = Graph_Conv(rng=numpy_rng,
			b_layer=self.Drop_Graph_mol_layer_2.b_layer * keep_prob,
			W_self=self.Drop_Graph_mol_layer_2.W_self * keep_prob,
			W_degrees=corrected_mol_degree_2,
			num_in_features=hidden_1,
			num_hidden_features=hidden_2,
			num_input=num_input,
			env_features=self.Graph_mol_layer_1.layer_conv,
			env_neighbors=mol_atom_neighbors,
			env_degrees=mol_degrees,
			mask=mol_mask,
			max_nodes_in_poc=max_nodes_in_mol,
			max_poc_degrees=max_mol_degrees
			)

		#########################

		# The dropout read-out consumes the dropout convolution output, the
		# same way the pocket branch does.  It was previously wired to the
		# dropout-free Graph_mol_layer_2, which took the molecule convolution
		# dropout out of the training path.
		self.Drop_MOL_FP_layer = DropoutMolOutput(
			rng=numpy_rng,
			W_out=W_mol_out,
			b_out=b_mol_out,
			num_input=num_input,
			num_atom_features=hidden_2,
			mol_atom_features=self.Drop_Graph_mol_layer_2.layer_conv,
			mask=mol_mask,
			max_nodes_in_mol=max_nodes_in_mol,
			fp_length=mol_fp_length,
			dropout_rate=dropout_rate
			)

		self.tparams['W_mol_out'] = self.Drop_MOL_FP_layer.W_out
		self.tparams['b_mol_out'] = self.Drop_MOL_FP_layer.b_out

		self.MOL_FP_layer = MOL_FP_Output(rng=numpy_rng,
			W_out=self.Drop_MOL_FP_layer.W_out * keep_prob,
			b_out=self.Drop_MOL_FP_layer.b_out * keep_prob,
			num_input=num_input,
			num_atom_features=hidden_2,
			mol_atom_features=self.Graph_mol_layer_2.layer_conv,
			mask=mol_mask,
			max_nodes_in_mol=max_nodes_in_mol,
			fp_length=mol_fp_length
			)

		##########################################################

		# Fully connected layer over the concatenated fingerprints.
		self.Drop_FC_layer=DropoutHiddenLayer(
			rng=numpy_rng,
			input=T.concatenate([self.Drop_POC_FP_layer.layer_fp, self.Drop_MOL_FP_layer.layer_fp],axis=1),
			n_in=poc_fp_length+mol_fp_length,
			n_out=fc_units,
			activation=relu,
			W=W_hid,
			b=b_hid,
			dropout_rate=dropout_rate,
			)

		self.tparams['W_hid'] = self.Drop_FC_layer.W
		self.tparams['b_hid'] = self.Drop_FC_layer.b

		self.FC_layer=HiddenLayer(
			rng=numpy_rng,
			input=T.concatenate([self.POC_FP_layer.layer_fp, self.MOL_FP_layer.layer_fp],axis=1),
			n_in=poc_fp_length+mol_fp_length,
			n_out=fc_units,
			activation=relu,
			W=self.Drop_FC_layer.W * keep_prob,
			b=self.Drop_FC_layer.b * keep_prob
			)

		self.Drop_logLayer = LogisticRegression(
			input=self.Drop_FC_layer.output,
			n_in=fc_units,
			n_out=2,
			W=W_log,
			b=b_log,
		)

		self.tparams['W_log'] = self.Drop_logLayer.W
		self.tparams['b_log'] = self.Drop_logLayer.b

		# The evaluation classifier shares W and b with the training one, unscaled.
		self.logLayer = LogisticRegression(
			input=self.FC_layer.output,
			n_in=fc_units,
			n_out=2,
			W=self.Drop_logLayer.W,
			b=self.Drop_logLayer.b,
		)

		# Training cost comes from the dropout path, metrics from the twin path.
		self.finetune_cost = self.Drop_logLayer.negative_log_likelihood(self.y)
		# NOTE(review): this stores the method itself, not errors(self.y) as is
		# done for self.errors below; left as-is in case callers invoke it.
		self.dropout_errors = self.Drop_logLayer.errors
		self.errors = self.logLayer.errors(self.y)
		self.y_pred = self.logLayer.y_pred
		self.y_prob = self.logLayer.p_y_given_x


def test_fine_S_CNN_dA(fold,finetune_lr=0.003, training_epochs=15, batch_size=5, reg=5e-9):

	all_poc, max_ff, min_ff, mean_ff = get_all_pocs() 

	[W_poc_self_1,  W_poc_self_2,  W_poc_out, W_poc_degrees_1, W_poc_degrees_2, W_mol_self_1,  W_mol_self_2,  W_mol_out, W_mol_degrees_1, W_mol_degrees_2, W_hid, W_log] = [None,None,None,None,None, None,None,None,None,None,None,None]
	[b_poc_layer_1, b_poc_layer_2, b_poc_out, b_mol_layer_1, b_mol_layer_2, b_mol_out, b_hid, b_log]=[None,None,None,None,None,None,None,None]
	Weights = [W_poc_self_1,  W_poc_self_2,  W_poc_out, W_poc_degrees_1, W_poc_degrees_2, W_mol_self_1,  W_mol_self_2,  W_mol_out, W_mol_degrees_1, W_mol_degrees_2, W_hid, W_log]
	Bias = [b_poc_layer_1, b_poc_layer_2, b_poc_out, b_mol_layer_1, b_mol_layer_2, b_mol_out, b_hid, b_log]


	traindata, testdata = load_MUV_data_shuffle_fold(fold)

	train_mols, train_pockets, labels_train = traindata # one pair of (pocket, mol), one label
	test_mols ,test_pockets,  labels_test = testdata

	num_atom_features, num_bond_features = get_atom_bond_dim([train_mols[0]])
	
	n_train_batches = len(labels_train)
	n_test_batches = len(labels_test)
	n_train_batches = int(n_train_batches/batch_size) # batch_size should be the number of paris of (pocket, mol)
	n_test_batches = int(n_test_batches/batch_size)


	numpy_rng = numpy.random.RandomState(89677)
	print ('... building the model')
	batchsize = batch_size

	env_features = T.matrix('env_features', dtype=theano.config.floatX)
	env_neighbors = T.matrix('env_neighbors', dtype=theano.config.floatX)
	env_mask = T.fvector('env_mask')
	env_degrees = T.matrix('env_degrees', dtype=theano.config.floatX)

	mol_atom_features = T.matrix('mol_atom_features', dtype=theano.config.floatX)
	mol_bond_features = T.matrix('mol_bond_features', dtype=theano.config.floatX)
	mol_atom_neighbors = T.matrix('mol_atom_neighbors', dtype=theano.config.floatX)
	mol_bond_neighbors = T.matrix('mol_bond_neighbors', dtype=theano.config.floatX)
	mol_mask = T.fvector('mol_mask')
	mol_degrees = T.matrix('mol_degrees', dtype=theano.config.floatX)
	labels = T.ivector('labels')

	s_cnn_da = Conv3D_Graph_Conv(
		numpy_rng=numpy_rng,
		Weights=Weights,
		Bias=Bias,
		env_features=env_features, 
		env_neighbors=env_neighbors,
		env_mask=env_mask,
		env_degrees=env_degrees, 
		mol_atom_features=mol_atom_features, 
		mol_bond_features=mol_bond_features, 
		mol_atom_neighbors=mol_atom_neighbors,
		mol_bond_neighbors=mol_bond_neighbors,
		mol_mask=mol_mask,
		mol_degrees=mol_degrees,    
		labels=labels,
		num_input=batch_size,
		num_poc_in_features=480, 
		poc_fp_length=512,
		num_mol_atom_features=62, 
		num_mol_bond_features=6, 
		mol_fp_length=216,
		dropout_rate=0.1
		)


	# get the training, validation and testing function for the model
	print ('... getting the finetuning functions')
   

	# compute number of minibatches for training, validation and testing
	index = T.lscalar('index')  # index to a [mini]batch
	lr = T.scalar(name='lr')


	tparams = s_cnn_da.tparams
	params = []
	for p in tparams.keys():
		params.append(tparams[p])

	cost = s_cnn_da.finetune_cost
	grads = T.grad(cost, params)

	tparams_keys= tparams.keys()

	zipped_grads = [theano.shared(tparams[k].get_value() * numpy_floatX(0.), name='%s_grad' % k) for k in tparams_keys]
	running_grads = [theano.shared(tparams[k].get_value() * numpy_floatX(0.), name='%s_rgrad' % k) for k in tparams_keys]
	running_grads2 = [theano.shared(tparams[k].get_value() * numpy_floatX(0.), name='%s_rgrad2' % k)for k in tparams_keys]


	zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
	rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
	rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
			 for rg2, g in zip(running_grads2, grads)]

	train_fn = theano.function([env_features, env_neighbors, env_degrees, env_mask, mol_atom_features, mol_bond_features, mol_atom_neighbors, mol_bond_neighbors, mol_degrees, mol_mask, labels], cost,
									updates=zgup + rgup + rg2up,
									name='rmsprop_f_grad_shared')

	
	# updir = [theano.shared(p.get_value() * numpy_floatX(0.),
	#                        name='%s_updir' % k)
	#          for k, p in tparams.iteritems()]
	updir = [theano.shared(tparams[k].get_value() * numpy_floatX(0.),name='%s_updir' % k) for k in tparams_keys]


	updir_new = [(ud, 0.9 * ud - 1e-4 * zg / T.sqrt(rg2 - rg ** 2 + 1e-4))
				 for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads,
											running_grads2)]
	param_up = [(p, p + udn[1])
				for p, udn in zip(tparams.values(), updir_new)]
	f_update = theano.function([lr], [], updates=updir_new + param_up,
							   on_unused_input='ignore',
							   name='rmsprop_f_update')


	index = T.lscalar('index')  # index to a [mini]batch

	train_score_i = theano.function(
		[env_features, env_neighbors, env_degrees, env_mask, mol_atom_features, mol_bond_features, mol_atom_neighbors, mol_bond_neighbors, mol_degrees, mol_mask, labels],
		s_cnn_da.errors,
		name='train'
	)

	#shared_env_features, shared_env_neighbors, shared_env_degrees, shared_mask, T.cast(shared_labels, 'int32')

	test_score_i = theano.function(
		[env_features, env_neighbors, env_degrees, env_mask, mol_atom_features, mol_bond_features, mol_atom_neighbors, mol_bond_neighbors, mol_degrees, mol_mask, labels],
		s_cnn_da.errors,
		name='test'
	)

	
	print ('... finetunning the model')
	# early-stopping parameters
	patience = 100000  # look as this many examples regardless
	patience_increase = 2.  # wait this much longer when a new best is
							# found
	improvement_threshold = 0.995  # a relative improvement of this much is
								   # considered significant
	
	best_test_score = numpy.inf
	test_score = 0.
	start_time = time.clock()

	done_looping = False
	epoch = 0

	while (epoch < training_epochs) and (not done_looping):
		#list_file= open('../progress_feature_CNN_fine_lstm_cnn_'+site+'_'+str(begin)+'_011717.txt','a')
		
		epoch = epoch + 1
		train_losses=[]
		# for part_index in range (20):
		for minibatch_index in range(0,n_train_batches):
			# [env_features, env_neighbors, env_degrees, mask, labels],

			###
			env_features_train, env_neighbors_train, env_degrees_train, env_mask_train = get_pocket_attributes(train_pockets[minibatch_index * batch_size: (minibatch_index + 1) * batch_size],max_ff, min_ff, mean_ff)
			mol_atom_features_train, mol_bond_features_train, mol_atom_neighbors_train, mol_bond_neighbors_train, mol_degrees_train, mol_mask_train = get_mol_attributes(train_mols[minibatch_index * batch_size: (minibatch_index + 1) * batch_size])
			y_train=labels_train[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
			train_EF, train_EN, train_ED, train_env_mask, train_labels = shared_dataset(env_features_train, env_neighbors_train, env_degrees_train, env_mask_train, y_train)
			train_MF_atom, train_MF_bond, train_MN_atom, train_MN_bond, train_MD, train_mol_mask = shared_dataset_mol(mol_atom_features_train, mol_bond_features_train, mol_atom_neighbors_train, mol_bond_neighbors_train, mol_degrees_train, mol_mask_train)

			minibatch_avg_cost = train_fn(train_EF.eval(), train_EN.eval(), train_ED.eval(), train_env_mask.eval(), train_MF_atom.eval(), train_MF_bond.eval(), train_MN_atom.eval(), train_MN_bond.eval(), train_MD.eval(), train_mol_mask.eval(), train_labels.eval())
			# list_file.write('epoch: '+str(epoch)+',cost: '+str(minibatch_avg_cost)+'\n')
			train_err=train_score_i(train_EF.eval(), train_EN.eval(), train_ED.eval(), train_env_mask.eval(), train_MF_atom.eval(), train_MF_bond.eval(), train_MN_atom.eval(), train_MN_bond.eval(), train_MD.eval(), train_mol_mask.eval(), train_labels.eval())
			train_losses.append(train_err)
			
			#minibatch_avg_cost = train_fn(minibatch_index)
			f_update(finetune_lr)
			iter = (epoch - 1) * n_train_batches + minibatch_index
			if minibatch_index % 1000==0 and minibatch_index!=0:
				train_losses_=numpy.array(train_losses)
				this_train_loss=numpy.mean(train_losses_)
				list_file= open('../progress_graph_MUV_CV_tars_fold_'+str(fold)+'.txt','a')
				list_file.write('epoch %i, minibatch %i, n_train_batches %i, train error %f %%' %(epoch, minibatch_index, n_train_batches, this_train_loss * 100.))
				list_file.write('\n')
				list_file.close()

				# test_losses=[]

				# #####
				# for i in range(n_test_batches):
				# 	env_features_test, env_neighbors_test, env_degrees_test, env_mask_test = get_pocket_attributes(test_pockets[i * batch_size: (i + 1) * batch_size])
				# 	mol_atom_features_test, mol_bond_features_test, mol_atom_neighbors_test, mol_bond_neighbors_test, mol_degrees_test, mol_mask_test = get_mol_attributes(test_mols[i * batch_size: (i + 1) * batch_size])
				# 	y_test=labels_test[i * batch_size: (i + 1) * batch_size]
				# 	test_EF, test_EN, test_ED, test_env_mask, test_labels = shared_dataset(env_features_test, env_neighbors_test, env_degrees_test, env_mask_test, y_test)
				# 	test_MF_atom, test_MF_bond, test_MN_atom, test_MN_bond, test_MD, test_mol_mask = shared_dataset_mol(mol_atom_features_test, mol_bond_features_test, mol_atom_neighbors_test, mol_bond_neighbors_test, mol_degrees_test, mol_mask_test)

				# 	test_err=train_score_i(test_EF.eval(), test_EN.eval(), test_ED.eval(), test_env_mask.eval(), test_MF_atom.eval(), test_MF_bond.eval(), test_MN_atom.eval(), test_MN_bond.eval(), test_MD.eval(), test_mol_mask.eval(), test_labels.eval())
				# 	test_losses.append(test_err)

				# test_score = numpy.mean(test_losses)
				# #####
				# list_file= open('../progress_graph_MUV_fold_'+str(fold)+'.txt','a')
				# print(('     epoch %i, test error of best model %f %%') % (epoch, test_score * 100.))
				# list_file.write(('     epoch %i, test error of best model %f %%') % (epoch, test_score * 100.))
				# list_file.write('\n')
				# list_file.close()

				#### dump weights ######
				W_poc_out = s_cnn_da.Drop_POC_FP_layer.W_out
				b_poc_out = s_cnn_da.Drop_POC_FP_layer.b_out
				W_mol_out = s_cnn_da.Drop_MOL_FP_layer.W_out
				b_mol_out = s_cnn_da.Drop_MOL_FP_layer.b_out


				W_poc_self_1 = s_cnn_da.Drop_Graph_poc_layer_1.W_self
				b_poc_layer_1 = s_cnn_da.Drop_Graph_poc_layer_1.b_layer
				W_mol_self_1 = s_cnn_da.Drop_Graph_mol_layer_1.W_self
				b_mol_layer_1 = s_cnn_da.Drop_Graph_mol_layer_1.b_layer

				W_poc_self_2 = s_cnn_da.Drop_Graph_poc_layer_2.W_self
				b_poc_layer_2 = s_cnn_da.Drop_Graph_poc_layer_2.b_layer
				W_mol_self_2 = s_cnn_da.Drop_Graph_mol_layer_2.W_self
				b_mol_layer_2 = s_cnn_da.Drop_Graph_mol_layer_2.b_layer

				
				W_hid = s_cnn_da.Drop_FC_layer.W
				b_hid = s_cnn_da.Drop_FC_layer.b
				W_log = s_cnn_da.Drop_logLayer.W
				b_log = s_cnn_da.Drop_logLayer.b


				Weights = [W_poc_self_1,  W_poc_self_2,  W_poc_out, W_mol_self_1,  W_mol_self_2,  W_mol_out, W_hid, W_log]
				Bias = [b_poc_layer_1, b_poc_layer_2, b_poc_out, b_mol_layer_1, b_mol_layer_2, b_mol_out, b_hid, b_log]


				W_poc_degrees_1 = []
				for d in range(0,max_poc_degrees):
					W_poc_degrees_1.append(s_cnn_da.Graph_poc_layer_1.W_degrees[d])

				W_poc_degrees_2 = []
				for d in range(0,max_poc_degrees):
					W_poc_degrees_2.append(s_cnn_da.Graph_poc_layer_2.W_degrees[d])

				W_mol_degrees_1 = []
				for d in range(0,max_mol_degrees):
					W_mol_degrees_1.append(s_cnn_da.Graph_mol_layer_1.W_degrees[d])

				W_mol_degrees_2 = []
				for d in range(0,max_mol_degrees):
					W_mol_degrees_2.append(s_cnn_da.Graph_mol_layer_2.W_degrees[d])


				Weights.extend([W_poc_degrees_1, W_poc_degrees_2, W_mol_degrees_1, W_mol_degrees_2])
				dump_weights_pickle(Weights, Bias, file_name='../weights_Graph/weight_FEATURE_graph_MUV_CV_tars_fold_'+str(fold)+'_e'+str(epoch)+'_'+str(minibatch_index)+'_041519.zip')

			  
		train_losses=numpy.array(train_losses)
		this_train_loss=numpy.mean(train_losses)
		list_file= open('../results_graph_MUV_CV_tars_fold_'+str(fold)+'_041519.txt','a')
		list_file.write('epoch %i, train error %f %%' %(epoch, this_train_loss * 100.))
		list_file.write('\n')


		test_losses=[]

		#####
		for i in range(n_test_batches):
			env_features_test, env_neighbors_test, env_degrees_test, env_mask_test = get_pocket_attributes(test_pockets[i * batch_size: (i + 1) * batch_size],max_ff, min_ff, mean_ff)
			mol_atom_features_test, mol_bond_features_test, mol_atom_neighbors_test, mol_bond_neighbors_test, mol_degrees_test, mol_mask_test = get_mol_attributes(test_mols[i * batch_size: (i + 1) * batch_size])
			y_test=labels_test[i * batch_size: (i + 1) * batch_size]
			test_EF, test_EN, test_ED, test_env_mask, test_labels = shared_dataset(env_features_test, env_neighbors_test, env_degrees_test, env_mask_test, y_test)
			test_MF_atom, test_MF_bond, test_MN_atom, test_MN_bond, test_MD, test_mol_mask = shared_dataset_mol(mol_atom_features_test, mol_bond_features_test, mol_atom_neighbors_test, mol_bond_neighbors_test, mol_degrees_test, mol_mask_test)

			test_err=train_score_i(test_EF.eval(), test_EN.eval(), test_ED.eval(), test_env_mask.eval(), test_MF_atom.eval(), test_MF_bond.eval(), test_MN_atom.eval(), test_MN_bond.eval(), test_MD.eval(), test_mol_mask.eval(), test_labels.eval())
			test_losses.append(test_err)

		test_score = numpy.mean(test_losses)
		#####

		print(('     epoch %i, test error of best model %f %%') % (epoch, test_score * 100.))
		list_file.write(('     epoch %i, test error of best model %f %%') % (epoch, test_score * 100.))
		list_file.write('\n')
		list_file.close()

def eval_ROC(pro_name,Weights_ID,epoch,minibatch_index,fold):
	"""Report the ROC AUC of one target from its saved test labels and probabilities.

	Loads the ``test_labels_*`` and ``test_probs_*`` arrays for the given weights
	ID, fold and target from ``../MUV_results_0408/``, prints the area under the
	ROC curve and appends it to ``../ROC_results_<Weights_ID>.txt``.

	Args:
		pro_name: target name; part of the input file names and of the report.
		Weights_ID: identifier used in the input and report file names.
		epoch: not used by this function.
		minibatch_index: not used by this function.
		fold: cross-validation fold; part of the input file names.
	"""
	from sklearn import metrics

	# NOTE(review): eval_MUV_ROC_cross_val dumps its arrays under
	# '../results/MUV_CV/', not under this directory -- confirm which location
	# is the intended input before relying on the reported scores.
	prefix = '../MUV_results_0408/test_'
	suffix = '_'+Weights_ID+'_fold_'+str(fold)+'_'+pro_name+'.dat'

	# The inputs are presumably written with ndarray.dump (pickle format), as in
	# eval_MUV_ROC_cross_val; recent NumPy refuses pickled files unless
	# allow_pickle=True. Pickle is unsafe on untrusted data, so only load files
	# produced by this pipeline.
	all_y_true = numpy.load(prefix+'labels'+suffix, allow_pickle=True)
	all_y_prob = numpy.load(prefix+'probs'+suffix, allow_pickle=True)

	fpr, tpr, thresholds = metrics.roc_curve(all_y_true, all_y_prob)
	S_1 = metrics.auc(fpr, tpr)

	print ("target:"+pro_name)
	print ("AUC score:"+str(S_1))
	# 'with' guarantees the report file is flushed and closed, even on error.
	with open('../ROC_results_'+Weights_ID+'.txt','a') as list_file:
		list_file.write("target:"+pro_name+' '+'\n')
		list_file.write("AUC score:"+str(S_1)+'\n')


def eval_MUV_ROC_cross_val(batch_size=5):
	"""Score saved per-fold weights on every MUV test target and report ROC AUC.

	For each fold in 0-3 the weights file of that fold is loaded. For every test
	target of the fold a ``Conv3D_Graph_Conv`` model is built from those weights
	and run over the target's examples in minibatches; the true labels and
	column 1 of the model's ``y_prob`` output are dumped under
	``../results/MUV_CV/`` and ``eval_ROC`` is then called for the target.

	Args:
		batch_size: number of (pocket, molecule) pairs per minibatch. Examples
			beyond the last full minibatch are not scored.
	"""

	epoch=1
	Weights_ID = 'MUV_CV_tars'

	# Minibatch index embedded in the weights file name of each fold. Only folds
	# 0-3 are visited below; the entries for 4 and 5 are kept from the previous
	# if/elif chain so widening the fold range needs no further change here.
	checkpoint_minibatch = {0: 53000, 1: 53000, 2: 53000, 3: 53000, 4: 53000, 5: 59000}

	# Only max_ff/min_ff/mean_ff are used below (passed to get_pocket_attributes).
	all_poc, max_ff, min_ff, mean_ff = get_all_pocs()

	for fold in range(0,4):
		print(fold)
		minibatch_index = checkpoint_minibatch[fold]
		# NOTE(review): the training loop in this file writes its weights under
		# '../weights_Graph/'; confirm that '../weights/' is the intended source.
		Weights_file_name = '../weights/weight_FEATURE_graph_MUV_CV_tars_fold_'+str(fold)+'_e'+str(epoch)+'_'+str(minibatch_index)+'_041519.zip'

		# NOTE(review): the training loop appends the degree weights in the order
		# poc_1, poc_2, mol_1, mol_2, while this unpacks poc_1, mol_1, poc_2, mol_2.
		# Confirm against the order load_weights_pickle actually returns.
		[W_poc_self_1,  W_poc_self_2,  W_poc_out, W_mol_self_1,  W_mol_self_2,  W_mol_out, W_hid, W_log, b_poc_layer_1, b_poc_layer_2, b_poc_out, b_mol_layer_1, b_mol_layer_2, b_mol_out, b_hid, b_log, W_poc_degrees_1, W_mol_degrees_1, W_poc_degrees_2, W_mol_degrees_2] = load_weights_pickle(Weights_file_name)

		Weights = [W_poc_self_1,  W_poc_self_2,  W_poc_out, W_poc_degrees_1, W_poc_degrees_2, W_mol_self_1,  W_mol_self_2,  W_mol_out, W_mol_degrees_1, W_mol_degrees_2, W_hid, W_log]
		Bias = [b_poc_layer_1, b_poc_layer_2, b_poc_out, b_mol_layer_1, b_mol_layer_2, b_mol_out, b_hid, b_log]

		all_targets_train,all_targets_test=load_MUV_dataset_CV(fold)
		for target in all_targets_test:
			print(target)

			test_lig_pos,test_lig_neg,FF_name=all_targets_test[target]

			# Positives (label 1) first, then negatives (label 0); every molecule
			# of a target is paired with that target's single pocket name.
			positives = list(test_lig_pos)
			negatives = list(test_lig_neg)
			test_mols = positives + negatives
			labels_test = [1] * len(positives) + [0] * len(negatives)
			test_pockets = [FF_name] * len(test_mols)

			test_mols,test_pockets,labels_test=test_60(test_mols,test_pockets,labels_test)

			# NOTE(review): these dimensions are computed but not used; the model
			# below is built with hard-coded 62 atom / 6 bond features.
			num_atom_features, num_bond_features = get_atom_bond_dim([test_mols[0]])
			numpy_rng = numpy.random.RandomState(89677)

			# Whole minibatches only; a trailing partial batch is dropped.
			n_test_batches = len(labels_test) // batch_size

			env_features = T.matrix('env_features', dtype=theano.config.floatX)
			env_neighbors = T.matrix('env_neighbors', dtype=theano.config.floatX)
			env_mask = T.fvector('env_mask')
			env_degrees = T.matrix('env_degrees', dtype=theano.config.floatX)

			mol_atom_features = T.matrix('mol_atom_features', dtype=theano.config.floatX)
			mol_bond_features = T.matrix('mol_bond_features', dtype=theano.config.floatX)
			mol_atom_neighbors = T.matrix('mol_atom_neighbors', dtype=theano.config.floatX)
			mol_bond_neighbors = T.matrix('mol_bond_neighbors', dtype=theano.config.floatX)
			mol_mask = T.fvector('mol_mask')
			mol_degrees = T.matrix('mol_degrees', dtype=theano.config.floatX)
			labels = T.ivector('labels')

			# NOTE(review): nothing passed here depends on `target`, so the model
			# and the compiled function could probably be built once per fold.
			# Left per-target because how Conv3D_Graph_Conv uses numpy_rng is not
			# visible from this function.
			s_cnn_da = Conv3D_Graph_Conv(
				numpy_rng=numpy_rng,
				Weights=Weights,
				Bias=Bias,
				env_features=env_features,
				env_neighbors=env_neighbors,
				env_mask=env_mask,
				env_degrees=env_degrees,
				mol_atom_features=mol_atom_features,
				mol_bond_features=mol_bond_features,
				mol_atom_neighbors=mol_atom_neighbors,
				mol_bond_neighbors=mol_bond_neighbors,
				mol_mask=mol_mask,
				mol_degrees=mol_degrees,
				labels=labels,
				num_input=batch_size,
				num_poc_in_features=480,
				poc_fp_length=512,
				num_mol_atom_features=62,
				num_mol_bond_features=6,
				mol_fp_length=216,
				dropout_rate=0.1
				)

			test_score_i = theano.function(
				[env_features, env_neighbors, env_degrees, env_mask, mol_atom_features, mol_bond_features, mol_atom_neighbors, mol_bond_neighbors, mol_degrees, mol_mask, labels],
				[s_cnn_da.errors,s_cnn_da.y_prob],
				name='test'
			)

			test_y_prob=[]
			test_y_true=[]

			for i in range(n_test_batches):
				batch = slice(i * batch_size, (i + 1) * batch_size)
				env_features_test, env_neighbors_test, env_degrees_test, env_mask_test = get_pocket_attributes(test_pockets[batch],max_ff, min_ff, mean_ff)
				mol_atom_features_test, mol_bond_features_test, mol_atom_neighbors_test, mol_bond_neighbors_test, mol_degrees_test, mol_mask_test = get_mol_attributes(test_mols[batch])
				y_test=labels_test[batch]
				test_EF, test_EN, test_ED, test_env_mask, test_labels = shared_dataset(env_features_test, env_neighbors_test, env_degrees_test, env_mask_test, y_test)
				test_MF_atom, test_MF_bond, test_MN_atom, test_MN_bond, test_MD, test_mol_mask = shared_dataset_mol(mol_atom_features_test, mol_bond_features_test, mol_atom_neighbors_test, mol_bond_neighbors_test, mol_degrees_test, mol_mask_test)

				# The batch error is returned by the compiled function but only
				# the probabilities are needed for the ROC evaluation.
				[test_err,test_prob]=test_score_i(test_EF.eval(), test_EN.eval(), test_ED.eval(), test_env_mask.eval(), test_MF_atom.eval(), test_MF_bond.eval(), test_MN_atom.eval(), test_MN_bond.eval(), test_MD.eval(), test_mol_mask.eval(), test_labels.eval())
				test_y_prob.extend(test_prob[:,1])
				test_y_true.extend(y_test)

			test_y_prob = numpy.array(test_y_prob)
			test_y_true = numpy.array(test_y_true)
			#####
			# NOTE(review): eval_ROC reads its inputs from '../MUV_results_0408/',
			# not from the directory written here -- confirm the two are meant to
			# be the same location.
			test_y_true.dump('../results/MUV_CV/test_labels_'+Weights_ID+'_fold_'+str(fold)+'_'+target+'.dat')
			test_y_prob.dump('../results/MUV_CV/test_probs_'+Weights_ID+'_fold_'+str(fold)+'_'+target+'.dat')

			eval_ROC(target,Weights_ID,epoch,minibatch_index,fold)


if __name__ == '__main__':

	# Script entry point. `mode` is switched by editing the line below:
	# 'train' fine-tunes on one cross-validation fold, 'ROC' evaluates saved
	# weights with eval_MUV_ROC_cross_val().
	mode = 'ROC'

	# The fold argument is only consumed by training, so it is optional here;
	# previously a missing argument crashed the ROC mode with an IndexError.
	fold = int(sys.argv[1]) if len(sys.argv) > 1 else None

	if mode == 'train':
		if fold is None:
			sys.exit('usage: %s <fold>' % sys.argv[0])
		test_fine_S_CNN_dA(fold=fold,finetune_lr=0.005,training_epochs=15)
	elif mode == 'ROC':
		eval_MUV_ROC_cross_val()