import os import sys import time import numpy import theano import theano.tensor as T from theano.tensor.shared_randomstreams import RandomStreams from theano.tensor.signal import downsample from theano.tensor.nnet import conv from theano.tensor.nnet import conv3d2d from scipy.io import matlab import re import math from theano import shared from collections import OrderedDict from layers import * from theano.misc.pkl_utils import dump import numpy as np from pocket_ff_to_box_backbone import * max_poc_degrees = 20 max_nodes_in_poc = 50 def relu(X): return T.maximum(0,X) class PocketGraph(object): def __init__(self): self.nodes = {} def new_node(self, ntype, pos, features=None, label=None, env_ix=None): new_node = Env(ntype, pos, features, label, env_ix) self.nodes.setdefault(ntype, []).append(new_node) return new_node def add_subgraph(self, subgraph): old_nodes = self.nodes new_nodes = subgraph.nodes for ntype in set(old_nodes.keys()) | set(new_nodes.keys()): old_nodes.setdefault(ntype, []).extend(new_nodes.get(ntype, [])) def sort_nodes_by_degree(self, ntype): nodes_by_degree = {i : [] for i in xrange(max_poc_degrees)} for node in self.nodes[ntype]: nodes_by_degree[len(node.get_neighbors(ntype))].append(node) new_nodes = [] for degree in xrange(max_poc_degrees): cur_nodes = nodes_by_degree[degree] self.nodes[(ntype, degree)] = cur_nodes new_nodes.extend(cur_nodes) self.nodes[ntype] = new_nodes def get_degree(self, ntype): all_node_degree=[] for node in self.nodes[ntype]: all_node_degree.append(len(node.get_neighbors(ntype))) return numpy.array(all_node_degree) def feature_array(self, ntype): assert ntype in self.nodes return np.array([node.features for node in self.nodes[ntype]]) def pos_array(self, ntype): assert ntype in self.nodes return np.array([node.pos for node in self.nodes[ntype]]) def label_array(self, ntype): assert ntype in self.nodes return np.array([node.label for node in self.nodes[ntype]]) def neighbor_list(self, self_ntype, neighbor_ntype): assert self_ntype in self.nodes and neighbor_ntype in self.nodes neighbor_idxs = {n : i for i, n in enumerate(self.nodes[neighbor_ntype])} for self_node in self.nodes[self_ntype]: neighbors=self_node.get_neighbors(neighbor_ntype) return [[neighbor_idxs[neighbor] for neighbor in self_node.get_neighbors(neighbor_ntype)] for self_node in self.nodes[self_ntype]] def env_ix_array(self): return np.array([node.env_ix for node in self.nodes['env']]) class Env(object): __slots__ = ['ntype', 'features', '_neighbors', 'pos', 'label','env_ix'] def __init__(self, ntype, pos, features, label, env_ix): self.ntype = ntype self.features = features self._neighbors = [] self.pos = pos self.label = label self.env_ix = env_ix def add_neighbors(self, neighbor_list): for neighbor in neighbor_list: self._neighbors.append(neighbor) neighbor._neighbors.append(self) def get_neighbors(self, ntype): return [n for n in self._neighbors if n.ntype == ntype] def dist(env_1,env_2): return np.sqrt(np.sum((env_1.pos-env_2.pos)**2)) def pad_neighbors(neighbors): pad_neighbors = numpy.zeros((len(neighbors),len(neighbors))) for i in xrange (len(neighbors)): entry = neighbors[i] for e in xrange (len(entry)): pad_neighbors[i][entry[e]]=1 return pad_neighbors def pad_degree(degrees,max_degrees): pad_degrees = numpy.zeros((len(degrees),max_degrees)) for i in xrange (len(degrees)): degree = degrees[i] if degree < max_degrees: pad_degrees[i][degree]=1 else: return False, pad_degrees return True, pad_degrees def get_pocket_attributes(pts_list, input_dir, pdb_dir): pockets, pocket_valid = pocket_pts_to_backbone_box(pts_list, input_dir, pdb_dir) if pocket_valid: big_graph, mask = graph_from_pocket_tuple(pockets) env_features = big_graph.feature_array('env') env_labels = big_graph.label_array('env') env_neighbors = big_graph.neighbor_list('env','env') env_neighbors = pad_neighbors(env_neighbors) env_degrees = big_graph.get_degree('env') degree_valid, env_degrees = pad_degree(env_degrees, max_poc_degrees) if degree_valid: return True, env_features, env_neighbors, env_degrees, mask, env_labels else: return False, None, None, None, None, None else: return False, None, None, None, None, None def graph_from_pocket_tuple(pocket_Env_list): graph_list = [graph_from_Env(s) for s in pocket_Env_list] big_graph = PocketGraph() for i in xrange (len(graph_list)): subgraph = graph_list[i] graph, mask = subgraph big_graph.add_subgraph(graph) if i ==0: big_graph_mask = mask else: big_graph_mask = numpy.concatenate((big_graph_mask, mask), axis=0) return big_graph, big_graph_mask def graph_from_Env(entry): file_name=entry[0] pdb=file_name[0:4] all_ffs=entry[1] num_of_Env=entry[2] mask = numpy.zeros((max_nodes_in_poc,)) for i in xrange(num_of_Env): mask[i]=1 graph = Connectivity_from_ff(all_ffs,num_of_Env) pocket_node = graph.new_node('pocket', pos=[0,0,0]) if 'env' in graph.nodes.keys(): pocket_node.add_neighbors(graph.nodes['env']) return graph, mask def Connectivity_from_ff(all_ffs,num_of_Env): graph = parse_ff_to_Env(all_ffs,num_of_Env) if 'env' in graph.nodes.keys(): for i in range (0,num_of_Env): for j in range (i+1,num_of_Env): env_1=graph.nodes['env'][i] env_2=graph.nodes['env'][j] if dist(env_1,env_2)<7: env_1.add_neighbors([env_2]) return graph def parse_ff_to_Env(all_ffs,num_of_Env): graph = PocketGraph() for i in range (0,num_of_Env): env=all_ffs[i][0] pos=all_ffs[i][1] label=all_ffs[i][2] new_atom_node = graph.new_node('env', features=env, pos=pos, label=label, env_ix=i) if num_of_Env