import collections
import csv
import itertools as it
import json
import math
import os
import sys
from random import shuffle

import numpy
import numpy as np
import numpy.random
import scipy
import scipy.spatial

# Limits that are not referenced by the functions below; presumably consumed
# by code elsewhere in the project -- TODO confirm.
max_poc_degrees = 20
max_nodes_in_poc = 50

# Directory and extension of the per-pocket feature (".ff") files.
input_dir = '../data/DUDE_drugFEATURE_scPDB_ff/'
input_ext = '.ff'

# Identifiers whose rows are dropped from every score table.
_EXCLUDED_IDS = ('tmp', '1DVX_DIF')

# Number of numeric columns read from each "Env..." row of a feature file.
_NUM_FEATURES = 480


def _split_score_row(line):
    """Split one tab-separated score row.

    Returns a ``(first_id, second_id, raw_score)`` tuple where ``raw_score``
    is the last column with surrounding whitespace removed.  Raises
    ``IndexError`` if the row has fewer than two columns.
    """
    fields = line.split('\t')
    return fields[0], fields[1], fields[-1].strip()


def _is_excluded(first_id, second_id):
    """Return True if either identifier is on the exclusion list."""
    return first_id in _EXCLUDED_IDS or second_id in _EXCLUDED_IDS


def get_drug_drug_dict(score_path='../data/drug_drug_score.out'):
    """Load the score table at *score_path* into a dict of lists.

    Each row is tab-separated: first column is the key, second column the
    partner identifier, last column the score.  Rows mentioning an excluded
    identifier and rows whose score is ``NA`` are skipped.

    Returns:
        dict mapping first-column id -> list of ``(second_id, score)``
        tuples, in file order.
    """
    drug_drug_dict = {}
    with open(score_path) as score_file:
        for line in score_file:
            target_FF, com_FF, raw_score = _split_score_row(line)
            if _is_excluded(target_FF, com_FF):
                continue
            if raw_score == 'NA':
                # No usable score for this pair.
                continue
            drug_drug_dict.setdefault(target_FF, []).append(
                (com_FF, float(raw_score)))
    return drug_drug_dict


def get_DUDE_DUDE_dict(score_path='../data/DUDE_pocket_similarity_score.txt'):
    """Load the score table at *score_path* into a dict of lists.

    Same row layout as :func:`get_drug_drug_dict`, but every row must carry
    a numeric score: the score is parsed before any filtering, so a
    non-numeric value (including ``NA``) raises ``ValueError``.

    Returns:
        dict mapping first-column id -> list of ``(second_id, score)``
        tuples, in file order.
    """
    pocketFEATURE_dict = {}
    with open(score_path) as score_file:
        for line in score_file:
            target_FF, com_FF, raw_score = _split_score_row(line)
            # Parsed up front on purpose: malformed scores fail loudly even
            # on rows that would be filtered out below.
            FF_score = float(raw_score)
            if _is_excluded(target_FF, com_FF):
                continue
            pocketFEATURE_dict.setdefault(target_FF, []).append(
                (com_FF, FF_score))
    return pocketFEATURE_dict


def get_DUDE_drug_dict(
        score_path='../data/DUDE_drugFEATURE_pocket_similarity_score.txt'):
    """Load the score table at *score_path* indexed in both directions.

    Rows whose score is ``NA`` and rows mentioning an excluded identifier
    are skipped.

    Returns:
        ``(DUDE_drug_dict, drug_DUDE_dict)`` where the first maps
        first-column id -> list of ``(second_id, score)`` and the second
        maps second-column id -> list of ``(first_id, score)``.
    """
    DUDE_drug_dict = {}
    drug_DUDE_dict = {}
    with open(score_path) as score_file:
        for line in score_file:
            target_FF, drug_FF, raw_score = _split_score_row(line)
            if raw_score == 'NA':
                # Skip rows without a score rather than reusing a stale one.
                continue
            if _is_excluded(target_FF, drug_FF):
                continue
            FF_score = float(raw_score)
            DUDE_drug_dict.setdefault(target_FF, []).append(
                (drug_FF, FF_score))
            drug_DUDE_dict.setdefault(drug_FF, []).append(
                (target_FF, FF_score))
    return DUDE_drug_dict, drug_DUDE_dict


def get_all_env_ff(ff_list, ff_dir=None):
    """Collect the feature rows of every file named in *ff_list*.

    Each name is resolved inside *ff_dir* after lower-casing its first four
    characters.  Within a file, a row is used when its first
    whitespace-separated token starts with ``"Env"`` and is longer than
    three characters; tokens 1..480 of such a row are parsed as floats.

    Args:
        ff_list: iterable of feature-file names.
        ff_dir: directory holding the files; defaults to the module-level
            ``input_dir``.

    Returns:
        ``numpy.ndarray`` with one 480-element row per matching line, in
        input order (an empty array if nothing matched).

    Raises:
        IndexError: a matching row has fewer than 480 values.
        ValueError: a value cannot be parsed as a float.
    """
    if ff_dir is None:
        ff_dir = input_dir

    FV = []
    for fn in ff_list:
        path = os.path.join(ff_dir, fn[0:4].lower() + fn[4:])
        with open(path) as ff_file:
            for line in ff_file:
                tokens = line.split()
                if not tokens:
                    continue
                label = tokens[0]
                if len(label) > 3 and label.startswith("Env"):
                    # Index explicitly so that a short row raises instead of
                    # silently producing a ragged array.
                    FV.append(numpy.array(
                        [float(tokens[i + 1]) for i in range(_NUM_FEATURES)]))
    return numpy.array(FV)


def get_all_pocs():
    """Gather every pocket file name and per-column feature statistics.

    The names are the union of the keys of all score dictionaries plus five
    hard-coded extra files, each suffixed with ``.ff`` and sorted.  The
    sorted list is written, one name per line, to
    ``../results/sda_poc_order.txt``.

    Returns:
        ``(all_poc_ff, max_ff, min_ff, mean_ff)``: the sorted file-name list
        and the column-wise maximum, minimum and mean over all feature rows
        returned by :func:`get_all_env_ff`.
    """
    drug_drug_dict = get_drug_drug_dict()
    DUDE_DUDE_dict = get_DUDE_DUDE_dict()
    DUDE_drug_dict, drug_DUDE_dict = get_DUDE_drug_dict()

    all_poc_name = (set(drug_drug_dict) | set(DUDE_DUDE_dict)
                    | set(DUDE_drug_dict) | set(drug_DUDE_dict))
    all_poc_ff = [name + '.ff' for name in all_poc_name]
    # Extra pockets added by hand ('4trj_NAD.ff' was left out in the
    # original list).
    all_poc_ff.extend(['3pbl_ETQ.ff', '3g0e_B49.ff', '4trj_665.ff',
                       '3e37_ED5.ff', '3nf7_CIW.ff'])
    all_poc_ff.sort()

    with open('../results/sda_poc_order.txt', 'w') as order_file:
        order_file.writelines(name + '\n' for name in all_poc_ff)

    all_FF = get_all_env_ff(all_poc_ff)
    max_ff = numpy.max(all_FF, axis=0)
    min_ff = numpy.min(all_FF, axis=0)
    mean_ff = numpy.mean(all_FF, axis=0)
    return all_poc_ff, max_ff, min_ff, mean_ff