#########################################
############### PCA ###################
#########################################
import matplotlib as mpl
mpl.use('Agg')
import os
import numpy as np
from sklearn.decomposition import PCA
import pickle
import sys
from sklearn.manifold import TSNE
# from plot_score_correct import *
from extract_pts import *
import matplotlib.pyplot as plt
from rdkit.Chem import MolFromSmiles
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from io_utils_DUDE_neg_poc import get_all_mols
from io_utils_MUV_ROC import *
# NOTE: the imports below repeat earlier ones. They are kept, in this order,
# after the wildcard imports so that `numpy`, `os`, `mpl` and `plt` are bound
# to the real modules even if a wildcard import exported the same name.
import numpy
import os
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import Axes3D


def save(path, ext='png', close=True, verbose=True):
    """Save the current matplotlib figure as an EPS file.

    The figure is written to ``<path>.<ext>.eps`` at 900 dpi; the target
    directory is created when it does not exist.

    NOTE(review): `ext` only contributes to the file name. The figure is
    always written in EPS format, so the default call produces a file ending
    in ``.png.eps``. The name is kept as-is because other tooling may rely on
    it -- confirm before simplifying.

    Args:
        path: Output path without extension. A bare file name is saved in
            the current directory.
        ext: Text inserted between the base name and the ``.eps`` suffix.
        close: Close the current figure after saving.
        verbose: Print progress messages.
    """
    directory, basename = os.path.split(path)
    if directory == '':
        directory = '.'

    # Kept as an explicit existence check so the script still runs on Python 2.
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Report the path that is actually written, including the '.eps' suffix.
    savepath = os.path.join(directory, "%s.%s" % (basename, ext)) + '.eps'

    if verbose:
        print("Saving figure to '%s'..." % savepath)

    plt.savefig(savepath, format='eps', dpi=900)

    if close:
        plt.close()

    if verbose:
        print("Done")


def get_FF(target, PDB_ID, DUDE_ctr):
    """Return the ``.ff`` file name for the ligand found in a PDB entry.

    Calls ``cut_ligand_all_atoms(PDB_ID, DUDE_ctr, False)`` (provided by one
    of the wildcard imports) and builds ``"<PDB_ID>_<lig_ID>.ff"`` from the
    returned entries.

    Args:
        target: Target name; only used in the progress print.
        PDB_ID: PDB identifier, used as the file-name prefix.
        DUDE_ctr: Ligand centre handed through to ``cut_ligand_all_atoms``.

    Returns:
        The file name built from the last returned ligand entry, or ``None``
        when no ligand was returned.
    """
    print(target, PDB_ID, DUDE_ctr)
    ff_name = None
    ligs = cut_ligand_all_atoms(PDB_ID, DUDE_ctr, False)
    if ligs == []:
        print('cant find ligs' + '\n')
    # NOTE(review): the original formatting of this file was lost; this
    # assumes the return sits after the loop (the last ligand wins), which is
    # what the `ff_name = None` initialiser suggests -- confirm.
    for lig in ligs:
        # Each entry must unpack into exactly these four fields.
        lig_ID, lig_chain, lig_no, ctr = lig
        # ff_name=PDB_ID+'_'+str(lig_ID)+'_'+str(lig_chain)+'_'+str(lig_no)+'.ff'
        ff_name = PDB_ID + '_' + str(lig_ID) + '.ff'
    return ff_name


def get_target_ff_dict():
    """Map each DUD-E target's ``.ff`` file name to its target name.

    Reads the "Target Name" and "PDB" columns of
    ``../data/DUDE/DUDE_PDBID.csv`` via ``read_csv_DUDE``, parses every
    target's ``crystal_ligand.mol2`` with ``parse_crystal_lig`` and resolves
    the file name with ``get_FF``.

    Returns:
        dict mapping ``.ff`` file name -> target name. Targets for which
        ``get_FF`` returns ``None`` are left out.
    """
    filename = "../data/DUDE/DUDE_PDBID.csv"
    data = read_csv_DUDE(filename, "Target Name", "PDB")
    target_FF_dict = {}
    for target, PDB_ID in zip(data[0], data[1]):
        mol2_path = '../data/DUDE/' + target.lower() + '/' + 'crystal_ligand.mol2'
        # Close each file promptly instead of leaking one handle per target.
        with open(mol2_path) as crystal_lig:
            lig_ctr = parse_crystal_lig(list(crystal_lig))
        target_FF = get_FF(target, PDB_ID, lig_ctr)
        if target_FF is not None:
            target_FF_dict[target_FF] = target
    return target_FF_dict


def _morgan_fp_matrix(smiles_list):
    """Return one radius-2 Morgan bit-vector fingerprint row per SMILES."""
    mols = [MolFromSmiles(smi) for smi in smiles_list]
    fps = [AllChem.GetMorganFingerprintAsBitVect(mol, 2) for mol in mols]
    rows = []
    for fp in fps:
        # ConvertToNumpyArray fills (and resizes) the array passed in.
        arr = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        rows.append(arr)
    return numpy.array(rows)


def get_silhouette():
    """Compute a silhouette value per MUV target from Morgan fingerprints.

    For every record returned by ``load_MUV_dataset()`` the positive ligands
    (index 3) get label 1 and the negative ligands (index 4) get label 0.
    Euclidean silhouette samples are computed over all fingerprints and the
    mean of the first 30 samples is stored under the record's name (index 0).

    Returns:
        dict mapping target name -> mean silhouette value of the first 30
        samples.
    """
    from sklearn import metrics

    silhouette_dict = {}
    for record in load_MUV_dataset():
        pro_name = record[0]
        all_actives = list(record[3])
        all_decoys = list(record[4])

        np_act_fps = _morgan_fp_matrix(all_actives)
        np_dec_fps = _morgan_fp_matrix(all_decoys)

        act_lables = np.ones(np_act_fps.shape[0])
        dec_lables = np.zeros(np_dec_fps.shape[0])

        # Actives come first, so the leading rows of W belong to them.
        X = numpy.concatenate((np_act_fps, np_dec_fps), axis=0)
        labels = numpy.concatenate((act_lables, dec_lables))
        W = metrics.silhouette_samples(X, labels, metric='euclidean')

        # Presumably 30 is the number of actives per target -- TODO confirm.
        silhouette_dict[pro_name] = numpy.mean(W[0:30])
    return silhouette_dict


def get_PF_Scores():
    """Return the best PF score per MUV target read from ``DUDE_MUV.out``.

    Each non-blank line is split on whitespace: the first token is looked up
    in the hard-coded FF-name table and the last token is the score, which is
    negated. For every first token the maximum negated score is kept.

    Returns:
        dict mapping MUV target id -> maximum negated score.

    Raises:
        KeyError: if a first token is missing from the FF-name table.
    """
    # NOTE(review): the original literal listed '1yow_P0E' twice ('692' and
    # then '600'); only the later value took effect, so '692' never receives a
    # score here. The effective mapping is kept unchanged -- confirm whether
    # both targets should share this entry's score.
    FF_tar_dict = {
        '2y6o_1N1': '689',
        '1au8_0H8': '832',
        '1yow_P0E': '600',
        '3v2y_ML5': '466',
        '3poo_S69': '548',
        '5exm_5ST': '846',
        '4xe4_NAG': '852',
        '5cxv_0HK': '859',
        '5tn7_7G2': '713',
    }

    MUV_DUDE_dict = {}
    with open('DUDE_MUV.out') as DUDE_MUV_file:
        for line in DUDE_MUV_file:
            ele = line.split()
            if not ele:
                # Skip blank lines instead of failing on ele[0].
                continue
            score = -1 * float(ele[-1])
            MUV_DUDE_dict.setdefault(ele[0], []).append(score)

    print("MUV_DUDE_dict")
    print(MUV_DUDE_dict.keys())

    PF_dict = {}
    for MUV in MUV_DUDE_dict:
        # s = numpy.mean(S)
        PF_dict[FF_tar_dict[MUV]] = numpy.max(MUV_DUDE_dict[MUV])
    # PF_dict['600']=PF_dict['692']
    return PF_dict


def plot_data():
    """Scatter-plot silhouette value against max PF score per MUV target.

    The freshly computed PF scores are only printed; the plotted values are
    the hard-coded tables below. Every point is annotated with its target id
    and the figure is written through ``save``.
    """
    # silhouette_dict=get_silhouette()
    PF_dict = get_PF_Scores()
    print(PF_dict)

    # Presumably recorded from an earlier get_silhouette() run -- TODO confirm.
    silhouette_dict = {
        '846': 0.0696665713558,
        '600': -0.00947942092601,
        '692': 0.00677807984532,
        '859': 0.00213644924873,
        '852': 0.0536050806794,
        '548': 0.049013577597,
        '713': -0.00076905984951,
        '733': -0.00283923942598,
        '466': -0.0101189101208,
        '689': 0.000810431269303,
        '832': 0.0306981850825,
    }

    # Mean PF scores, kept for reference (not plotted):
    #   '846': 1.1162222222222222,  '600': 1.3975757575757575,
    #   '692': 1.3975757575757575,  '859': 0.77167676767676752,
    #   '852': 0.40196969696969698, '548': 1.871171717171717,
    #   '713': 1.0085050505050503,  '466': 1.174070707070707,
    #   '689': 1.8117575757575759,  '832': 1.2264646464646465,

    # Max PF scores (plotted).
    pf_max = {
        '846': 4.935,
        '600': 4.739,
        '692': 4.739,
        '859': 3.85,
        '852': 0.807,
        '548': 5.91,
        '713': 7.183,
        '466': 3.877,
        '689': 5.835,
        '832': 4.875,
    }

    # Pair both tables; targets without a PF score ('733') are not plotted.
    results = {}
    for tar in silhouette_dict:
        if tar in pf_max:
            results[tar] = (silhouette_dict[tar], pf_max[tar])

    X = []
    Y = []
    L = []
    for k in results:
        print(k)
        X.append(results[k][0])
        Y.append(results[k][1])
        L.append(k)

    fig, ax = plt.subplots()
    ax.scatter(X, Y)
    for label, x, y in zip(L, X, Y):
        ax.annotate(label, (x, y), fontsize=8)

    save('../silhouette_PF_051619', ext="png", close=True, verbose=True)


plot_data()