"""Collect per-site prediction probabilities into tab-separated summary files.

Every path used here is relative to the current working directory:

* ``../data/ptf/<numpy_ID>_extracted.ptf``  - one whitespace-separated site per line
* ``../data/numpy/``                        - chunk files; only their count is used
* ``../results/prob_score/``                - probability chunks read with ``numpy.load``
* ``../results/summary/``                   - destination of both summary files
"""

import collections
import math
import os
import random
import re
import sys
import time
from collections import OrderedDict

import numpy

try:
    from sets import Set  # Python 2 only: the ``sets`` module is gone in Python 3
except ImportError:
    Set = set  # keep the name available on interpreters without ``sets``

# One-letter -> three-letter amino-acid residue codes.
res_name_dict={'H':'HIS','K':'LYS','R':'ARG','D':'ASP','E':'GLU','S':'SER','T':'THR','N':'ASN','Q':'GLN','A':'ALA','V':'VAL','L':'LEU','I':'ILE','M':'MET','F':'PHE','Y':'TYR','W':'TRP','P':'PRO','G':'GLY', 'C':'CYS'}


def _load_fold_prob(path_prefix, total_num):
    """Load chunks ``<path_prefix><i>.dat`` for i in [0, total_num) and stack them on axis 0."""
    if total_num < 1:
        # Without this guard the failure would surface later as an unrelated
        # "variable referenced before assignment" style error.
        raise ValueError("no probability chunks to load for prefix %r" % (path_prefix,))
    chunks = [numpy.load(path_prefix + str(i) + '.dat') for i in range(total_num)]
    if len(chunks) == 1:
        # A single chunk is returned untouched, exactly as loaded.
        return chunks[0]
    # One concatenate call instead of re-copying the growing array per chunk.
    return numpy.concatenate(chunks, axis=0)


def PDB_all_folds_NOS_predicted_prob(numpy_ID,total_fold):
    """Write ``../results/summary/PDB_prob_<numpy_ID>.txt``.

    Each line of ``../data/ptf/<numpy_ID>_extracted.ptf`` is split on
    whitespace into: PDB id, x, y, z, one-letter residue code, chain and
    residue number. Line ``i`` of that file is paired with row ``i`` of the
    concatenated probability array of every fold.

    One tab-separated row is written per (site, fold):
    ``PDB_ID, site_no, x, y, z, residue, chain, res_no, fold, prob``.
    ``site_no`` restarts at 0 the first time a PDB id is seen.

    Args:
        numpy_ID: identifier embedded in all input and output file names.
        total_fold: number of folds. When greater than 1 the chunks are read
            from ``<numpy_ID>_3DCNN_fold_<fold>_<i>.dat``; otherwise from
            ``<numpy_ID>_3DCNN_<i>.dat``.

    Raises:
        ValueError: if no chunk file in ``../data/numpy/`` matches.
        KeyError: if a residue letter is not in ``res_name_dict``.
    """
    print(numpy_ID)

    with open('../data/ptf/'+numpy_ID+'_extracted.ptf') as ptf_order:
        dict_list = list(ptf_order)

    # The number of probability chunks is taken from the number of regular
    # files under ../data/numpy/ whose joined path contains both numpy_ID and
    # '.dat' (the substring test runs on the whole path, directory included).
    numpy_dir = '../data/numpy/'
    files = [os.path.join(numpy_dir, f) for f in os.listdir(numpy_dir)]
    files = [t for t in files if os.path.isfile(t) and numpy_ID in t and '.dat' in t]
    total_num = len(files)

    prob_prefix = '../results/prob_score/'+numpy_ID+'_3DCNN_'
    if total_fold>1:
        all_prob = [
            _load_fold_prob(prob_prefix+'fold_'+str(fold)+'_', total_num)
            for fold in range(total_fold)
        ]
    else:
        all_prob = [_load_fold_prob(prob_prefix, total_num)]

    site_no = 0
    pdb_set = set()
    with open('../results/summary/'+'PDB_prob_'+numpy_ID+'.txt','w') as summary_file:
        for i, line in enumerate(dict_list):
            S = line.split()
            PDB_ID = S[0]
            # Round-trip through float so coordinates are written in a
            # normalised form rather than copied verbatim.
            x_ = float(S[1])
            y_ = float(S[2])
            z_ = float(S[3])
            res = res_name_dict[S[4]]
            chain = S[5]
            res_no = S[6]

            if PDB_ID not in pdb_set:
                site_no = 0
                pdb_set.add(PDB_ID)

            site_columns = '\t'.join(
                [PDB_ID, str(site_no), str(x_), str(y_), str(z_), res, chain, str(res_no)]
            )
            for fold in range(total_fold):
                probs_y = all_prob[fold][i]
                summary_file.write(site_columns+'\t'+str(fold)+'\t'+str(probs_y)+'\n')
            site_no += 1


def write_summary(numpy_ID,total_fold):
    """Write ``../results/summary/PDB_max_fold_prob_<numpy_ID>.txt``.

    Reads ``PDB_prob_<numpy_ID>.txt`` and, for every
    (PDB id, chain, residue, residue number), copies through the single input
    line with the highest probability (last column). On ties the line that
    appears first in the input wins. Output rows follow the order in which
    each residue first appears in the input; blank input lines are skipped.

    Args:
        numpy_ID: identifier embedded in the input and output file names.
        total_fold: unused; kept so the signature matches the other steps.
    """
    # pdb_id -> {(chain, res, res_no): (best_prob, best_line)}, insertion ordered.
    results_dict = OrderedDict()
    with open('../results/summary/'+'PDB_prob_'+numpy_ID+'.txt') as result_list:
        for line in result_list:
            ele = line.split()
            if not ele:
                continue
            pdb_id = ele[0]
            # Columns are addressed from the end of the row.
            res = ele[-5]
            chain = ele[-4]
            res_no = ele[-3]
            prob = float(ele[-1])

            per_pdb = results_dict.setdefault(pdb_id, OrderedDict())
            key = (chain,res,res_no)
            # Strict '>' keeps the earliest line among equal probabilities.
            if key not in per_pdb or prob > per_pdb[key][0]:
                per_pdb[key] = (prob, line)

    with open('../results/summary/'+'PDB_max_fold_prob_'+numpy_ID+'.txt','w') as integrate_file:
        for pdb_id in results_dict:
            for key in results_dict[pdb_id]:
                integrate_file.write(results_dict[pdb_id][key][1])


def detection_summary(numpy_ID,total_fold):
    """Produce both summary files for ``numpy_ID`` and print where they were written."""
    PDB_all_folds_NOS_predicted_prob(numpy_ID,total_fold)
    write_summary(numpy_ID,total_fold)
    print ("writing summary files to")
    print ('../results/summary/'+'PDB_prob_'+numpy_ID+'.txt')
    print ('../results/summary/'+'PDB_max_fold_prob_'+numpy_ID+'.txt')