we made a function superimpose
which takes 7 arguments (PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out filename) .
Then we have extracted the respective coordinates of the respective atoms from the mentioned residues in the input for both the files using the get_coordinates
function.
After that, we have used the RMSD solution and added it to superimpose
function to calculate the RMSD rotation and translation matrix.
Then we have incorporated function download_pdb
into the superimpose
function in orderto download the required PDB files.
And at last, we changed the coordinates of all the atoms of the first file using the rotation and the translation matrix through the function apply_rot_trans
The script is :
import Bio
from Bio.PDB import *
from numpy import *
from numpy.linalg import svd, det
from pathlib import Path
def superimpose(PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out_filename):
# Parse PDB file
inputs = []
coordinates_both_files = []
inputs.append([PDB_id_1,Chain_id_1, Res_1])
inputs.append([PDB_id_2,Chain_id_2, Res_2])
for i in range(len(inputs)):
# downloading the pdb file
filename = download_pdb(inputs[i][0])
p=PDBParser()
s=p.get_structure(inputs[i][0], filename)
# Getting the required Chain
# hierarchy: structure[model][chain][residue no.][atom]
chain = s[0][inputs[i][1]]
coordinates_both_files.append(get_coordinates(chain, inputs[i][2]))
##################### superimpose ######################
# Nr of atoms
x = coordinates_both_files[0]
y = coordinates_both_files[1]
N=x.shape[1]
########################
# Center x and y
x=center(x)
y=center(y)
# correlation matrix
#r=y*x.T
r = x*y.T
# SVD of correlation matrix
#v, s, wt=svd(r)
v, s, wt=svd(r)
#w=wt.T
w=wt.T
#vt=v.T
vt=v.T
# Rotation matrix
u=w*vt
# Check for roto-reflection
if det(u)<0:
z=diag((1,1,-1))
u=w*z*vt
s[2]=-s[2]
# Translation matrix
#t = y - (u * x)
t = u * x+y
print("Translation Matrix:",t)
# Calculate RMSD
#e0=sum(x.A*x.A+y.A*y.A)
e0=sum(y.A*y.A+x.A*x.A)
print("E0 ", e0)
rmsd=sqrt((e0-2*sum(s))/N)
print('RMSD (svd) ', rmsd)
print("u estimated ")
print(u)
# Calculate RMSD from the coordinates
d=x-u*y
d=d.A*d.A
rmsd=sqrt(sum(d)/N)
print('RMSD (real) ', rmsd)
############ applying rotation and translation to all atoms of 1BBH ############
apply_rot_trans(u,t,inputs[0][0],out_filename)
def get_coordinates(chain, res_id_list):
backbone = ["N","CA","C","O"]
coordinates = []
for res_id in res_id_list:
for atom in backbone:
try:
# hierarchy: structure[model][chain][residue no.][atom]
# since we have a chain we can get the atom: atom = chain[residue no.][atom]
atom=chain[res_id][atom]
c=atom.get_coord()
coordinates.append(c)
except:
pass
# Turn coordinate list in 3xn numpy matrix
coordinates=matrix(coordinates) # nx3
coordinates=coordinates.T # 3xn
return coordinates
def center(m):
# Returns centered m
# Calculate center of mass of x
center_of_mass_m=m.sum(1)/m.shape[1]
# Center m
centered_m=m-center_of_mass_m
return centered_m
def download_pdb(pdb_id):
out_dir = Path.cwd()
filename = "pdb"+pdb_id+".ent"
# try and except block to prevent re downloading the same file on multiple executions of the code
try:
# trying to open the file
# will print the given error message if PDB file already exists
fr = open(filename,"r")
fr.close()
print("pdb file already exist")
except:
# will download the file only if it doen not already exists
pdbl = PDBList()
pdbl.retrieve_pdb_file(pdb_id, pdir = out_dir, file_format = "pdb")
return filename
def apply_rot_trans(u,t,pdb_id,out_filename):
superimposed_coord = []
filename = "pdb"+pdb_id+".ent"
p=PDBParser()
s=p.get_structure(pdb_id, filename)
atom_list = list(s.get_atoms())
for a in atom_list:
a.coord = dot(u[1],a.coord)+t
print(a.coord)
# writing the output to a pdb file
io = PDBIO()
struct = io.set_structure(s)
io.save(struct)
PDB_id_1 = "1BBH"
Chain_id_1 = "B"
Res_1 = [16,121,124,125]
PDB_id_2 = "5GYR"
Chain_id_2 = "B"
Res_2 = [16,121,124,125]
out_filename = "rotation.pdb"
superimpose(PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out_filename)
But it gives me the following error :
TypeError Traceback (most recent call last)
<ipython-input-16-e0ae1f929075> in <module>()
150 Res_2 = [16,121,124,125]
151 out_filename = "rotation.pdb"
--> 152 superimpose(PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out_filename)
<ipython-input-16-e0ae1f929075> in superimpose(PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out_filename)
79
80 ############ applying rotation and translation to all atoms of 1BBH ############
---> 81 apply_rot_trans(u,t,inputs[0][0],out_filename)
82
83 def get_coordinates(chain, res_id_list):
<ipython-input-16-e0ae1f929075> in apply_rot_trans(u, t, pdb_id, out_filename)
140 io = PDBIO()
141 struct = io.set_structure(s)
--> 142 io.save(struct)
143
144
~Anaconda3libsite-packagesBioPDBPDBIO.py in save(self, file, select, write_end, preserve_atom_numbering)
376 resseq,
377 icode,
--> 378 chain_id,
379 )
380 fp.write(s)
~Anaconda3libsite-packagesBioPDBPDBIO.py in _get_atom_line(self, atom, hetfield, segid, atom_number, resname, resseq, icode, chain_id, charge)
227 charge,
228 )
--> 229 return _ATOM_FORMAT_STRING % args
230
231 else:
TypeError: only size-1 arrays can be converted to Python scalars
I was trying to write the output which unfortunately gave an error. Plz help me
question from:
https://stackoverflow.com/questions/65936129/unable-to-run-python-code-which-intends-to-show-superimposed-protein