Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
133 views
in Technique[技术] by (71.8m points)

Unable to run python code which intends to show superimposed protein

we made a function superimpose which takes 7 arguments (PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out filename) .

Then we have extracted the respective coordinates of the respective atoms from the mentioned residues in the input for both the files using the get_coordinates function.

After that, we have used the RMSD solution and added it to superimpose function to calculate the RMSD rotation and translation matrix.

Then we have incorporated function download_pdb into the superimpose function in orderto download the required PDB files.

And at last, we changed the coordinates of all the atoms of the first file using the rotation and the translation matrix through the function apply_rot_trans

The script is :

import Bio
from Bio.PDB import *
from numpy import *
from numpy.linalg import svd, det
from pathlib import Path

def superimpose(PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out_filename):
    # Parse PDB file
    inputs = []
    coordinates_both_files = []
    inputs.append([PDB_id_1,Chain_id_1, Res_1])
    inputs.append([PDB_id_2,Chain_id_2, Res_2])
    for i in range(len(inputs)):
        # downloading the pdb file
        filename = download_pdb(inputs[i][0])
        p=PDBParser()
        s=p.get_structure(inputs[i][0], filename)
        # Getting the required Chain 
        # hierarchy: structure[model][chain][residue no.][atom]
        chain = s[0][inputs[i][1]]
        coordinates_both_files.append(get_coordinates(chain, inputs[i][2]))

    ##################### superimpose ######################
    # Nr of atoms
    x = coordinates_both_files[0]
    y = coordinates_both_files[1]
    N=x.shape[1]

    ########################

    # Center x and y
    x=center(x)
    y=center(y)

    # correlation matrix
    #r=y*x.T
    r = x*y.T

    # SVD of correlation matrix
    #v, s, wt=svd(r)
    v, s, wt=svd(r)

    #w=wt.T
    w=wt.T

    #vt=v.T
    vt=v.T

    # Rotation matrix
    u=w*vt

    # Check for roto-reflection
    if det(u)<0:
        z=diag((1,1,-1))
        u=w*z*vt
        s[2]=-s[2]

    # Translation matrix
    #t = y - (u * x)
    t = u * x+y
    print("Translation Matrix:",t)

    # Calculate RMSD
    #e0=sum(x.A*x.A+y.A*y.A)
    e0=sum(y.A*y.A+x.A*x.A)
    print("E0 ", e0)
    rmsd=sqrt((e0-2*sum(s))/N)

    print('RMSD (svd) ', rmsd)

    print("u estimated ")
    print(u)

    # Calculate RMSD from the coordinates 
    d=x-u*y
    d=d.A*d.A
    rmsd=sqrt(sum(d)/N)
    print('RMSD (real) ', rmsd)

    ############ applying rotation and translation to all atoms of 1BBH ############
    apply_rot_trans(u,t,inputs[0][0],out_filename)

def get_coordinates(chain, res_id_list):
    backbone = ["N","CA","C","O"]
    coordinates = []
    for res_id in res_id_list:
        for atom in backbone:
            try:
                # hierarchy: structure[model][chain][residue no.][atom]
                # since we have a chain we can get the atom: atom = chain[residue no.][atom]
                atom=chain[res_id][atom]
                c=atom.get_coord()
                coordinates.append(c)
            except:
                pass

    # Turn coordinate list in 3xn numpy matrix
    coordinates=matrix(coordinates) # nx3
    coordinates=coordinates.T       # 3xn
    return coordinates

def center(m):
   # Returns centered m
   # Calculate center of mass of x
    center_of_mass_m=m.sum(1)/m.shape[1]
    # Center m
    centered_m=m-center_of_mass_m
    return centered_m 

def download_pdb(pdb_id):
    out_dir = Path.cwd()
    filename = "pdb"+pdb_id+".ent"
    # try and except block to prevent re downloading the same file on multiple executions of the code
    try:
        # trying to open the file
        # will print the given error message if PDB file already exists
        fr = open(filename,"r")
        fr.close()
        print("pdb file already exist") 

    except:
        # will download the file only if it doen not already exists
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(pdb_id, pdir = out_dir, file_format = "pdb")

    return filename

def apply_rot_trans(u,t,pdb_id,out_filename):
    superimposed_coord = []
    filename = "pdb"+pdb_id+".ent"
    p=PDBParser()
    s=p.get_structure(pdb_id, filename)
    atom_list = list(s.get_atoms())

    for a in atom_list:
        a.coord = dot(u[1],a.coord)+t
        print(a.coord)

    # writing the output to a pdb file    
    io = PDBIO()
    struct = io.set_structure(s)
    io.save(struct)


PDB_id_1 = "1BBH"
Chain_id_1 = "B"
Res_1 = [16,121,124,125]
PDB_id_2 = "5GYR"
Chain_id_2 = "B"
Res_2 = [16,121,124,125]
out_filename = "rotation.pdb"
superimpose(PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out_filename)

But it gives me the following error :

TypeError                                 Traceback (most recent call last)
<ipython-input-16-e0ae1f929075> in <module>()
    150 Res_2 = [16,121,124,125]
    151 out_filename = "rotation.pdb"
--> 152 superimpose(PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out_filename)

<ipython-input-16-e0ae1f929075> in superimpose(PDB_id_1, Chain_id_1, Res_1, PDB_id_2, Chain_id_2, Res_2, out_filename)
     79 
     80     ############ applying rotation and translation to all atoms of 1BBH ############
---> 81     apply_rot_trans(u,t,inputs[0][0],out_filename)
     82 
     83 def get_coordinates(chain, res_id_list):

<ipython-input-16-e0ae1f929075> in apply_rot_trans(u, t, pdb_id, out_filename)
    140     io = PDBIO()
    141     struct = io.set_structure(s)
--> 142     io.save(struct)
    143 
    144 

~Anaconda3libsite-packagesBioPDBPDBIO.py in save(self, file, select, write_end, preserve_atom_numbering)
    376                                 resseq,
    377                                 icode,
--> 378                                 chain_id,
    379                             )
    380                             fp.write(s)

~Anaconda3libsite-packagesBioPDBPDBIO.py in _get_atom_line(self, atom, hetfield, segid, atom_number, resname, resseq, icode, chain_id, charge)
    227                 charge,
    228             )
--> 229             return _ATOM_FORMAT_STRING % args
    230 
    231         else:

TypeError: only size-1 arrays can be converted to Python scalars

I was trying to write the output which unfortunately gave an error. Plz help me

question from:https://stackoverflow.com/questions/65936129/unable-to-run-python-code-which-intends-to-show-superimposed-protein

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Answer

0 votes
by (71.8m points)
Waitting for answers

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...