Select Git revision
lenet_auto_solver.prototxt
rmsd.py 8.32 KiB
import numpy as np
import Bio.PDB as pdb
from os.path import exists, splitext
from typing import Optional
import BioHelpers_FABER.bio_mod as bm
# All the notation is oriented on Using Quaternions to Calculate RMSD from Coutsias
def calc_bary(x: np.ndarray) -> np.ndarray:
"""Calculation of the barycenter"""
num_vect = np.shape(x)[1]
return np.sum(x, axis=1) / num_vect
def mat_R(x: np.ndarray, y: np.ndarray) -> np.ndarray:
"""Calculation of R matrix"""
return np.matmul(x, np.transpose(y))
def mat_F(r: np.ndarray) -> np.ndarray:
f = np.ndarray(shape=(4, 4), dtype=float)
f[0, 0] = r[0, 0] + r[1, 1] + r[2, 2]
f[0, 1] = r[1, 2] - r[2, 1]
f[0, 2] = r[2, 0] - r[0, 2]
f[0, 3] = r[0, 1] - r[1, 0]
f[1, 0] = r[1, 2] - r[2, 1]
f[1, 1] = r[0, 0] - r[1, 1] - r[2, 2]
f[1, 2] = r[0, 1] + r[1, 0]
f[1, 3] = r[0, 2] + r[2, 0]
f[2, 0] = r[2, 0] - r[0, 2]
f[2, 1] = r[0, 1] + r[1, 0]
f[2, 2] = -r[0, 0] + r[1, 1] - r[2, 2]
f[2, 3] = r[1, 2] + r[2, 1]
f[3, 0] = r[0, 1] - r[1, 0]
f[3, 1] = r[0, 2] + r[2, 0]
f[3, 2] = r[1, 2] + r[2, 1]
f[3, 3] = -r[0, 0] - r[1, 1] + r[2, 2]
return f
def mat_U(q: np.ndarray) -> np.ndarray:
"""Calculation of the rotation matrix U from quaternion q
:param q: Quaterion
:type q: np.ndarray
:return: 3x3 rotation matrix
:rtype: np.ndarray
"""
u = np.ndarray(shape=(3, 3), dtype=float)
u[0, 0] = q[0] ** 2 + q[1] ** 2 - q[2] ** 2 - q[3] ** 2
u[0, 1] = 2 * (q[1] * q[2] - q[0] * q[3])
u[0, 2] = 2 * (q[1] * q[3] + q[0] * q[2])
u[1, 0] = 2 * (q[1] * q[2] + q[0] * q[3])
u[1, 1] = q[0] ** 2 - q[1] ** 2 + q[2] ** 2 - q[3] ** 2
u[1, 2] = 2 * (q[2] * q[3] - q[0] * q[1])
u[2, 0] = 2 * (q[1] * q[3] - q[0] * q[2])
u[2, 1] = 2 * (q[2] * q[3] + q[0] * q[1])
u[2, 2] = q[0] ** 2 - q[1] ** 2 - q[2] ** 2 + q[3] ** 2
return u
def get_rotation_matrix(x: np.ndarray, y: np.ndarray) -> np.ndarray:
"""Get the rotation matrix to a corresponding set of atom vectors
:param x: set 1
:type x: np.ndarray
:param y: set 2
:type y: np.ndarray
:return: rotation matrix U(q_max)
:rtype: np.ndarray
"""
# Find the barycenter and shift input sets
shift_x = np.transpose(np.transpose(x) - calc_bary(x))
shift_y = np.transpose(np.transpose(y) - calc_bary(y))
# Calculate the max eigenvalue for F
eigenval, eigenvec = np.linalg.eig(mat_F(mat_R(shift_x, shift_y)))
index_max = np.argmax(eigenval)
return mat_U(eigenvec[:, index_max])
def get_translation_vector(x: np.ndarray, y: np.ndarray) -> np.ndarray:
"""Get the translation vector :math:`\mathbf{r}` |br|
**Important Note:** The translation vector differs from Kabsch algorithm, there you translate befor rotating. In Coutsias paper it is the otherway round.
:param x: set of vectors
:type x: np.ndarray
:param y: set of vectors
:type y: np.ndarray
:return: translation vector w/ shape (3,1)
:rtype: np.ndarray
"""
rot_mat = get_rotation_matrix(x, y)
r = calc_bary(y) - np.matmul(rot_mat, calc_bary(x))
return r
def rmsd_from_rot_tran(
x: np.ndarray, y: np.ndarray, rot: np.ndarray, trans: np.ndarray
) -> float:
"""Calculate an RMSD like value for a given set of vectors x and y with a given rotation matrix and translation vector
Mathematical formula:
.. math:: \mathrm{E} = \\frac{1}{M} \sum_{k=1..M} |\mathscr{U}\mathbf{x}^k + \mathbf{r} - \mathbf{y}^k|^2
**Important Note:** The translation vector differs from Kabsch algorithm, there you translate before rotating. In Coutsias paper it is the otherway round.
:param x: Set of vectors
:type x: np.ndarray
:param y: Set of vectors
:type y: np.ndarray
:param rot: rotation matrix (3x3)
:type rot: np.ndarray
:param trans: Translation vector r, see note above
:type trans: np.ndarray
:return: RMSD like value (:math:`\sqrt{E}`)
:rtype: float
"""
num_vec = x.shape[1]
intermediate = np.matmul(rot, x) + trans[:, np.newaxis] + y
squared = np.trace(np.matmul(np.transpose(intermediate), intermediate)) / num_vec
return np.sqrt(squared)
def rmsd(x: np.ndarray, y: np.ndarray) -> float:
"""Calculation of Root Mean Square Displacement
:param x: Array of all atoms, molecule 1
:type x: np.ndarray
:param y: Array of all atoms, molecule 2
:type y: np.ndarray
:return: RMSD
:rtype: float
"""
num_vect = np.shape(x)[1]
# Find the barycenter and shift input sets
shift_x = np.transpose(np.transpose(x) - calc_bary(x))
shift_y = np.transpose(np.transpose(y) - calc_bary(y))
# Calculate the max eigenvalue for F
eigenval, _ = np.linalg.eig(mat_F(mat_R(shift_x, shift_y)))
lam = np.sort(eigenval)[-1] # Choose largest eigenvalue
# Squared vectors
x_square = np.trace(np.matmul(np.transpose(shift_x), shift_x))
y_square = np.trace(np.matmul(np.transpose(shift_y), shift_y))
# RMSD
meanSquareDis = round((x_square + y_square - 2 * lam) / num_vect, 4)
return np.sqrt(meanSquareDis)
def __get_chain_from_file(file: str) -> pdb.Chain.Chain:
if not exists(file):
raise FileNotFoundError
_, extension = splitext(file)
if extension == ".pdb":
structure = pdb.PDBParser().get_structure("", file)[0]
chain = next(structure.get_chains())
return chain
elif extension == ".cif":
structure = pdb.MMCIFParser().get_structure("", file)[0]
chain = next(structure.get_chains())
return chain
else:
raise ValueError
def rmsd_pdb(file1: str, file2: str, exclude_residues: Optional[list] = None) -> float:
"""Calculate RMSD for two given PDBs
:param file1: Filename/Path of PDB/CIF1
:type file1: str
:param file2: Filename/Path of PDB/CIF2
:type file2: str
:param exclude_residues: Which residues shouldn't be considered for RMSD (eg. [(1,3), (9,12)] for not considering residue 1,2,3 and residue 9,10,11,12)
:type exclude_residues: list
:return: RMSD as float, if files do not exist return 0
:rtype: float
"""
p1 = __get_chain_from_file(file1)
p2 = __get_chain_from_file(file2)
bm.cleanup_chain(p1)
bm.cleanup_chain(p2)
if exclude_residues != None:
for border in exclude_residues:
ids = list(range(border[0], border[1] + 1))
for i in ids:
p1.detach_child((" ", i, " "))
p2.detach_child((" ", i, " "))
# p1_AA = [list(a.get_vector()) for a in p1.get_atoms()]
# p2_AA = [list(a.get_vector()) for a in p2.get_atoms()]
p1_AA = [
list(a.get_vector())
for a in p1.get_atoms()
if a.id != "OP3" and a.element != "H"
]
p2_AA = [
list(a.get_vector())
for a in p2.get_atoms()
if a.id != "OP3" and a.element != "H"
]
p1_AA = np.transpose(np.array(p1_AA))
p2_AA = np.transpose(np.array(p2_AA))
return rmsd(p1_AA, p2_AA)
def rmsd_per_residue(file1: str, file2: str) -> list[float]:
"""RMSD for all residues seperately
:param file1: PDB/cif File 1
:type file1: str
:param file2: PDB/cif File 2
:type file2: str
:return: List of length L (number of residues) with all rmsds per residue
:rtype: list[float]
"""
values = []
p1 = __get_chain_from_file(file1)
p2 = __get_chain_from_file(file2)
bm.cleanup_chain(p1)
bm.cleanup_chain(p2)
p1_AA = [
list(a.get_vector())
for a in p1.get_atoms()
if a.id != "OP3" and a.element != "H"
]
p2_AA = [
list(a.get_vector())
for a in p2.get_atoms()
if a.id != "OP3" and a.element != "H"
]
p1_AA = np.transpose(np.array(p1_AA))
p2_AA = np.transpose(np.array(p2_AA))
rot, trans = get_rotation_matrix(p1_AA, p2_AA), get_translation_vector(p1_AA, p2_AA)
for res1, res2 in zip(p1.get_residues(), p2.get_residues()):
res1_AA = [list(a.get_vector()) for a in res1.get_atoms() if a.element != "H"]
res2_AA = [list(a.get_vector()) for a in res2.get_atoms() if a.element != "H"]
res1_AA = np.transpose(np.array(res1_AA))
res2_AA = np.transpose(np.array(res2_AA))
values.append(rmsd_from_rot_tran(res1_AA, res2_AA, rot, trans))
return values
if __name__ == "__main__":
print("------------------")
print(" RMSD 1.1 ")
print("------------------\n")
print("Module to calculate the RMSD.")