Skip to content
Snippets Groups Projects
Select Git revision
  • 48785ddbc94e0830ad278d678dbb12dd05ef585b
  • master default protected
  • tf2
  • tf2_pytorch
  • issue_3
  • issue_2
  • 2019a
  • juwels_2019a
  • jureca_2019_a
9 results

lenet_auto_solver.prototxt

Blame
  • rmsd.py 8.32 KiB
    import numpy as np
    import Bio.PDB as pdb
    from os.path import exists, splitext
    from typing import Optional
    import BioHelpers_FABER.bio_mod as bm
    
    # All the notation is oriented on Using Quaternions to Calculate RMSD from Coutsias
    
    
    def calc_bary(x: np.ndarray) -> np.ndarray:
        """Calculation of the barycenter"""
        num_vect = np.shape(x)[1]
        return np.sum(x, axis=1) / num_vect
    
    
    def mat_R(x: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Calculation of R matrix"""
        return np.matmul(x, np.transpose(y))
    
    
    def mat_F(r: np.ndarray) -> np.ndarray:
        f = np.ndarray(shape=(4, 4), dtype=float)
        f[0, 0] = r[0, 0] + r[1, 1] + r[2, 2]
        f[0, 1] = r[1, 2] - r[2, 1]
        f[0, 2] = r[2, 0] - r[0, 2]
        f[0, 3] = r[0, 1] - r[1, 0]
        f[1, 0] = r[1, 2] - r[2, 1]
        f[1, 1] = r[0, 0] - r[1, 1] - r[2, 2]
        f[1, 2] = r[0, 1] + r[1, 0]
        f[1, 3] = r[0, 2] + r[2, 0]
        f[2, 0] = r[2, 0] - r[0, 2]
        f[2, 1] = r[0, 1] + r[1, 0]
        f[2, 2] = -r[0, 0] + r[1, 1] - r[2, 2]
        f[2, 3] = r[1, 2] + r[2, 1]
        f[3, 0] = r[0, 1] - r[1, 0]
        f[3, 1] = r[0, 2] + r[2, 0]
        f[3, 2] = r[1, 2] + r[2, 1]
        f[3, 3] = -r[0, 0] - r[1, 1] + r[2, 2]
        return f
    
    
    def mat_U(q: np.ndarray) -> np.ndarray:
        """Calculation of the rotation matrix U from quaternion q
    
        :param q: Quaterion
        :type q: np.ndarray
        :return: 3x3 rotation matrix
        :rtype: np.ndarray
        """
        u = np.ndarray(shape=(3, 3), dtype=float)
        u[0, 0] = q[0] ** 2 + q[1] ** 2 - q[2] ** 2 - q[3] ** 2
        u[0, 1] = 2 * (q[1] * q[2] - q[0] * q[3])
        u[0, 2] = 2 * (q[1] * q[3] + q[0] * q[2])
        u[1, 0] = 2 * (q[1] * q[2] + q[0] * q[3])
        u[1, 1] = q[0] ** 2 - q[1] ** 2 + q[2] ** 2 - q[3] ** 2
        u[1, 2] = 2 * (q[2] * q[3] - q[0] * q[1])
        u[2, 0] = 2 * (q[1] * q[3] - q[0] * q[2])
        u[2, 1] = 2 * (q[2] * q[3] + q[0] * q[1])
        u[2, 2] = q[0] ** 2 - q[1] ** 2 - q[2] ** 2 + q[3] ** 2
        return u
    
    
    def get_rotation_matrix(x: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Get the rotation matrix to a corresponding set of atom vectors
    
        :param x: set 1
        :type x: np.ndarray
        :param y: set 2
        :type y: np.ndarray
        :return: rotation matrix U(q_max)
        :rtype: np.ndarray
        """
        # Find the barycenter and shift input sets
        shift_x = np.transpose(np.transpose(x) - calc_bary(x))
        shift_y = np.transpose(np.transpose(y) - calc_bary(y))
    
        # Calculate the max eigenvalue for F
        eigenval, eigenvec = np.linalg.eig(mat_F(mat_R(shift_x, shift_y)))
        index_max = np.argmax(eigenval)
        return mat_U(eigenvec[:, index_max])
    
    
    def get_translation_vector(x: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Get the translation vector :math:`\mathbf{r}` |br|
        **Important Note:** The translation vector differs from Kabsch algorithm, there you translate befor rotating. In Coutsias paper it is the otherway round.
    
        :param x: set of vectors
        :type x: np.ndarray
        :param y: set of vectors
        :type y: np.ndarray
        :return: translation vector w/ shape (3,1)
        :rtype: np.ndarray
        """
        rot_mat = get_rotation_matrix(x, y)
        r = calc_bary(y) - np.matmul(rot_mat, calc_bary(x))
        return r
    
    
    def rmsd_from_rot_tran(
        x: np.ndarray, y: np.ndarray, rot: np.ndarray, trans: np.ndarray
    ) -> float:
        """Calculate an RMSD like value for a given set of vectors x and y with a given rotation matrix and translation vector
        Mathematical formula:
    
        .. math:: \mathrm{E} = \\frac{1}{M} \sum_{k=1..M} |\mathscr{U}\mathbf{x}^k + \mathbf{r} - \mathbf{y}^k|^2
    
        **Important Note:** The translation vector differs from Kabsch algorithm, there you translate before rotating. In Coutsias paper it is the otherway round.
    
        :param x: Set of vectors
        :type x: np.ndarray
        :param y: Set of vectors
        :type y: np.ndarray
        :param rot: rotation matrix (3x3)
        :type rot: np.ndarray
        :param trans: Translation vector r, see note above
        :type trans: np.ndarray
        :return: RMSD like value (:math:`\sqrt{E}`)
        :rtype: float
        """
        num_vec = x.shape[1]
        intermediate = np.matmul(rot, x) + trans[:, np.newaxis] + y
        squared = np.trace(np.matmul(np.transpose(intermediate), intermediate)) / num_vec
        return np.sqrt(squared)
    
    
    def rmsd(x: np.ndarray, y: np.ndarray) -> float:
        """Calculation of Root Mean Square Displacement
    
        :param x: Array of all atoms, molecule 1
        :type x: np.ndarray
        :param y: Array of all atoms, molecule 2
        :type y: np.ndarray
        :return: RMSD
        :rtype: float
        """
    
        num_vect = np.shape(x)[1]
    
        # Find the barycenter and shift input sets
        shift_x = np.transpose(np.transpose(x) - calc_bary(x))
        shift_y = np.transpose(np.transpose(y) - calc_bary(y))
    
        # Calculate the max eigenvalue for F
        eigenval, _ = np.linalg.eig(mat_F(mat_R(shift_x, shift_y)))
        lam = np.sort(eigenval)[-1]  # Choose largest eigenvalue
    
        # Squared vectors
        x_square = np.trace(np.matmul(np.transpose(shift_x), shift_x))
        y_square = np.trace(np.matmul(np.transpose(shift_y), shift_y))
    
        # RMSD
        meanSquareDis = round((x_square + y_square - 2 * lam) / num_vect, 4)
    
        return np.sqrt(meanSquareDis)
    
    
    def __get_chain_from_file(file: str) -> pdb.Chain.Chain:
        if not exists(file):
            raise FileNotFoundError
        _, extension = splitext(file)
        if extension == ".pdb":
            structure = pdb.PDBParser().get_structure("", file)[0]
            chain = next(structure.get_chains())
            return chain
        elif extension == ".cif":
            structure = pdb.MMCIFParser().get_structure("", file)[0]
            chain = next(structure.get_chains())
            return chain
        else:
            raise ValueError
    
    
    def rmsd_pdb(file1: str, file2: str, exclude_residues: Optional[list] = None) -> float:
        """Calculate RMSD for two given PDBs
    
        :param file1: Filename/Path of PDB/CIF1
        :type file1: str
        :param file2: Filename/Path of PDB/CIF2
        :type file2: str
        :param exclude_residues: Which residues shouldn't be considered for RMSD (eg. [(1,3), (9,12)] for not considering residue 1,2,3 and residue 9,10,11,12)
        :type exclude_residues: list
        :return: RMSD as float, if files do not exist return 0
        :rtype: float
        """
        p1 = __get_chain_from_file(file1)
        p2 = __get_chain_from_file(file2)
        bm.cleanup_chain(p1)
        bm.cleanup_chain(p2)
        if exclude_residues != None:
            for border in exclude_residues:
                ids = list(range(border[0], border[1] + 1))
                for i in ids:
                    p1.detach_child((" ", i, " "))
                    p2.detach_child((" ", i, " "))
        # p1_AA = [list(a.get_vector()) for a in p1.get_atoms()]
        # p2_AA =  [list(a.get_vector()) for a in p2.get_atoms()]
        p1_AA = [
            list(a.get_vector())
            for a in p1.get_atoms()
            if a.id != "OP3" and a.element != "H"
        ]
        p2_AA = [
            list(a.get_vector())
            for a in p2.get_atoms()
            if a.id != "OP3" and a.element != "H"
        ]
        p1_AA = np.transpose(np.array(p1_AA))
        p2_AA = np.transpose(np.array(p2_AA))
        return rmsd(p1_AA, p2_AA)
    
    
    def rmsd_per_residue(file1: str, file2: str) -> list[float]:
        """RMSD for all residues seperately
    
        :param file1: PDB/cif File 1
        :type file1: str
        :param file2: PDB/cif File 2
        :type file2: str
        :return: List of length L (number of residues) with all rmsds per residue
        :rtype: list[float]
        """
        values = []
        p1 = __get_chain_from_file(file1)
        p2 = __get_chain_from_file(file2)
        bm.cleanup_chain(p1)
        bm.cleanup_chain(p2)
        p1_AA = [
            list(a.get_vector())
            for a in p1.get_atoms()
            if a.id != "OP3" and a.element != "H"
        ]
        p2_AA = [
            list(a.get_vector())
            for a in p2.get_atoms()
            if a.id != "OP3" and a.element != "H"
        ]
        p1_AA = np.transpose(np.array(p1_AA))
        p2_AA = np.transpose(np.array(p2_AA))
        rot, trans = get_rotation_matrix(p1_AA, p2_AA), get_translation_vector(p1_AA, p2_AA)
        for res1, res2 in zip(p1.get_residues(), p2.get_residues()):
            res1_AA = [list(a.get_vector()) for a in res1.get_atoms() if a.element != "H"]
            res2_AA = [list(a.get_vector()) for a in res2.get_atoms() if a.element != "H"]
            res1_AA = np.transpose(np.array(res1_AA))
            res2_AA = np.transpose(np.array(res2_AA))
            values.append(rmsd_from_rot_tran(res1_AA, res2_AA, rot, trans))
    
        return values
    
    
    if __name__ == "__main__":
        print("------------------")
        print("     RMSD 1.1     ")
        print("------------------\n")
        print("Module to calculate the RMSD.")