Skip to content
Snippets Groups Projects
Commit 9639f238 authored by Christian Faber's avatar Christian Faber
Browse files

rmsd with excluded regions

parent f6f7fd74
No related branches found
No related tags found
No related merge requests found
Source diff could not be displayed: it is too large. Options to address this: view the blob.
Source diff could not be displayed: it is too large. Options to address this: view the blob.
...@@ -5,7 +5,7 @@ with open("README.md", "r") as f: ...@@ -5,7 +5,7 @@ with open("README.md", "r") as f:
setup( setup(
name="BioHelpers_FABER", name="BioHelpers_FABER",
version="0.2.3", version="0.2.4",
description="Small collection of useful scripts for the computational work with RNA Data.", description="Small collection of useful scripts for the computational work with RNA Data.",
url="https://gitlab.jsc.fz-juelich.de/faber1/biohelpers", url="https://gitlab.jsc.fz-juelich.de/faber1/biohelpers",
package_dir={"": "src"}, package_dir={"": "src"},
......
Metadata-Version: 2.1 Metadata-Version: 2.1
Name: BioHelpers-FABER Name: BioHelpers-FABER
Version: 0.2.2 Version: 0.2.3
Summary: Small collection of useful scripts for the computational work with RNA Data. Summary: Small collection of useful scripts for the computational work with RNA Data.
Home-page: https://gitlab.jsc.fz-juelich.de/faber1/biohelpers Home-page: https://gitlab.jsc.fz-juelich.de/faber1/biohelpers
Author: Christian Faber Author: Christian Faber
......
import numpy as np import numpy as np
import Bio.PDB as pdb import Bio.PDB as pdb
from os.path import exists from os.path import exists
from typing import Optional
import BioHelpers_FABER.bio_mod as bm import BioHelpers_FABER.bio_mod as bm
# All the notation is oriented on Using Quaternions to Calculate RMSD from Coutsias # All the notation is oriented on Using Quaternions to Calculate RMSD from Coutsias
def calc_bary(x: np.ndarray) -> np.ndarray: def calc_bary(x: np.ndarray) -> np.ndarray:
"""Calculation of the barycenter""" """Calculation of the barycenter"""
num_vect = np.shape(x)[1] num_vect = np.shape(x)[1]
return np.sum(x, axis=1) / num_vect return np.sum(x, axis=1) / num_vect
def mat_R(x: np.ndarray, y: np.ndarray) -> np.ndarray: def mat_R(x: np.ndarray, y: np.ndarray) -> np.ndarray:
"""Calculation of R matrix""" """Calculation of R matrix"""
return np.matmul(x, np.transpose(y)) return np.matmul(x, np.transpose(y))
def mat_F(r: np.ndarray) -> np.ndarray: def mat_F(r: np.ndarray) -> np.ndarray:
f = np.ndarray(shape=(4, 4), dtype=float) f = np.ndarray(shape=(4, 4), dtype=float)
f[0, 0] = r[0, 0] + r[1, 1] + r[2, 2] f[0, 0] = r[0, 0] + r[1, 1] + r[2, 2]
...@@ -65,17 +69,20 @@ def rmsd(x: np.ndarray, y: np.ndarray) -> float: ...@@ -65,17 +69,20 @@ def rmsd(x: np.ndarray, y: np.ndarray) -> float:
return np.sqrt(meanSquareDis) return np.sqrt(meanSquareDis)
def rmsd_pdb(file1: str, file2: str) -> float:
def rmsd_pdb(file1: str, file2: str, exclude_residues: Optional[list] = None) -> float:
"""Calculate RMSD for two given PDBs """Calculate RMSD for two given PDBs
:param file1: Filename/Path of PDB1 :param file1: Filename/Path of PDB1
:type file1: str :type file1: str
:param file2: Filename/Path of PDB2 :param file2: Filename/Path of PDB2
:type file2: str :type file2: str
:param exclude_residues: Which residues shouldn't be considered for RMSD (eg. [(1,3), (9,12)] for not considering residue 1,2,3 and residue 9,10,11,12)
:type exclude_residues: list
:return: RMSD as float, if files do not exist return 0 :return: RMSD as float, if files do not exist return 0
:rtype: float :rtype: float
""" """
print(exclude_residues)
if (exists(file1)) and (exists(file2)): if (exists(file1)) and (exists(file2)):
p1 = pdb.PDBParser().get_structure("PDB1", file1)[0] p1 = pdb.PDBParser().get_structure("PDB1", file1)[0]
p2 = pdb.PDBParser().get_structure("PDB2", file2)[0] p2 = pdb.PDBParser().get_structure("PDB2", file2)[0]
...@@ -83,10 +90,16 @@ def rmsd_pdb(file1: str, file2: str) -> float: ...@@ -83,10 +90,16 @@ def rmsd_pdb(file1: str, file2: str) -> float:
p2 = next(p2.get_chains()) p2 = next(p2.get_chains())
bm.cleanup_chain(p1) bm.cleanup_chain(p1)
bm.cleanup_chain(p2) bm.cleanup_chain(p2)
if exclude_residues != None:
for border in exclude_residues:
ids = list(range(border[0], border[1] + 1))
for i in ids:
p1.detach_child((" ", i, " "))
p2.detach_child((" ", i, " "))
# p1_AA = [list(a.get_vector()) for a in p1.get_atoms()] # p1_AA = [list(a.get_vector()) for a in p1.get_atoms()]
# p2_AA = [list(a.get_vector()) for a in p2.get_atoms()] # p2_AA = [list(a.get_vector()) for a in p2.get_atoms()]
p1_AA = [list(a.get_vector()) for a in p1.get_atoms() if a.id != 'OP3'] p1_AA = [list(a.get_vector()) for a in p1.get_atoms() if a.id != "OP3"]
p2_AA = [list(a.get_vector()) for a in p2.get_atoms() if a.id != 'OP3'] p2_AA = [list(a.get_vector()) for a in p2.get_atoms() if a.id != "OP3"]
p1_AA = np.transpose(np.array(p1_AA)) p1_AA = np.transpose(np.array(p1_AA))
p2_AA = np.transpose(np.array(p2_AA)) p2_AA = np.transpose(np.array(p2_AA))
return rmsd(p1_AA, p2_AA) return rmsd(p1_AA, p2_AA)
...@@ -94,11 +107,8 @@ def rmsd_pdb(file1: str, file2: str) -> float: ...@@ -94,11 +107,8 @@ def rmsd_pdb(file1: str, file2: str) -> float:
return 0.0 return 0.0
if __name__ == "__main__": if __name__ == "__main__":
print("------------------") print("------------------")
print(" RMSD 1.1 ") print(" RMSD 1.1 ")
print("------------------\n") print("------------------\n")
print("Module to calculate the RMSD.") print("Module to calculate the RMSD.")
import src.BioHelpers_FABER.rmsd as rmsd
def main():
file1 = "4gma_rpr.pdb"
file2 = "4gma_rpr_barnacle.pdb"
unmodelled_regions = [(1, 2), (25, 32), (107, 109), (179, 183)]
val = rmsd.rmsd_pdb(file1, file2, exclude_residues=unmodelled_regions)
print("Hallo Welt", val)
if __name__ == "__main__":
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment