rmsd with excluded regions

9639f238 · Christian Faber · f6f7fd74 · 9639f238 · 9639f238 · 9639f238
Commit 9639f238 authored 8 months ago by Christian Faber
--- a/4gma_rpr.pdb
+++ b/4gma_rpr.pdb
--- a/4gma_rpr_barnacle.pdb
+++ b/4gma_rpr_barnacle.pdb
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as f:
 setup(
    name="BioHelpers_FABER",
-    version="0.2.3",
+    version="0.2.4",
    description="Small collection of useful scripts for the computational work with RNA Data.",
    url="https://gitlab.jsc.fz-juelich.de/faber1/biohelpers",
    package_dir={"": "src"},

--- a/src/BioHelpers_FABER.egg-info/PKG-INFO
+++ b/src/BioHelpers_FABER.egg-info/PKG-INFO
 Metadata-Version: 2.1
 Name: BioHelpers-FABER
-Version: 0.2.2
+Version: 0.2.3
 Summary: Small collection of useful scripts for the computational work with RNA Data.
 Home-page: https://gitlab.jsc.fz-juelich.de/faber1/biohelpers
 Author: Christian Faber

--- a/src/BioHelpers_FABER/rmsd.py
+++ b/src/BioHelpers_FABER/rmsd.py
 import numpy as np
 import Bio.PDB as pdb
 from os.path import exists
+from typing import Optional
 import BioHelpers_FABER.bio_mod as bm
 # All the notation is oriented on Using Quaternions to Calculate RMSD from Coutsias
 def calc_bary(x: np.ndarray) -> np.ndarray:
    """Calculation of the barycenter"""
    num_vect = np.shape(x)[1]
    return np.sum(x, axis=1) / num_vect
 def mat_R(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Calculation of R matrix"""
    return np.matmul(x, np.transpose(y))
 def mat_F(r: np.ndarray) -> np.ndarray:
    f = np.ndarray(shape=(4, 4), dtype=float)
    f[0, 0] = r[0, 0] + r[1, 1] + r[2, 2]
@@ -65,17 +69,20 @@ def rmsd(x: np.ndarray, y: np.ndarray) -> float:
    return np.sqrt(meanSquareDis)
-def rmsd_pdb(file1: str, file2: str) -> float:
+def rmsd_pdb(file1: str, file2: str, exclude_residues: Optional[list] = None) -> float:
    """Calculate RMSD for two given PDBs
    :param file1: Filename/Path of PDB1
    :type file1: str
    :param file2: Filename/Path of PDB2
    :type file2: str
+    :param exclude_residues: Which residues shouldn't be considered for RMSD (eg. [(1,3), (9,12)] for not considering residue 1,2,3 and residue 9,10,11,12)
+    :type exclude_residues: list
    :return: RMSD as float, if files do not exist return 0
    :rtype: float
    """
+    print(exclude_residues)
    if (exists(file1)) and (exists(file2)):
        p1 = pdb.PDBParser().get_structure("PDB1", file1)[0]
        p2 = pdb.PDBParser().get_structure("PDB2", file2)[0]
@@ -83,10 +90,16 @@ def rmsd_pdb(file1: str, file2: str) -> float:
        p2 = next(p2.get_chains())
        bm.cleanup_chain(p1)
        bm.cleanup_chain(p2)
+        if exclude_residues != None:
+            for border in exclude_residues:
+                ids = list(range(border[0], border[1] + 1))
+                for i in ids:
+                    p1.detach_child((" ", i, " "))
+                    p2.detach_child((" ", i, " "))
        # p1_AA = [list(a.get_vector()) for a in p1.get_atoms()]
        # p2_AA =  [list(a.get_vector()) for a in p2.get_atoms()]
-        p1_AA = [list(a.get_vector()) for a in p1.get_atoms() if a.id != 'OP3']
+        p1_AA = [list(a.get_vector()) for a in p1.get_atoms() if a.id != "OP3"]
-        p2_AA =  [list(a.get_vector()) for a in p2.get_atoms() if a.id != 'OP3']
+        p2_AA = [list(a.get_vector()) for a in p2.get_atoms() if a.id != "OP3"]
        p1_AA = np.transpose(np.array(p1_AA))
        p2_AA = np.transpose(np.array(p2_AA))
        return rmsd(p1_AA, p2_AA)
@@ -94,11 +107,8 @@ def rmsd_pdb(file1: str, file2: str) -> float:
        return 0.0
 if __name__ == "__main__":
    print("------------------")
    print("     RMSD 1.1     ")
    print("------------------\n")
    print("Module to calculate the RMSD.")
--- a/test_rmsd.py
+++ b/test_rmsd.py
+import src.BioHelpers_FABER.rmsd as rmsd
+def main():
+    file1 = "4gma_rpr.pdb"
+    file2 = "4gma_rpr_barnacle.pdb"
+    unmodelled_regions = [(1, 2), (25, 32), (107, 109), (179, 183)]
+    val = rmsd.rmsd_pdb(file1, file2, exclude_residues=unmodelled_regions)
+    print("Hallo Welt", val)
+if __name__ == "__main__":
+    main()