cif support

023ef208 · Christian Faber · 35a41c02 · 023ef208 · 023ef208 · 023ef208
Commit 023ef208 authored 9 months ago by Christian Faber
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as f:
 setup(
    name="BioHelpers_FABER",
-    version="0.2.5",
+    version="0.2.6",
    description="Small collection of useful scripts for the computational work with RNA Data.",
    url="https://gitlab.jsc.fz-juelich.de/faber1/biohelpers",
    package_dir={"": "src"},

--- a/src/BioHelpers_FABER.egg-info/PKG-INFO
+++ b/src/BioHelpers_FABER.egg-info/PKG-INFO
 Metadata-Version: 2.1
 Name: BioHelpers-FABER
-Version: 0.2.5
+Version: 0.2.6
 Summary: Small collection of useful scripts for the computational work with RNA Data.
 Home-page: https://gitlab.jsc.fz-juelich.de/faber1/biohelpers
 Author: Christian Faber

--- a/src/BioHelpers_FABER/rmsd.py
+++ b/src/BioHelpers_FABER/rmsd.py
 import numpy as np
 import Bio.PDB as pdb
-from os.path import exists
+from os.path import exists, splitext
 from typing import Optional
 import BioHelpers_FABER.bio_mod as bm
@@ -154,23 +154,36 @@ def rmsd(x: np.ndarray, y: np.ndarray) -> float:
    return np.sqrt(meanSquareDis)
+def __get_chain_from_file(file: str) -> pdb.Chain.Chain:
+    if not exists(file):
+        raise FileNotFoundError
+    _, extension = splitext(file)
+    if extension == ".pdb":
+        structure = pdb.PDBParser().get_structure("", file)[0]
+        chain = next(structure.get_chains())
+        return chain
+    elif extension == ".cif":
+        structure = pdb.MMCifParser().get_structure("", file)[0]
+        chain = next(structure.get_chains())
+        return chain
+    else:
+        raise ValueError
 def rmsd_pdb(file1: str, file2: str, exclude_residues: Optional[list] = None) -> float:
    """Calculate RMSD for two given PDBs
-    :param file1: Filename/Path of PDB1
+    :param file1: Filename/Path of PDB/CIF1
    :type file1: str
-    :param file2: Filename/Path of PDB2
+    :param file2: Filename/Path of PDB/CIF2
    :type file2: str
    :param exclude_residues: Which residues shouldn't be considered for RMSD (eg. [(1,3), (9,12)] for not considering residue 1,2,3 and residue 9,10,11,12)
    :type exclude_residues: list
    :return: RMSD as float, if files do not exist return 0
    :rtype: float
    """
-    if (exists(file1)) and (exists(file2)):
+    p1 = __get_chain_from_file(file1)
-        p1 = pdb.PDBParser().get_structure("PDB1", file1)[0]
+    p2 = __get_chain_from_file(file2)
-        p2 = pdb.PDBParser().get_structure("PDB2", file2)[0]
-        p1 = next(p1.get_chains())
-        p2 = next(p2.get_chains())
    bm.cleanup_chain(p1)
    bm.cleanup_chain(p2)
    if exclude_residues != None:
@@ -186,35 +199,30 @@ def rmsd_pdb(file1: str, file2: str, exclude_residues: Optional[list] = None) ->
    p1_AA = np.transpose(np.array(p1_AA))
    p2_AA = np.transpose(np.array(p2_AA))
    return rmsd(p1_AA, p2_AA)
-    else:
-        return 0.0
 def rmsd_per_residue(file1: str, file2: str) -> list[float]:
    """RMSD for all residues seperately
-    :param file1: PDB File 1
+    :param file1: PDB/cif File 1
    :type file1: str
-    :param file2: PDB File 2
+    :param file2: PDB/cif File 2
    :type file2: str
    :return: List of length L (number of residues) with all rmsds per residue
    :rtype: list[float]
    """
-    if (exists(file1)) and (exists(file2)):
    values = []
    p1 = pdb.PDBParser().get_structure("PDB1", file1)[0]
    p2 = pdb.PDBParser().get_structure("PDB2", file2)[0]
-        p1 = next(p1.get_chains())
+    p1 = __get_chain_from_file(file1)
-        p2 = next(p2.get_chains())
+    p2 = __get_chain_from_file(file2)
    bm.cleanup_chain(p1)
    bm.cleanup_chain(p2)
    p1_AA = [list(a.get_vector()) for a in p1.get_atoms() if a.id != "OP3"]
    p2_AA = [list(a.get_vector()) for a in p2.get_atoms() if a.id != "OP3"]
    p1_AA = np.transpose(np.array(p1_AA))
    p2_AA = np.transpose(np.array(p2_AA))
-        rot, trans = get_rotation_matrix(p1_AA, p2_AA), get_translation_vector(
+    rot, trans = get_rotation_matrix(p1_AA, p2_AA), get_translation_vector(p1_AA, p2_AA)
-            p1_AA, p2_AA
-        )
    for res1, res2 in zip(p1.get_residues(), p2.get_residues()):
        res1_AA = [list(a.get_vector()) for a in res1.get_atoms()]
        res2_AA = [list(a.get_vector()) for a in res2.get_atoms()]