diff --git a/setup.py b/setup.py index a6e754388087e7f56737c8f56653dd28d5305bcd..dc0b7ae8d39f8c926e9f37206cfd444d0b81e0fe 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r") as f: setup( name="BioHelpers_FABER", - version="0.2.5", + version="0.2.6", description="Small collection of useful scripts for the computational work with RNA Data.", url="https://gitlab.jsc.fz-juelich.de/faber1/biohelpers", package_dir={"": "src"}, diff --git a/src/BioHelpers_FABER.egg-info/PKG-INFO b/src/BioHelpers_FABER.egg-info/PKG-INFO index a568cf1a0fa333965bacf6cd64652b14e4384cd8..8f9c070d5cb1d6a1dc49980ae10dc58ea2d17d8f 100644 --- a/src/BioHelpers_FABER.egg-info/PKG-INFO +++ b/src/BioHelpers_FABER.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: BioHelpers-FABER -Version: 0.2.5 +Version: 0.2.6 Summary: Small collection of useful scripts for the computational work with RNA Data. Home-page: https://gitlab.jsc.fz-juelich.de/faber1/biohelpers Author: Christian Faber diff --git a/src/BioHelpers_FABER/rmsd.py b/src/BioHelpers_FABER/rmsd.py index 7cafa106b6ae4564a6858fb3207e208307d82de6..cbfa95a8ee39d9aa55f3061faa84bd81dab20b51 100644 --- a/src/BioHelpers_FABER/rmsd.py +++ b/src/BioHelpers_FABER/rmsd.py @@ -1,6 +1,6 @@ import numpy as np import Bio.PDB as pdb -from os.path import exists +from os.path import exists, splitext from typing import Optional import BioHelpers_FABER.bio_mod as bm @@ -154,73 +154,81 @@ def rmsd(x: np.ndarray, y: np.ndarray) -> float: return np.sqrt(meanSquareDis) +def __get_chain_from_file(file: str) -> pdb.Chain.Chain: + if not exists(file): + raise FileNotFoundError + _, extension = splitext(file) + if extension == ".pdb": + structure = pdb.PDBParser().get_structure("", file)[0] + chain = next(structure.get_chains()) + return chain + elif extension == ".cif": + structure = pdb.MMCifParser().get_structure("", file)[0] + chain = next(structure.get_chains()) + return chain + else: + raise ValueError + + def rmsd_pdb(file1: str, file2: str, exclude_residues: Optional[list] = None) -> float: """Calculate RMSD for two given PDBs - :param file1: Filename/Path of PDB1 + :param file1: Filename/Path of PDB/CIF1 :type file1: str - :param file2: Filename/Path of PDB2 + :param file2: Filename/Path of PDB/CIF2 :type file2: str :param exclude_residues: Which residues shouldn't be considered for RMSD (eg. [(1,3), (9,12)] for not considering residue 1,2,3 and residue 9,10,11,12) :type exclude_residues: list :return: RMSD as float, if files do not exist return 0 :rtype: float """ - if (exists(file1)) and (exists(file2)): - p1 = pdb.PDBParser().get_structure("PDB1", file1)[0] - p2 = pdb.PDBParser().get_structure("PDB2", file2)[0] - p1 = next(p1.get_chains()) - p2 = next(p2.get_chains()) - bm.cleanup_chain(p1) - bm.cleanup_chain(p2) - if exclude_residues != None: - for border in exclude_residues: - ids = list(range(border[0], border[1] + 1)) - for i in ids: - p1.detach_child((" ", i, " ")) - p2.detach_child((" ", i, " ")) - # p1_AA = [list(a.get_vector()) for a in p1.get_atoms()] - # p2_AA = [list(a.get_vector()) for a in p2.get_atoms()] - p1_AA = [list(a.get_vector()) for a in p1.get_atoms() if a.id != "OP3"] - p2_AA = [list(a.get_vector()) for a in p2.get_atoms() if a.id != "OP3"] - p1_AA = np.transpose(np.array(p1_AA)) - p2_AA = np.transpose(np.array(p2_AA)) - return rmsd(p1_AA, p2_AA) - else: - return 0.0 + p1 = __get_chain_from_file(file1) + p2 = __get_chain_from_file(file2) + bm.cleanup_chain(p1) + bm.cleanup_chain(p2) + if exclude_residues != None: + for border in exclude_residues: + ids = list(range(border[0], border[1] + 1)) + for i in ids: + p1.detach_child((" ", i, " ")) + p2.detach_child((" ", i, " ")) + # p1_AA = [list(a.get_vector()) for a in p1.get_atoms()] + # p2_AA = [list(a.get_vector()) for a in p2.get_atoms()] + p1_AA = [list(a.get_vector()) for a in p1.get_atoms() if a.id != "OP3"] + p2_AA = [list(a.get_vector()) for a in p2.get_atoms() if a.id != "OP3"] + p1_AA = np.transpose(np.array(p1_AA)) + p2_AA = np.transpose(np.array(p2_AA)) + return rmsd(p1_AA, p2_AA) def rmsd_per_residue(file1: str, file2: str) -> list[float]: """RMSD for all residues seperately - :param file1: PDB File 1 + :param file1: PDB/cif File 1 :type file1: str - :param file2: PDB File 2 + :param file2: PDB/cif File 2 :type file2: str :return: List of length L (number of residues) with all rmsds per residue :rtype: list[float] """ - if (exists(file1)) and (exists(file2)): - values = [] - p1 = pdb.PDBParser().get_structure("PDB1", file1)[0] - p2 = pdb.PDBParser().get_structure("PDB2", file2)[0] - p1 = next(p1.get_chains()) - p2 = next(p2.get_chains()) - bm.cleanup_chain(p1) - bm.cleanup_chain(p2) - p1_AA = [list(a.get_vector()) for a in p1.get_atoms() if a.id != "OP3"] - p2_AA = [list(a.get_vector()) for a in p2.get_atoms() if a.id != "OP3"] - p1_AA = np.transpose(np.array(p1_AA)) - p2_AA = np.transpose(np.array(p2_AA)) - rot, trans = get_rotation_matrix(p1_AA, p2_AA), get_translation_vector( - p1_AA, p2_AA - ) - for res1, res2 in zip(p1.get_residues(), p2.get_residues()): - res1_AA = [list(a.get_vector()) for a in res1.get_atoms()] - res2_AA = [list(a.get_vector()) for a in res2.get_atoms()] - res1_AA = np.transpose(np.array(res1_AA)) - res2_AA = np.transpose(np.array(res2_AA)) - values.append(rmsd_from_rot_tran(res1_AA, res2_AA, rot, trans)) + values = [] + p1 = pdb.PDBParser().get_structure("PDB1", file1)[0] + p2 = pdb.PDBParser().get_structure("PDB2", file2)[0] + p1 = __get_chain_from_file(file1) + p2 = __get_chain_from_file(file2) + bm.cleanup_chain(p1) + bm.cleanup_chain(p2) + p1_AA = [list(a.get_vector()) for a in p1.get_atoms() if a.id != "OP3"] + p2_AA = [list(a.get_vector()) for a in p2.get_atoms() if a.id != "OP3"] + p1_AA = np.transpose(np.array(p1_AA)) + p2_AA = np.transpose(np.array(p2_AA)) + rot, trans = get_rotation_matrix(p1_AA, p2_AA), get_translation_vector(p1_AA, p2_AA) + for res1, res2 in zip(p1.get_residues(), p2.get_residues()): + res1_AA = [list(a.get_vector()) for a in res1.get_atoms()] + res2_AA = [list(a.get_vector()) for a in res2.get_atoms()] + res1_AA = np.transpose(np.array(res1_AA)) + res2_AA = np.transpose(np.array(res2_AA)) + values.append(rmsd_from_rot_tran(res1_AA, res2_AA, rot, trans)) return values