Source code for old_pdb.pdb_entry

"""Top-level module for PDB structure entries.

The specifications used in this class are derived from the `Protein Data Bank
Contents Guide: Atomic Coordinate Entry Format Description, Version 3.3
<https://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html>`_.

.. codeauthor::  Nathan Baker
"""
from old_pdb.primary import DatabaseReference
from old_pdb.secondary import Helix
from old_pdb.heterogen import HeterogenSynonym
import logging
from . import annotation, primary, heterogen, secondary, coordinates
from . import crystallography, bookkeeping


# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)

# Two-row column ruler, 80 characters wide: the first row carries the tens
# digit above every tenth column, the second row the units digit of each
# column.  It is printed above an offending line in parse errors.
REF_LINE = "\n".join(
    (
        "0        1         2         3         4         5         6         7         8",
        "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
    )
)


class Entry:
    """Top-level class for PDB structure entry.

    Records are grouped by section of the PDB format.  Records that are held
    as a single object are created lazily by their property on first access;
    repeated records are kept in lists in the order they were parsed.
    """

    # The factories in the tables below are wrapped in lambdas so that the
    # record classes are looked up when a line is parsed rather than when this
    # class body is executed.

    # Records that may occur only once: a second occurrence is an error.
    # Maps record name -> (attribute name, factory).
    _UNIQUE_RECORDS = {
        "HEADER": ("_header", lambda: annotation.Header()),
        "NUMMDL": ("_num_model", lambda: annotation.NumModels()),
        "CRYST1": ("_unit_cell", lambda: crystallography.UnitCell()),
        "MASTER": ("_master", lambda: bookkeeping.Master()),
    }

    # Records held in one object per entry; every line carrying the record
    # name is fed to that same object.
    # Maps record name -> (attribute name, factory).
    _CONTINUED_RECORDS = {
        "OBSLTE": ("_obsolete", lambda: annotation.Obsolete()),
        "TITLE": ("_title", lambda: annotation.Title()),
        "SPLIT": ("_split", lambda: annotation.Split()),
        "CAVEAT": ("_caveat", lambda: annotation.Caveat()),
        "COMPND": ("_compound", lambda: annotation.Compound()),
        "SOURCE": ("_source", lambda: annotation.Source()),
        "KEYWDS": ("_keyword", lambda: annotation.Keywords()),
        "EXPDTA": (
            "_experimental_data",
            lambda: annotation.ExperimentalData(),
        ),
        "MDLTYP": ("_model_type", lambda: annotation.ModelType()),
        "AUTHOR": ("_author", lambda: annotation.Author()),
        "REVDAT": ("_revision_data", lambda: annotation.RevisionData()),
        "SPRSDE": ("_supersedes", lambda: annotation.Supersedes()),
        "SEQRES": ("_sequence_residue", lambda: primary.SequenceResidues()),
        "HETNAM": ("_heterogen_name", lambda: heterogen.HeterogenName()),
        "HETSYN": (
            "_heterogen_synonym",
            lambda: heterogen.HeterogenSynonym(),
        ),
        "FORMUL": ("_heterogen_formula", lambda: heterogen.Formula()),
    }

    # Records for which each line becomes a new object appended to a list.
    # Maps record name -> (attribute name of the list, factory).
    _LISTED_RECORDS = {
        "JRNL": ("_journal", lambda: annotation.Journal()),
        "REMARK": ("_remark", lambda: annotation.Remark()),
        "DBREF": ("_database_reference", lambda: primary.DatabaseReference()),
        "DBREF1": (
            "_database_reference",
            lambda: primary.DatabaseReference1(),
        ),
        "DBREF2": (
            "_database_reference",
            lambda: primary.DatabaseReference2(),
        ),
        "SEQADV": (
            "_sequence_difference",
            lambda: primary.SequenceDifferences(),
        ),
        "HET": ("_heterogen", lambda: heterogen.Heterogen()),
        "HELIX": ("_helix", lambda: secondary.Helix()),
        "SHEET": ("_sheet", lambda: secondary.Sheet()),
        "SSBOND": ("_disulfide_bond", lambda: secondary.DisulfideBond()),
        "LINK": ("_link", lambda: secondary.Link()),
        "CISPEP": ("_cis_peptide", lambda: secondary.CisPeptide()),
        "SITE": ("site", lambda: annotation.Site()),
        "MODEL": ("_model", lambda: coordinates.Model()),
        "CONECT": ("_connect", lambda: bookkeeping.Connection()),
    }

    # Transform records named <prefix>1..3; at most three per prefix.
    # Maps record prefix -> (attribute name of the list, factory taking n).
    _TRANSFORM_RECORDS = {
        "ORIGX": (
            "_orig_transform",
            lambda n: crystallography.OriginalTransform(n),
        ),
        "SCALE": (
            "_frac_transform",
            lambda n: crystallography.FractionalTransform(n),
        ),
        "MTRIX": (
            "_noncrystal_transform",
            lambda n: crystallography.NoncrystalTransform(n),
        ),
    }

    def __init__(self):
        # Title section
        self._header = None
        self._obsolete = None
        self._title = None
        self._split = None
        self._caveat = None
        self._compound = None
        self._source = None
        self._keyword = None
        self._experimental_data = None
        self._num_model = None
        self._model_type = None
        self._author = None
        self._revision_data = None
        self._supersedes = None
        self._journal = []
        self._remark = []
        # Primary structure section
        self._database_reference = []
        self._sequence_difference = []
        self._sequence_residue = None
        self._modified_residue = []
        # Heterogen section
        self._heterogen = []
        self._heterogen_name = None
        self._heterogen_synonym = None
        self._heterogen_formula = None
        # Secondary structure section
        self._helix = []
        self._sheet = []
        # Connectivity annotation section
        self._disulfide_bond = []
        self._link = []
        self._cis_peptide = []
        # Miscellaneous section
        self.site = []
        # Crystallographic and coordinate transformation section
        self._unit_cell = None
        self._orig_transform = []
        self._frac_transform = []
        self._noncrystal_transform = []
        # Coordinate section
        self._model = []
        # Connectivity section
        self._connect = []
        # Bookkeeping section
        self._master = None

    @property
    def header(self) -> annotation.Header:
        """:class:`.annotation.Header` HEADER record."""
        if self._header is None:
            self._header = annotation.Header()
        return self._header

    @header.setter
    def header(self, value):
        self._header = value

    @property
    def obsolete(self) -> annotation.Obsolete:
        """:class:`.annotation.Obsolete` OBSLTE record."""
        if self._obsolete is None:
            self._obsolete = annotation.Obsolete()
        return self._obsolete

    @obsolete.setter
    def obsolete(self, value):
        self._obsolete = value

    @property
    def title(self) -> annotation.Title:
        """:class:`.annotation.Title` TITLE record."""
        if self._title is None:
            self._title = annotation.Title()
        return self._title

    @title.setter
    def title(self, value):
        self._title = value

    @property
    def split(self) -> annotation.Split:
        """:class:`.annotation.Split` SPLIT record."""
        if self._split is None:
            self._split = annotation.Split()
        return self._split

    @split.setter
    def split(self, value):
        self._split = value

    @property
    def caveat(self) -> annotation.Caveat:
        """:class:`.annotation.Caveat` CAVEAT record."""
        if self._caveat is None:
            self._caveat = annotation.Caveat()
        return self._caveat

    @caveat.setter
    def caveat(self, value):
        self._caveat = value

    @property
    def compound(self) -> annotation.Compound:
        """:class:`.annotation.Compound` COMPND record."""
        if self._compound is None:
            self._compound = annotation.Compound()
        return self._compound

    @compound.setter
    def compound(self, value):
        self._compound = value

    @property
    def source(self) -> annotation.Source:
        """:class:`.annotation.Source` SOURCE record."""
        if self._source is None:
            self._source = annotation.Source()
        return self._source

    @source.setter
    def source(self, value):
        self._source = value

    @property
    def keyword(self) -> annotation.Keywords:
        """:class:`.annotation.Keywords` KEYWDS record."""
        if self._keyword is None:
            self._keyword = annotation.Keywords()
        return self._keyword

    @keyword.setter
    def keyword(self, value):
        self._keyword = value

    @property
    def experimental_data(self) -> annotation.ExperimentalData:
        """:class:`.annotation.ExperimentalData` EXPDTA record."""
        if self._experimental_data is None:
            self._experimental_data = annotation.ExperimentalData()
        return self._experimental_data

    @experimental_data.setter
    def experimental_data(self, value):
        self._experimental_data = value

    @property
    def num_model(self) -> annotation.NumModels:
        """:class:`.annotation.NumModels` NUMMDL record."""
        if self._num_model is None:
            # NOTE(review): lazy creation is logged at error level, unlike
            # every other lazily-created record -- confirm the severity.
            _LOGGER.error("Creating new NumModel")
            self._num_model = annotation.NumModels()
        return self._num_model

    @num_model.setter
    def num_model(self, value):
        self._num_model = value

    @property
    def model_type(self) -> annotation.ModelType:
        """:class:`.annotation.ModelType` MDLTYP record."""
        if self._model_type is None:
            self._model_type = annotation.ModelType()
        return self._model_type

    @model_type.setter
    def model_type(self, value):
        self._model_type = value

    @property
    def author(self) -> annotation.Author:
        """:class:`.annotation.Author` AUTHOR record."""
        if self._author is None:
            self._author = annotation.Author()
        return self._author

    @author.setter
    def author(self, value):
        # The setter must carry the property's own name and write the
        # attribute that the getter reads.
        self._author = value

    @property
    def revision_data(self) -> annotation.RevisionData:
        """:class:`.annotation.RevisionData` REVDAT record."""
        if self._revision_data is None:
            self._revision_data = annotation.RevisionData()
        return self._revision_data

    @revision_data.setter
    def revision_data(self, value):
        self._revision_data = value

    @property
    def supersedes(self) -> annotation.Supersedes:
        """:class:`.annotation.Supersedes` SPRSDE record."""
        if self._supersedes is None:
            self._supersedes = annotation.Supersedes()
        return self._supersedes

    @supersedes.setter
    def supersedes(self, value):
        self._supersedes = value

    @property
    def journal(self) -> list:
        """List of :class:`.annotation.Journal` JRNL records."""
        if self._journal is None:
            # Stored as a list (one object per JRNL line), so an unset value
            # is replaced by an empty list rather than a single Journal.
            self._journal = []
        return self._journal

    @journal.setter
    def journal(self, value):
        self._journal = value

    @property
    def remark(self) -> list:
        """List of :class:`annotation.Remark` REMARK records."""
        return self._remark

    @remark.setter
    def remark(self, value):
        self._remark = value

    @property
    def database_reference(self) -> list:
        """List of :class:`primary.DatabaseReference` DBREF records."""
        return self._database_reference

    @database_reference.setter
    def database_reference(self, value):
        self._database_reference = value

    @property
    def sequence_difference(self) -> list:
        """List of :class:`primary.SequenceDifferences` SEQADV records."""
        return self._sequence_difference

    @sequence_difference.setter
    def sequence_difference(self, value):
        self._sequence_difference = value

    @property
    def sequence_residue(self) -> primary.SequenceResidues:
        """:class:`.primary.SequenceResidues` SEQRES record.

        ``None`` until a SEQRES line has been parsed or a value assigned.
        """
        return self._sequence_residue

    @sequence_residue.setter
    def sequence_residue(self, value):
        self._sequence_residue = value

    @property
    def modified_residue(self) -> list:
        """List of :class:`.primary.ModifiedResidue` MODRES records."""
        return self._modified_residue

    @modified_residue.setter
    def modified_residue(self, value):
        self._modified_residue = value

    @property
    def heterogen(self) -> list:
        """List of :class:`.heterogen.Heterogen` HET records."""
        return self._heterogen

    @heterogen.setter
    def heterogen(self, value):
        self._heterogen = value

    @property
    def heterogen_name(self) -> heterogen.HeterogenName:
        """:class:`.heterogen.HeterogenName` HETNAM record."""
        if self._heterogen_name is None:
            self._heterogen_name = heterogen.HeterogenName()
        return self._heterogen_name

    @heterogen_name.setter
    def heterogen_name(self, value):
        self._heterogen_name = value

    @property
    def heterogen_synonym(self) -> heterogen.HeterogenSynonym:
        """:class:`.heterogen.HeterogenSynonym` HETSYN record."""
        if self._heterogen_synonym is None:
            self._heterogen_synonym = heterogen.HeterogenSynonym()
        return self._heterogen_synonym

    @heterogen_synonym.setter
    def heterogen_synonym(self, value):
        self._heterogen_synonym = value

    @property
    def heterogen_formula(self) -> heterogen.Formula:
        """:class:`heterogen.Formula` FORMUL record."""
        if self._heterogen_formula is None:
            self._heterogen_formula = heterogen.Formula()
        return self._heterogen_formula

    @heterogen_formula.setter
    def heterogen_formula(self, value):
        self._heterogen_formula = value

    @property
    def helix(self) -> list:
        """List of :class:`.secondary.Helix` HELIX records."""
        return self._helix

    @helix.setter
    def helix(self, value):
        self._helix = value

    @property
    def sheet(self) -> list:
        """List of :class:`.secondary.Sheet` SHEET records."""
        return self._sheet

    @sheet.setter
    def sheet(self, value):
        self._sheet = value

    @property
    def disulfide_bond(self) -> list:
        """List of :class:`.secondary.DisulfideBond` SSBOND records."""
        return self._disulfide_bond

    @disulfide_bond.setter
    def disulfide_bond(self, value):
        self._disulfide_bond = value

    @property
    def link(self) -> list:
        """List of :class:`.secondary.Link` LINK records."""
        return self._link

    @link.setter
    def link(self, value):
        self._link = value

    @property
    def cis_peptide(self) -> list:
        """List of :class:`.secondary.CisPeptide` CISPEP records."""
        return self._cis_peptide

    @cis_peptide.setter
    def cis_peptide(self, value):
        self._cis_peptide = value

    @property
    def unit_cell(self) -> crystallography.UnitCell:
        """:class:`.crystallography.UnitCell` CRYST1 record."""
        if self._unit_cell is None:
            self._unit_cell = crystallography.UnitCell()
        return self._unit_cell

    @unit_cell.setter
    def unit_cell(self, value):
        self._unit_cell = value

    @property
    def original_transform(self) -> list:
        """List of :class:`.crystallography.OriginalTransform` ORIGX records."""
        return self._orig_transform

    @original_transform.setter
    def original_transform(self, value):
        self._orig_transform = value

    @property
    def frac_transform(self) -> list:
        """List of :class:`crystallography.FractionalTransform` SCALEn records."""
        return self._frac_transform

    @frac_transform.setter
    def frac_transform(self, value):
        self._frac_transform = value

    @property
    def noncrystal_transform(self) -> list:
        """List of :class:`.crystallography.NoncrystalTransform` MTRIXn records."""
        return self._noncrystal_transform

    @noncrystal_transform.setter
    def noncrystal_transform(self, value):
        self._noncrystal_transform = value

    @property
    def model(self) -> list:
        """List of :class:`.coordinates.Model` MODEL records."""
        return self._model

    @model.setter
    def model(self, value):
        self._model = value

    @property
    def connect(self) -> list:
        """List of :class:`.bookkeeping.Connection` CONECT records."""
        return self._connect

    @connect.setter
    def connect(self, value):
        self._connect = value

    @property
    def master(self) -> bookkeeping.Master:
        """:class:`.bookkeeping.Master` MASTER record."""
        # Create the record only when none exists yet; an existing (parsed)
        # record must be returned untouched.
        if self._master is None:
            self._master = bookkeeping.Master()
        return self._master

    @master.setter
    def master(self, value):
        self._master = value

    def find_residue(self, chain_id, residue_id, model_num=1) -> list:
        """Find a specific residue.

        A warning is logged when no atom matches.

        :param str chain_id:  chain ID to find
        :param int residue_id:  residue ID to find
        :param int model_num:  model number to use (1-based index into the
            list of models)
        :returns:  list of :class:`.coordinates.Atom`-like objects; empty if
            nothing matched
        :raises IndexError:  if there is no model with that number
        """
        model = self._model[model_num - 1]
        atoms = [
            atom
            for atom in model.all_atoms
            if atom.chain_id == chain_id and atom.res_seq == residue_id
        ]
        if not atoms:
            _LOGGER.warning(
                "Unable to find residue %s in chain %s of model %s.",
                residue_id,
                chain_id,
                model_num,
            )
        return atoms

    def find_atom_by_name(
        self, chain_id, residue_id, atom_name, model_num=1
    ) -> coordinates.Atom:
        """Find a specific atom by name.

        :param str chain_id:  chain ID to find
        :param int residue_id:  residue ID to find
        :param str atom_name:  name of atom to find
        :param int model_num:  model number to use
        :returns:  first matching ATOM or HETATM object, or ``None`` when the
            residue holds no atom with that name
        """
        for atom in self.find_residue(chain_id, residue_id, model_num):
            if atom.name == atom_name:
                return atom
        return None

    def __str__(self):
        """Return the records as text, one per line, ending with END."""
        strings = []

        def emit(records):
            """Append the text of every record that is present."""
            strings.extend(
                str(record) for record in records if record is not None
            )

        # Title section
        emit(
            [
                self._header,
                self._obsolete,
                self._title,
                self._split,
                self._caveat,
                self._compound,
                self._source,
                self._keyword,
                self._experimental_data,
                self._num_model,
                self._model_type,
                # Read the stored attribute, not the ``author`` property: the
                # property would create (and print) an empty AUTHOR record.
                self._author,
                self._revision_data,
                self._supersedes,
            ]
            + self._journal
            + self._remark
        )
        # Primary structure section
        emit(
            self._database_reference
            + self._sequence_difference
            + [self._sequence_residue]
            + self._modified_residue
        )
        # Heterogen section
        emit(
            self._heterogen
            + [
                self._heterogen_name,
                self._heterogen_synonym,
                self._heterogen_formula,
            ]
        )
        # Secondary structure section
        emit(self._helix + self._sheet)
        # Connectivity annotation section
        emit(self._disulfide_bond)
        # ``annotate_link`` is not defined on this class; apply it only when
        # it is available (e.g. supplied by a subclass), otherwise write the
        # LINK records unchanged instead of failing with AttributeError.
        annotate = getattr(self, "annotate_link", None)
        for record in self._link:
            if annotate is not None:
                record = annotate(record)
            if record is not None:
                strings.append(str(record))
        emit(self._cis_peptide)
        # Miscellaneous section
        emit(self.site)
        # Crystallographic and coordinate transformation section
        emit(
            [self._unit_cell]
            + self._orig_transform
            + self._frac_transform
            + self._noncrystal_transform
        )
        # Coordinate section
        for record in self._model:
            if record is not None:
                strings.append(str(record))
                # Models are only delimited when there is more than one.
                if len(self._model) > 1:
                    strings.append("ENDMDL")
        # Connectivity section
        emit(self._connect)
        # Bookkeeping section
        emit([self._master])
        strings.append("END ")
        return "\n".join(strings)

    def parse_file(self, file_):
        """Parse a PDB file.

        :param file file_:  file open for reading.
        :raises ValueError:  if any line fails to parse; the message shows
            the offending line under a column ruler and the original
            exception is chained as the cause
        """
        for line in file_:
            try:
                self.parse_line(line)
            except Exception as exc:
                err = f"Offending line:\n{REF_LINE}\n{line}"
                raise ValueError(err) from exc

    def parse_line(self, line):
        """Parse a line of a PDB file.

        The record name is taken from the first six columns and the line is
        handed to the matching record object.  ENDMDL and END are accepted
        and ignored.

        :param str line:  line of PDB file
        :raises ValueError:  for an unknown record name, a repeated
            HEADER/NUMMDL/CRYST1/MASTER record, or more than three
            transforms of one kind
        """
        name = line[0:6].strip()
        if name in ("ATOM", "ANISOU", "TER", "HETATM"):
            # Coordinates that appear before any MODEL record go into an
            # implicitly created model.
            if not self._model:
                self._model = [coordinates.Model()]
            self._model[-1].parse_line(line)
        elif name in self._UNIQUE_RECORDS:
            self._parse_unique(name, line)
        elif name in self._CONTINUED_RECORDS:
            attr, factory = self._CONTINUED_RECORDS[name]
            record = getattr(self, attr)
            # ``is None`` rather than truthiness, so that an existing record
            # is never discarded just because it evaluates as false.
            if record is None:
                record = factory()
                setattr(self, attr, record)
            record.parse_line(line)
        elif name in self._LISTED_RECORDS:
            attr, factory = self._LISTED_RECORDS[name]
            record = factory()
            record.parse_line(line)
            getattr(self, attr).append(record)
        elif (
            len(name) == 6
            and name[:5] in self._TRANSFORM_RECORDS
            and name[5] in ("1", "2", "3")
        ):
            self._parse_transform(name, line)
        elif name in ("ENDMDL", "END"):
            pass
        else:
            err = f"Unexpected entry:\n{line}"
            raise ValueError(err)

    def _parse_unique(self, name, line):
        """Parse a record that may occur only once; raise if it is repeated."""
        attr, factory = self._UNIQUE_RECORDS[name]
        existing = getattr(self, attr)
        if existing is not None:
            if name == "MASTER":
                err = f"MASTER record already exists. Got: {line}."
            else:
                err = f"{name} already exists:\n{existing}"
            raise ValueError(err)
        record = factory()
        setattr(self, attr, record)
        record.parse_line(line)

    def _parse_transform(self, name, line):
        """Parse an ORIGXn/SCALEn/MTRIXn line; allow at most three per kind."""
        attr, factory = self._TRANSFORM_RECORDS[name[:5]]
        transform = factory(int(name[5]))
        transform.parse_line(line)
        transforms = getattr(self, attr)
        transforms.append(transform)
        if len(transforms) > 3:
            err = f"Too many ({len(transforms)}) transforms."
            raise ValueError(err)

    def num_transforms(self) -> int:
        """Return the number of optional transform records in entry.

        :returns:  number of ORIGXn + SCALEn + MTRIXn
        """
        return (
            len(self._orig_transform)
            + len(self._frac_transform)
            + len(self._noncrystal_transform)
        )

    def num_atoms(self, heavy_only=True) -> int:
        """Number of ATOM and HETATM entries in the first model.

        :param bool heavy_only:  exclude hydrogen atoms from count
        :returns:  atom count reported by the first model; 0 if the entry
            has no models
        """
        if not self._model:
            return 0
        return self._model[0].num_atoms(heavy_only)

    def num_chains(self) -> int:
        """Number of chains in the first model (0 if there are no models)."""
        if not self._model:
            return 0
        return self._model[0].num_chains()

    def num_residues(self, count_hetatm=False) -> int:
        """Number of residues in the first model (0 if there are no models).

        :param bool count_hetatm:  include heterogen residues in count
        """
        if not self._model:
            return 0
        return self._model[0].num_residues(count_hetatm)

    def num_ter(self) -> int:
        """Number of TER records in entry, summed over all models."""
        return sum(model.num_ter() for model in self._model)

    def check_master(self):
        """Check the contents against internal bookkeeping records.

        Every mismatch between a count stored in the MASTER record and the
        number of records actually held is logged as a warning; nothing is
        raised.  If the entry has no MASTER record, a warning is logged and
        no comparison is made.
        """
        master = self._master
        if master is None:
            _LOGGER.warning("No MASTER record to check against.")
            return
        checks = [
            (
                "model",
                self._num_model.model_number
                if self._num_model is not None
                else 1,
                len(self._model),
            ),
            ("REMARK", master.num_remark, len(self._remark)),
            # Compared against the HET records held, so labelled "HET".
            ("HET", master.num_het, len(self._heterogen)),
            ("HELIX", master.num_helix, len(self._helix)),
            ("SHEET", master.num_sheet, len(self._sheet)),
            ("SITE", master.num_site, len(self.site)),
            ("transform", master.num_xform, self.num_transforms()),
            ("coordinate", master.num_coord, self.num_atoms()),
            ("TER", master.num_ter, self.num_ter()),
            ("CONECT", master.num_conect, len(self._connect)),
        ]
        for field, expected, test in checks:
            # Plain comparison instead of ``assert`` so the check still runs
            # when Python is started with -O.
            if expected != test:
                _LOGGER.warning(
                    "MASTER indicates %s %s records; found %s."
                    " However, the MASTER record is hard to interpret.",
                    expected,
                    field,
                    test,
                )