Source code for old_pdb.heterogen

"""Classes for PDB records that provide heterogen information.

.. codeauthor::  Todd Dolinsky
.. codeauthor::  Yong Huang
.. codeauthor::  Nathan Baker
"""
import logging
from collections import OrderedDict
from .general import BaseRecord


# Module-level logger, named after this module's import path.
_LOGGER = logging.getLogger(__name__)


class Heterogen(BaseRecord):
    """HET field

    HET records are used to describe non-standard residues, such as prosthetic
    groups, inhibitors, solvent molecules, and ions for which coordinates are
    supplied. Groups are considered HET if they are:

    * not one of the standard amino acids, and
    * not one of the nucleic acids (C, G, A, T, U, and I), and
    * not one of the modified versions of nucleic acids (+C, +G, +A, +T, +U,
      and +I), and
    * not an unknown amino acid or nucleic acid where UNK is used to indicate
      the unknown residue name.

    Het records also describe heterogens for which the chemical identity is
    unknown, in which case the group is assigned the hetatm_id UNK.

    +---------+-------------+---------------+---------------------------------+
    | COLUMNS | DATA TYPE   | FIELD         | DEFINITION                      |
    +=========+=============+===============+=================================+
    | 1-6     | Record name | "HET   "      |                                 |
    +---------+-------------+---------------+---------------------------------+
    | 8-10    | LString(3)  | het_id        | Identifier, right-justified.    |
    +---------+-------------+---------------+---------------------------------+
    | 13      | Character   | chain_id      | Chain identifier.               |
    +---------+-------------+---------------+---------------------------------+
    | 14-17   | Integer     | seq_num       | Sequence number.                |
    +---------+-------------+---------------+---------------------------------+
    | 18      | AChar       | ins_code      | Insertion code.                 |
    +---------+-------------+---------------+---------------------------------+
    | 21-25   | Integer     | num_het_atoms | Number of HETATM records for    |
    |         |             |               | the group present in the entry. |
    +---------+-------------+---------------+---------------------------------+
    | 31-70   | String      | text          | Text describing Het group.      |
    +---------+-------------+---------------+---------------------------------+
    """

    def __init__(self):
        super().__init__()
        # All fields stay ``None`` until :meth:`parse_line` fills them in.
        self.hetatm_id = None
        self.chain_id = None
        self.seq_num = None
        self.ins_code = None
        self.num_het_atoms = None
        self.text = None

    def parse_line(self, line):
        """Parse PDB-format line.

        The base-class ``parse_line`` (defined in ``.general``) is called
        first; the fields are then sliced out of the fixed columns listed in
        the class docstring (1-based columns, 0-based slices).

        :param str line:  line to parse
        :raises ValueError:  if the ``seq_num`` or ``num_het_atoms`` columns
            do not hold an integer
        :raises IndexError:  if the line is too short to contain the
            ``chain_id`` or ``ins_code`` column
        """
        super().parse_line(line)
        self.hetatm_id = line[7:10].strip()
        self.chain_id = line[12].strip()
        self.seq_num = int(line[13:17].strip())
        self.ins_code = line[17].strip()
        self.num_het_atoms = int(line[20:25].strip())
        self.text = line[30:70].strip()

    def __str__(self):
        """Format the record as a fixed-column HET line.

        The literal spacing places every field in the column range that
        :meth:`parse_line` reads, so the output can be parsed back.
        """
        # Columns: 1-6 name, 7 blank, 8-10 het_id, 11-12 blank, 13 chain_id,
        # 14-17 seq_num, 18 ins_code, 19-20 blank, 21-25 num_het_atoms,
        # 26-30 blank, 31-70 text.
        return (
            f"HET    {self.hetatm_id:>3}  {self.chain_id:1}{self.seq_num:4}"
            f"{self.ins_code:1}  {self.num_het_atoms:5}     {self.text:40}"
        )
class HeterogenName(BaseRecord):
    """HETNAM field

    This record gives the chemical name of the compound with the given
    hetatm_id.

    +---------+--------------+--------------+---------------------------------+
    | COLUMNS | DATA TYPE    | FIELD        | DEFINITION                      |
    +=========+==============+==============+=================================+
    | 1-6     | Record name  | "HETNAM"     |                                 |
    +---------+--------------+--------------+---------------------------------+
    | 9-10    | Continuation | continuation | Allows concatenation of         |
    |         |              |              | multiple records.               |
    +---------+--------------+--------------+---------------------------------+
    | 12-14   | LString(3)   | het_id       | Het identifier, right-          |
    |         |              |              | justified.                      |
    +---------+--------------+--------------+---------------------------------+
    | 16-70   | String       | text         | Chemical name.                  |
    +---------+--------------+--------------+---------------------------------+
    """

    def __init__(self):
        super().__init__()
        # Maps het ID -> list of name fragments, in the order they were parsed.
        self.heterogens = OrderedDict()

    def parse_line(self, line):
        """Parse PDB-format line.

        The name text is appended to the list stored under the line's het ID.
        The continuation field (columns 9-10) is not read; fragments are kept
        in the order the lines are parsed.

        :param str line:  line to parse
        """
        super().parse_line(line)
        hetatm_id = line[11:14].strip()
        self.heterogens.setdefault(hetatm_id, []).append(line[15:70].strip())

    def __str__(self):
        """Format the stored names as fixed-column HETNAM lines.

        One line is written per stored fragment. The first fragment of each
        het ID has a blank continuation field; later ones carry their
        1-based position (2, 3, ...). Every line is padded to column 70.
        """
        strings = []
        for hetatm, lines in self.heterogens.items():
            for continuation, line in enumerate(lines, start=1):
                # Columns 9-10: blank on the first line of a het ID.
                cont_field = f"{continuation:>2}" if continuation > 1 else "  "
                # Columns: 1-6 name, 7-8 blank, 9-10 continuation, 11 blank,
                # 12-14 het_id, 15 blank, 16-70 text.
                strings.append(f"HETNAM  {cont_field} {hetatm:>3} {line:55}")
        return "\n".join(strings)
class HeterogenSynonym(BaseRecord):
    """HETSYN field

    This record provides synonyms, if any, for the compound in the
    corresponding (i.e., same hetatm_id) HETNAM record. This is to allow
    greater flexibility in searching for HET groups.

    +----------+--------------+--------------+--------------------------------+
    | COLUMNS  | DATA TYPE    | FIELD        | DEFINITION                     |
    +==========+==============+==============+================================+
    | 1-6      | Record name  | "HETSYN"     |                                |
    +----------+--------------+--------------+--------------------------------+
    | 9-10     | Continuation | continuation | Allows concatenation of        |
    |          |              |              | multiple records.              |
    +----------+--------------+--------------+--------------------------------+
    | 12-14    | LString(3)   | het_id       | Het identifier, right-         |
    |          |              |              | justified.                     |
    +----------+--------------+--------------+--------------------------------+
    | 16-70    | SList        | synonyms     | List of synonyms.              |
    +----------+--------------+--------------+--------------------------------+
    """

    def __init__(self):
        super().__init__()
        # Maps het ID -> list of synonym text lines, in the order parsed.
        self.synonyms = OrderedDict()

    def parse_line(self, line):
        """Parse PDB-format line.

        The synonym text (columns 16-70) is appended, unsplit, to the list
        stored under the line's het ID. The continuation field (columns 9-10)
        is not read; lines are kept in the order they are parsed.

        :param str line:  line to parse
        """
        super().parse_line(line)
        het_id = line[11:14].strip()
        self.synonyms.setdefault(het_id, []).append(line[15:70].strip())

    def __str__(self):
        """Format the stored synonyms as fixed-column HETSYN lines.

        One line is written per stored text entry. The first entry of each
        het ID has a blank continuation field; later ones carry their 1-based
        position (2, 3, ...). The het ID is right-justified, as the column
        table specifies, and every line is padded to column 70.
        """
        lines = []
        for het_id, synonyms in self.synonyms.items():
            for continuation, syn in enumerate(synonyms, start=1):
                # Columns 9-10: blank on the first line of a het ID.
                cont_field = f"{continuation:>2}" if continuation > 1 else "  "
                # Columns: 1-6 name, 7-8 blank, 9-10 continuation, 11 blank,
                # 12-14 het_id, 15 blank, 16-70 synonyms.
                lines.append(f"HETSYN  {cont_field} {het_id:>3} {syn:55}")
        return "\n".join(lines)
class Formula(BaseRecord):
    """FORMUL field

    The FORMUL record presents the chemical formula and charge of a
    non-standard group.

    +---------+-------------+--------------+----------------------------------+
    | COLUMNS | DATA TYPE   | FIELD        | DEFINITION                       |
    +=========+=============+==============+==================================+
    | 1-6     | Record name | "FORMUL"     |                                  |
    +---------+-------------+--------------+----------------------------------+
    | 9-10    | Integer     | compNum      | Component number.                |
    +---------+-------------+--------------+----------------------------------+
    | 13-15   | LString(3)  | hetID        | Het identifier.                  |
    +---------+-------------+--------------+----------------------------------+
    | 17-18   | Integer     | continuation | Continuation number.             |
    +---------+-------------+--------------+----------------------------------+
    | 19      | Character   | asterisk     | "*" for water.                   |
    +---------+-------------+--------------+----------------------------------+
    | 20-70   | String      | text         | Chemical formula.                |
    +---------+-------------+--------------+----------------------------------+
    """

    def __init__(self):
        super().__init__()
        # Maps component number -> list of (het ID, formula text) tuples,
        # one tuple per parsed line.
        self._components = OrderedDict()

    @property
    def components(self) -> dict:
        """Formulae for components.

        :returns:  dictionary with component numbers as keys and values that
            are lists of ``(hetatm_id, text)`` tuples, one per parsed line
        """
        return self._components

    @components.setter
    def components(self, value):
        self._components = value

    def parse_line(self, line):
        """Parse PDB-format line.

        The stored text starts at column 19, so it keeps the asterisk column
        (or its blank) in front of the formula; only trailing whitespace is
        removed. The continuation field (columns 17-18) is not read.

        :param str line:  line to parse
        :raises ValueError:  if the component-number columns do not hold an
            integer
        """
        super().parse_line(line)
        component_num = int(line[8:10].strip())
        hetatm_id = line[12:15].strip()
        text = line[18:70].rstrip()
        self._components.setdefault(component_num, []).append(
            (hetatm_id, text)
        )

    def __str__(self):
        """Format the stored formulae as fixed-column FORMUL lines.

        One line is written per stored ``(hetatm_id, text)`` tuple. The first
        tuple of a component has a blank continuation field; later ones carry
        their 1-based position (2, 3, ...). Trailing padding is stripped.
        """
        strings = []
        for component_num, component_list in self._components.items():
            for index, (hetatm_id, text) in enumerate(component_list, start=1):
                # Columns 17-18: blank on the first line of a component.
                continuation = f"{index:>2}" if index > 1 else "  "
                # Columns: 1-6 name, 7-8 blank, 9-10 compNum, 11-12 blank,
                # 13-15 hetID, 16 blank, 17-18 continuation, 19-70 text
                # (asterisk column included in the stored text).
                string = (
                    f"FORMUL  {component_num:>2}  {hetatm_id:>3} "
                    f"{continuation}{text:52}"
                ).strip()
                strings.append(string)
        return "\n".join(strings)