"""Classes for records with coordinate information.
.. codeauthor:: Todd Dolinsky
.. codeauthor:: Yong Huang
.. codeauthor:: Nathan Baker
"""
from itertools import count
import logging
from typing import OrderedDict
from .general import BaseRecord, atom_format
_LOGGER = logging.getLogger(__name__)
class Model(BaseRecord):
    """MODEL class.

    The MODEL record specifies the model serial number when multiple
    structures are presented in a single coordinate entry, as is often the
    case with structures determined by NMR.

    +---------+-------------+----------+--------------------------------------+
    | COLUMNS | DATA TYPE   | FIELD    | DEFINITION                           |
    +=========+=============+==========+======================================+
    | 1-6     | Record name | "MODEL " |                                      |
    +---------+-------------+----------+--------------------------------------+
    | 11-14   | Integer     | serial   | Model serial number.                 |
    +---------+-------------+----------+--------------------------------------+

    A model also collects, in input order, every ATOM, ANISOU, TER and
    HETATM record handed to :meth:`parse_line`.
    """

    def __init__(self):
        super().__init__()
        # Model serial number; stays None until a MODEL line is parsed.
        self.serial = None
        # Coordinate-section records in the order they were parsed.
        self.records = []

    def parse_line(self, line):
        """Parse PDB-format line.

        MODEL lines set :attr:`serial`, ENDMDL lines are accepted and
        ignored, and ATOM/ANISOU/TER/HETATM lines are parsed into the
        matching record object and appended to :attr:`records`.

        :param str line:  line to parse
        :raises ValueError:  if the record name is not one of the above
        """
        super().parse_line(line)
        name = line[0:6].strip()
        if name == "MODEL":
            self.serial = int(line[10:14].strip())
            return
        if name == "ENDMDL":
            return
        # Built per call because the record classes are defined further
        # down in the module than this class.
        record_types = {
            "ATOM": Atom,
            "ANISOU": TemperatureFactor,
            "TER": ChainTerminus,
            "HETATM": HeterogenAtom,
        }
        record_type = record_types.get(name)
        if record_type is None:
            err = f"Unexpected line: {line}"
            raise ValueError(err)
        record = record_type()
        record.parse_line(line)
        self.records.append(record)

    @property
    def all_atoms(self) -> list:
        """Get all atoms in model.

        :returns:  list of :class:`Atom`-like objects (ATOM and HETATM)
        """
        return [
            rec for rec in self.records
            if isinstance(rec, (Atom, HeterogenAtom))
        ]

    @property
    def het_atoms(self) -> list:
        """Get HETATM atoms in model.

        :returns:  list of :class:`HeterogenAtom` objects
        """
        return [rec for rec in self.records if isinstance(rec, HeterogenAtom)]

    @property
    def atoms(self) -> list:
        """Get ATOM atoms in model.

        :returns:  list of :class:`Atom` objects
        """
        return [rec for rec in self.records if isinstance(rec, Atom)]

    def num_atoms(self, heavy_only) -> int:
        """Number of ATOM and HETATM entries in all chains in model.

        :param bool heavy_only:  exclude hydrogen atoms (element ``H`` or
            ``D``) from count
        :returns:  number of matching atoms
        """
        if not heavy_only:
            return len(self.all_atoms)
        return sum(
            1 for atom in self.all_atoms if atom.element not in ("H", "D")
        )

    def num_chains(self) -> int:
        """Count number of chains in model.

        :returns:  number of distinct chain IDs among ATOM/HETATM records
        """
        return len({atom.chain_id for atom in self.all_atoms})

    def num_residues(self, count_hetatm) -> int:
        """Number of residues in entry.

        Residues are identified by chain ID, residue name, residue sequence
        number and insertion code.

        :param bool count_hetatm:  include heterogen residues in count
        :returns:  number of distinct residues
        """
        atom_list = self.all_atoms if count_hetatm else self.atoms
        # A tuple key keeps the fields separate (no accidental collisions
        # from string concatenation) and the insertion code distinguishes
        # residues that share a sequence number.
        residues = {
            (atom.chain_id, atom.res_name, atom.res_seq, atom.ins_code)
            for atom in atom_list
        }
        return len(residues)

    def num_ter(self) -> int:
        """Count number of termini in entry.

        :returns:  number of TER records in the model
        """
        return sum(
            1 for rec in self.records if isinstance(rec, ChainTerminus)
        )

    def __str__(self):
        """Render the MODEL line (if a serial is set) followed by records.

        No ENDMDL line is emitted.
        """
        strings = []
        # Compare against None so that a model numbered 0 is still written.
        if self.serial is not None:
            # "MODEL" + 5 blanks fills columns 1-10; serial goes in 11-14,
            # which is where parse_line reads it back from.
            strings.append(f"MODEL     {self.serial:4}".strip())
        strings.extend(str(record) for record in self.records)
        return "\n".join(strings)
class Atom(BaseRecord):
    """ATOM class

    The ATOM records present the atomic coordinates for standard residues.
    They also present the occupancy and temperature factor for each atom.
    Heterogen coordinates use the HETATM record type. The element symbol is
    always present on each ATOM record; segment identifier and charge are
    optional.

    +---------+--------------+-------------+----------------------------------+
    | COLUMNS | DATA TYPE    | FIELD       | DEFINITION                       |
    +=========+==============+=============+==================================+
    | 1-6     | Record name  | "ATOM  "    |                                  |
    +---------+--------------+-------------+----------------------------------+
    | 7-11    | Integer      | serial      | Atom serial number.              |
    +---------+--------------+-------------+----------------------------------+
    | 13-16   | Atom         | name        | Atom name.                       |
    +---------+--------------+-------------+----------------------------------+
    | 17      | Character    | alt_loc     | Alternate location indicator.    |
    +---------+--------------+-------------+----------------------------------+
    | 18-20   | Residue name | res_name    | Residue name.                    |
    +---------+--------------+-------------+----------------------------------+
    | 22      | Character    | chain_id    | Chain identifier.                |
    +---------+--------------+-------------+----------------------------------+
    | 23-26   | Integer      | res_seq     | Residue sequence number.         |
    +---------+--------------+-------------+----------------------------------+
    | 27      | AChar        | ins_code    | Code for insertion of residues.  |
    +---------+--------------+-------------+----------------------------------+
    | 31-38   | Real(8.3)    | x           | Orthogonal coordinates for X in  |
    |         |              |             | Angstroms.                       |
    +---------+--------------+-------------+----------------------------------+
    | 39-46   | Real(8.3)    | y           | Orthogonal coordinates for Y in  |
    |         |              |             | Angstroms.                       |
    +---------+--------------+-------------+----------------------------------+
    | 47-54   | Real(8.3)    | z           | Orthogonal coordinates for Z in  |
    |         |              |             | Angstroms.                       |
    +---------+--------------+-------------+----------------------------------+
    | 55-60   | Real(6.2)    | occupancy   | Occupancy.                       |
    +---------+--------------+-------------+----------------------------------+
    | 61-66   | Real(6.2)    | temp_factor | Temperature factor.              |
    +---------+--------------+-------------+----------------------------------+
    | 77-78   | LString(2)   | element     | Element symbol, right-justified. |
    +---------+--------------+-------------+----------------------------------+
    | 79-80   | LString(2)   | charge      | Charge on the atom.              |
    +---------+--------------+-------------+----------------------------------+
    """

    def __init__(self):
        super().__init__()
        # Mandatory fields; None until parse_line() has run.
        self.serial = None
        self.name = None
        self.alt_loc = None
        self.res_name = None
        self.chain_id = None
        self.res_seq = None
        self.ins_code = None
        self.x = None
        self.y = None
        self.z = None
        # Optional trailing fields; these defaults survive when the line is
        # too short or the field cannot be parsed.
        self.occupancy = 0.00
        self.temp_factor = 0.00
        self.seg_id = ""
        self.element = ""
        self.charge = ""

    def parse_line(self, line):
        """Parse PDB-format line.

        Fields up to and including the z coordinate are mandatory. The
        trailing fields (occupancy, temperature factor, segment ID, element,
        charge) are parsed on a best-effort basis: parsing stops at the first
        one that fails and the remaining attributes keep their defaults.

        :param str line:  line to parse
        :raises ValueError:  if a mandatory numeric field cannot be converted
        :raises IndexError:  if the line is too short for a mandatory
            single-character field
        """
        super().parse_line(line)
        self.serial = int(line[6:11].strip())
        self.name = line[12:16].strip()
        self.alt_loc = line[16].strip()
        self.res_name = line[17:20].strip()
        self.chain_id = line[21].strip()
        self.res_seq = int(line[22:26].strip())
        self.ins_code = line[26].strip()
        self.x = float(line[30:38].strip())
        self.y = float(line[38:46].strip())
        self.z = float(line[46:54].strip())
        try:
            self.occupancy = float(line[54:60].strip())
            self.temp_factor = float(line[60:66].strip())
            self.seg_id = line[72:76].strip()
            self.element = line[76:78].strip()
            self.charge = line[78:80].strip()
        except (ValueError, IndexError):
            # Deliberately tolerant: truncated ATOM lines are accepted.
            pass

    def __str__(self):
        """Render the record using the column layout documented above.

        The segment ID is not written; columns 67-76 are left blank.
        """
        # NOTE(review): atom_format() is assumed to return the 4-character
        # atom-name field for columns 13-16 -- confirm against its definition.
        return (
            # cols 1-6 record name, 7-11 serial, 12 blank, 13-16 name, 17 alt
            f"ATOM  {self.serial:5} {atom_format(self)}{self.alt_loc:1}"
            # cols 18-20 residue, 21 blank, 22 chain, 23-26 res_seq
            f"{self.res_name:>3} {self.chain_id:1}{self.res_seq:4}"
            # col 27 insertion code, 28-30 blank, 31-46 x and y
            f"{self.ins_code:1}   {self.x:8.3f}{self.y:8.3f}"
            # cols 47-54 z, 55-60 occupancy, 61-66 temperature factor
            f"{self.z:8.3f}{self.occupancy:6.2f}{self.temp_factor:6.2f}"
            # cols 67-76 blank, 77-78 element (right-justified), 79-80 charge
            f"          {self.element:>2}{self.charge:2}"
        )
class TemperatureFactor(BaseRecord):
    """ANISOU class

    The ANISOU records present the anisotropic temperature factors.

    +---------+--------------+----------+-------------------------------------+
    | COLUMNS | DATA TYPE    | FIELD    | DEFINITION                          |
    +=========+==============+==========+=====================================+
    | 1-6     | Record name  | "ANISOU" |                                     |
    +---------+--------------+----------+-------------------------------------+
    | 7-11    | Integer      | serial   | Atom serial number.                 |
    +---------+--------------+----------+-------------------------------------+
    | 13-16   | Atom         | name     | Atom name.                          |
    +---------+--------------+----------+-------------------------------------+
    | 17      | Character    | alt_loc  | Alternate location indicator        |
    +---------+--------------+----------+-------------------------------------+
    | 18-20   | Residue name | res_name | Residue name.                       |
    +---------+--------------+----------+-------------------------------------+
    | 22      | Character    | chain_id | Chain identifier.                   |
    +---------+--------------+----------+-------------------------------------+
    | 23-26   | Integer      | res_seq  | Residue sequence number.            |
    +---------+--------------+----------+-------------------------------------+
    | 27      | AChar        | ins_code | Insertion code.                     |
    +---------+--------------+----------+-------------------------------------+
    | 29-35   | Integer      | u00      | U(1,1)                              |
    +---------+--------------+----------+-------------------------------------+
    | 36-42   | Integer      | u11      | U(2,2)                              |
    +---------+--------------+----------+-------------------------------------+
    | 43-49   | Integer      | u22      | U(3,3)                              |
    +---------+--------------+----------+-------------------------------------+
    | 50-56   | Integer      | u01      | U(1,2)                              |
    +---------+--------------+----------+-------------------------------------+
    | 57-63   | Integer      | u02      | U(1,3)                              |
    +---------+--------------+----------+-------------------------------------+
    | 64-70   | Integer      | u12      | U(2,3)                              |
    +---------+--------------+----------+-------------------------------------+
    | 77-78   | LString(2)   | element  | Element symbol, right-justified.    |
    +---------+--------------+----------+-------------------------------------+
    | 79-80   | LString(2)   | charge   | Charge on the atom.                 |
    +---------+--------------+----------+-------------------------------------+
    """

    def __init__(self):
        super().__init__()
        # Every field is None until parse_line() has run.
        self.serial = None
        self.name = None
        self.alt_loc = None
        self.res_name = None
        self.chain_id = None
        self.res_seq = None
        self.ins_code = None
        self.u00 = None
        self.u11 = None
        self.u22 = None
        self.u01 = None
        self.u02 = None
        self.u12 = None
        self.seg_id = None
        self.element = None
        self.charge = None

    def parse_line(self, line):
        """Parse PDB-format line.

        Unlike the ATOM/HETATM parsers, nothing here is optional: any
        malformed numeric field aborts parsing with an exception.

        :param str line:  line to parse
        :raises ValueError:  if a numeric field cannot be converted
        :raises IndexError:  if the line is too short for a
            single-character field
        """
        super().parse_line(line)
        self.serial = int(line[6:11].strip())
        self.name = line[12:16].strip()
        self.alt_loc = line[16].strip()
        self.res_name = line[17:20].strip()
        self.chain_id = line[21].strip()
        self.res_seq = int(line[22:26].strip())
        self.ins_code = line[26].strip()
        self.u00 = int(line[28:35].strip())
        self.u11 = int(line[35:42].strip())
        self.u22 = int(line[42:49].strip())
        self.u01 = int(line[49:56].strip())
        self.u02 = int(line[56:63].strip())
        self.u12 = int(line[63:70].strip())
        self.seg_id = line[72:76].strip()
        self.element = line[76:78].strip()
        self.charge = line[78:80].strip()

    def __str__(self):
        """Render the record using the column layout documented above.

        The segment ID is not written; columns 71-76 are left blank.
        """
        # NOTE(review): atom_format() is assumed to return the 4-character
        # atom-name field for columns 13-16 -- confirm against its definition.
        return (
            # cols 1-6 record name, 7-11 serial, 12 blank, 13-16 name, 17 alt
            f"ANISOU{self.serial:5} {atom_format(self)}{self.alt_loc:1}"
            # cols 18-20 residue, 21 blank, 22 chain, 23-26 res_seq
            f"{self.res_name:>3} {self.chain_id:1}{self.res_seq:4}"
            # col 27 insertion code, 28 blank, 29-49 U(1,1) U(2,2) U(3,3)
            f"{self.ins_code:1} {self.u00:7}{self.u11:7}{self.u22:7}"
            # cols 50-70 U(1,2) U(1,3) U(2,3), 71-76 blank, 77-78 element
            f"{self.u01:7}{self.u02:7}{self.u12:7}      {self.element:>2}"
            # cols 79-80 charge
            f"{self.charge:2}"
        )
class ChainTerminus(BaseRecord):
    """TER class

    The TER record indicates the end of a list of ATOM/HETATM records for a
    chain.

    +---------+--------------+----------+-------------------------------------+
    | COLUMNS | DATA TYPE    | FIELD    | DEFINITION                          |
    +=========+==============+==========+=====================================+
    | 1-6     | Record name  | "TER   " |                                     |
    +---------+--------------+----------+-------------------------------------+
    | 7-11    | Integer      | serial   | Serial number.                      |
    +---------+--------------+----------+-------------------------------------+
    | 18-20   | Residue name | res_name | Residue name.                       |
    +---------+--------------+----------+-------------------------------------+
    | 22      | Character    | chain_id | Chain identifier.                   |
    +---------+--------------+----------+-------------------------------------+
    | 23-26   | Integer      | res_seq  | Residue sequence number.            |
    +---------+--------------+----------+-------------------------------------+
    | 27      | AChar        | ins_code | Insertion code.                     |
    +---------+--------------+----------+-------------------------------------+
    """

    def __init__(self):
        super().__init__()
        # Fields stay None when the TER line does not carry them.
        self.serial = None
        self.res_name = None
        self.chain_id = None
        self.res_seq = None
        self.ins_code = ""

    def parse_line(self, line):
        """Parse PDB-format line.

        Parsing is best-effort: fields are read left to right and parsing
        stops silently at the first one that is missing or malformed, so a
        bare ``TER`` line is accepted and leaves the remaining attributes at
        their initial values.

        :param str line:  line to parse
        """
        # NOTE(review): the None guard below runs only after the base-class
        # call, so BaseRecord.parse_line still receives None -- confirm that
        # it tolerates that.
        super().parse_line(line)
        if line is None:
            line = ""
        try:
            self.serial = int(line[6:11].strip())
            self.res_name = line[17:20].strip()
            self.chain_id = line[21].strip()
            self.res_seq = int(line[22:26].strip())
            self.ins_code = line[26].strip()
        except (IndexError, ValueError):
            # Deliberately tolerant: short TER lines are accepted.
            pass

    def __str__(self):
        """Render the record using the column layout documented above.

        Fields that were never parsed (still ``None``) are written as blanks
        instead of raising ``TypeError`` from the format specification.
        """
        serial, res_name, chain_id, res_seq, ins_code = (
            "" if value is None else value
            for value in (
                self.serial,
                self.res_name,
                self.chain_id,
                self.res_seq,
                self.ins_code,
            )
        )
        return (
            # cols 1-6 record name, 7-11 serial, 12-17 blank, 18-20 residue
            f"TER   {serial:5}      {res_name:>3}"
            # col 21 blank, 22 chain, 23-26 res_seq, 27 insertion code
            f" {chain_id:1}{res_seq:4}{ins_code:1}"
        )
class HeterogenAtom(BaseRecord):
    """HETATM class

    The HETATM records present the atomic coordinate records for atoms
    within "non-standard" groups. These records are used for water
    molecules and atoms presented in HET groups.

    +---------+--------------+-------------+----------------------------------+
    | COLUMNS | DATA TYPE    | FIELD       | DEFINITION                       |
    +=========+==============+=============+==================================+
    | 1-6     | Record name  | "HETATM"    |                                  |
    +---------+--------------+-------------+----------------------------------+
    | 7-11    | Integer      | serial      | Atom serial number.              |
    +---------+--------------+-------------+----------------------------------+
    | 13-16   | Atom         | name        | Atom name.                       |
    +---------+--------------+-------------+----------------------------------+
    | 17      | Character    | alt_loc     | Alternate location indicator.    |
    +---------+--------------+-------------+----------------------------------+
    | 18-20   | Residue name | res_name    | Residue name.                    |
    +---------+--------------+-------------+----------------------------------+
    | 22      | Character    | chain_id    | Chain identifier.                |
    +---------+--------------+-------------+----------------------------------+
    | 23-26   | Integer      | res_seq     | Residue sequence number.         |
    +---------+--------------+-------------+----------------------------------+
    | 27      | AChar        | ins_code    | Code for insertion of residues.  |
    +---------+--------------+-------------+----------------------------------+
    | 31-38   | Real(8.3)    | x           | Orthogonal coordinates for X.    |
    +---------+--------------+-------------+----------------------------------+
    | 39-46   | Real(8.3)    | y           | Orthogonal coordinates for Y.    |
    +---------+--------------+-------------+----------------------------------+
    | 47-54   | Real(8.3)    | z           | Orthogonal coordinates for Z.    |
    +---------+--------------+-------------+----------------------------------+
    | 55-60   | Real(6.2)    | occupancy   | Occupancy.                       |
    +---------+--------------+-------------+----------------------------------+
    | 61-66   | Real(6.2)    | temp_factor | Temperature factor.              |
    +---------+--------------+-------------+----------------------------------+
    | 77-78   | LString(2)   | element     | Element symbol; right-justified. |
    +---------+--------------+-------------+----------------------------------+
    | 79-80   | LString(2)   | charge      | Charge on the atom.              |
    +---------+--------------+-------------+----------------------------------+
    """

    def __init__(self):
        super().__init__()
        # Mandatory fields; None until parse_line() has run.
        self.serial = None
        self.name = None
        self.alt_loc = None
        self.res_name = None
        self.chain_id = None
        self.res_seq = None
        self.ins_code = None
        self.x = None
        self.y = None
        self.z = None
        # Optional trailing fields; these defaults survive when the line is
        # too short or the field cannot be parsed.
        self.occupancy = 0.00
        self.temp_factor = 0.00
        self.seg_id = ""
        self.element = ""
        self.charge = ""

    def parse_line(self, line):
        """Parse PDB-format line.

        Fields up to and including the z coordinate are mandatory. The
        trailing fields (occupancy, temperature factor, segment ID, element,
        charge) are parsed on a best-effort basis: parsing stops at the first
        one that fails and the remaining attributes keep their defaults.

        :param str line:  line to parse
        :raises ValueError:  if a mandatory numeric field cannot be
            converted, or if the line ends before the chain ID or insertion
            code column
        :raises IndexError:  if the line ends before the alternate location
            column
        """
        super().parse_line(line)
        self.serial = int(line[6:11].strip())
        self.name = line[12:16].strip()
        self.alt_loc = line[16].strip()
        try:
            self.res_name = line[17:20].strip()
            self.chain_id = line[21].strip()
            self.res_seq = int(line[22:26].strip())
            self.ins_code = line[26].strip()
        except IndexError as err:
            # Chain the original error so the traceback shows which column
            # lookup actually failed.
            raise ValueError(
                "Residue name must be less than 4 characters!"
            ) from err
        self.x = float(line[30:38].strip())
        self.y = float(line[38:46].strip())
        self.z = float(line[46:54].strip())
        try:
            self.occupancy = float(line[54:60].strip())
            self.temp_factor = float(line[60:66].strip())
            self.seg_id = line[72:76].strip()
            self.element = line[76:78].strip()
            self.charge = line[78:80].strip()
        except (ValueError, IndexError):
            # Deliberately tolerant: truncated HETATM lines are accepted.
            pass

    def __str__(self):
        """Render the record using the column layout documented above.

        The segment ID is not written; columns 67-76 are left blank.
        """
        # NOTE(review): atom_format() is assumed to return the 4-character
        # atom-name field for columns 13-16 -- confirm against its definition.
        return (
            # cols 1-6 record name, 7-11 serial, 12 blank, 13-16 name, 17 alt
            f"HETATM{self.serial:5} {atom_format(self)}{self.alt_loc:1}"
            # cols 18-20 residue, 21 blank, 22 chain, 23-26 res_seq
            f"{self.res_name:>3} {self.chain_id:1}{self.res_seq:4}"
            # col 27 insertion code, 28-30 blank, 31-54 x, y and z
            f"{self.ins_code:1}   {self.x:8.3f}{self.y:8.3f}{self.z:8.3f}"
            # cols 55-60 occupancy, 61-66 temperature factor
            f"{self.occupancy:6.2f}{self.temp_factor:6.2f}"
            # cols 67-76 blank, 77-78 element (right-justified), 79-80 charge
            f"          {self.element:>2}{self.charge:2}"
        )