# Source code for old_pdb.secondary

"""Classes for records with secondary structure and connectivity information.

.. codeauthor::  Todd Dolinsky
.. codeauthor::  Yong Huang
.. codeauthor::  Nathan Baker
"""
import logging
from .general import BaseRecord, atom_format


_LOGGER = logging.getLogger(__name__)


class CisPeptide(BaseRecord):
    """CISPEP field

    CISPEP records list the prolines and other peptides that were found in
    the cis conformation. This record replaces the use of footnote records
    to list cis peptides.

    +---------+-------------+-----------+-------------------------------------+
    | COLUMNS | DATA TYPE   | FIELD     | DEFINITION                          |
    +=========+=============+===========+=====================================+
    | 1-6     | Record name | "CISPEP"  |                                     |
    +---------+-------------+-----------+-------------------------------------+
    | 8-10    | Integer     | ser_num   | Record serial number.               |
    +---------+-------------+-----------+-------------------------------------+
    | 12-14   | LString(3)  | pep1      | Residue name.                       |
    +---------+-------------+-----------+-------------------------------------+
    | 16      | Character   | chain_id1 | Chain identifier.                   |
    +---------+-------------+-----------+-------------------------------------+
    | 18-21   | Integer     | seq_num1  | Residue sequence number.            |
    +---------+-------------+-----------+-------------------------------------+
    | 22      | AChar       | icode1    | Insertion code.                     |
    +---------+-------------+-----------+-------------------------------------+
    | 26-28   | LString(3)  | pep2      | Residue name.                       |
    +---------+-------------+-----------+-------------------------------------+
    | 30      | Character   | chain_id2 | Chain identifier.                   |
    +---------+-------------+-----------+-------------------------------------+
    | 32-35   | Integer     | seq_num2  | Residue sequence number.            |
    +---------+-------------+-----------+-------------------------------------+
    | 36      | AChar       | icode2    | Insertion code.                     |
    +---------+-------------+-----------+-------------------------------------+
    | 44-46   | Integer     | mod_num   | Identifies the specific model.      |
    +---------+-------------+-----------+-------------------------------------+
    | 54-59   | Real(6.2)   | measure   | Angle measurement in degrees.       |
    +---------+-------------+-----------+-------------------------------------+
    """

    def __init__(self):
        super().__init__()
        # Every field starts unset; parse_line() fills them in.
        self.ser_num = None
        self.pep1 = None
        self.chain_id1 = None
        self.seq_num1 = None
        self.icode1 = None
        self.pep2 = None
        self.chain_id2 = None
        self.seq_num2 = None
        self.icode2 = None
        self.mod_num = None
        self.measure = None

    def parse_line(self, line):
        """Parse PDB-format line.

        Fields are read left to right, so a malformed field leaves the
        attributes before it populated and raises from the conversion.

        :param str line:  line to parse
        :raises ValueError:  if a numeric field cannot be converted
        :raises IndexError:  if the line ends before a single-character field
        """
        super().parse_line(line)
        # (attribute, zero-based index or slice, converter) in column order.
        layout = (
            ("ser_num", slice(7, 10), int),
            ("pep1", slice(11, 14), str),
            ("chain_id1", 15, str),
            ("seq_num1", slice(17, 21), int),
            ("icode1", 21, str),
            ("pep2", slice(25, 28), str),
            ("chain_id2", 29, str),
            ("seq_num2", slice(31, 35), int),
            ("icode2", 35, str),
            ("mod_num", slice(43, 46), int),
            ("measure", slice(53, 59), float),
        )
        for attr, where, convert in layout:
            setattr(self, attr, convert(line[where].strip()))

    def __str__(self):
        """Return the record as a fixed-column CISPEP line."""
        head = f"CISPEP {self.ser_num:3}"
        residue1 = (
            f"{self.pep1:3} {self.chain_id1:1}"
            f" {self.seq_num1:4}{self.icode1:1}"
        )
        residue2 = (
            f"{self.pep2:3} {self.chain_id2:1}"
            f" {self.seq_num2:4}{self.icode2:1}"
        )
        pieces = [
            head,
            " ",
            residue1,
            "   ",
            residue2,
            "       ",
            f"{self.mod_num:3}",
            "       ",
            f"{self.measure:6.2f}",
        ]
        return "".join(pieces)


class DisulfideBond(BaseRecord):
    """SSBOND field

    The SSBOND record identifies each disulfide bond in protein and
    polypeptide structures by identifying the two residues involved in the
    bond.

    The two residue-name fields are always written as "CYS" and are not
    stored on the object.

    +---------+-------------+-----------+-------------------------------------+
    | COLUMNS | DATA TYPE   | FIELD     | DEFINITION                          |
    +=========+=============+===========+=====================================+
    | 1-6     | Record name | "SSBOND"  |                                     |
    +---------+-------------+-----------+-------------------------------------+
    | 8-10    | Integer     | ser_num   | Serial number.                      |
    +---------+-------------+-----------+-------------------------------------+
    | 12-14   | LString(3)  | "CYS"     | Residue name.                       |
    +---------+-------------+-----------+-------------------------------------+
    | 16      | Character   | chain_id1 | Chain identifier.                   |
    +---------+-------------+-----------+-------------------------------------+
    | 18-21   | Integer     | seq_num1  | Residue sequence number.            |
    +---------+-------------+-----------+-------------------------------------+
    | 22      | AChar       | icode1    | Insertion code.                     |
    +---------+-------------+-----------+-------------------------------------+
    | 26-28   | LString(3)  | "CYS"     | Residue name.                       |
    +---------+-------------+-----------+-------------------------------------+
    | 30      | Character   | chain_id2 | Chain identifier.                   |
    +---------+-------------+-----------+-------------------------------------+
    | 32-35   | Integer     | seq_num2  | Residue sequence number.            |
    +---------+-------------+-----------+-------------------------------------+
    | 36      | AChar       | icode2    | Insertion code.                     |
    +---------+-------------+-----------+-------------------------------------+
    | 60-65   | SymOP       | sym1      | Symmetry operator for residue 1.    |
    +---------+-------------+-----------+-------------------------------------+
    | 67-72   | SymOP       | sym2      | Symmetry operator for residue 2.    |
    +---------+-------------+-----------+-------------------------------------+
    | 74-78   | Real(5.2)   | length    | Disulfide bond distance             |
    +---------+-------------+-----------+-------------------------------------+
    """

    def __init__(self):
        super().__init__()
        self.ser_num = None
        self.chain_id1 = None
        self.seq_num1 = None
        self.icode1 = None
        self.chain_id2 = None
        self.seq_num2 = None
        self.icode2 = None
        self.sym1 = None
        self.sym2 = None
        self.length = None

    def parse_line(self, line):
        """Parse PDB-format line.

        :param str line:  line to parse
        :raises ValueError:  if a numeric field (including the bond length)
            is blank or otherwise not convertible
        :raises IndexError:  if the line ends before a single-character field
        """
        super().parse_line(line)
        self.ser_num = int(line[7:10].strip())
        self.chain_id1 = line[15].strip()
        self.seq_num1 = int(line[17:21].strip())
        self.icode1 = line[21].strip()
        self.chain_id2 = line[29].strip()
        self.seq_num2 = int(line[31:35].strip())
        self.icode2 = line[35].strip()
        # Symmetry operators are stored stripped; __str__ re-aligns them.
        self.sym1 = line[59:65].strip()
        self.sym2 = line[66:72].strip()
        self.length = float(line[73:78])

    def __str__(self):
        """Return the record as a fixed-column SSBOND line.

        Symmetry operators are right-justified in columns 60-65 and 67-72
        and the bond length occupies columns 74-78, following the column
        table in the class docstring.
        """
        # Columns 37-59 carry no data in this record.
        gap = " " * 23
        return (
            f"SSBOND {self.ser_num:3} CYS {self.chain_id1:1} {self.seq_num1:4}"
            f"{self.icode1:1}   CYS {self.chain_id2:1} {self.seq_num2:4}"
            f"{self.icode2:1}{gap}{self.sym1:>6}"
            f" {self.sym2:>6} {self.length:5.2f}"
        )


class Helix(BaseRecord):
    """HELIX field

    HELIX records are used to identify the position of helices in the
    molecule. Helices are both named and numbered. The residues where the
    helix begins and ends are noted, as well as the total length.

    +---------+--------------+---------------+--------------------------------+
    | COLUMNS | DATA TYPE    | FIELD         | DEFINITION                     |
    +=========+==============+===============+================================+
    | 1-6     | Record name  | "HELIX "      |                                |
    +---------+--------------+---------------+--------------------------------+
    | 8-10    | Integer      | ser_num       | Serial number of the helix.    |
    |         |              |               | starts at 1 and increases      |
    |         |              |               | incrementally.                 |
    +---------+--------------+---------------+--------------------------------+
    | 12-14   | LString(3)   | helix_id      | Helix identifier. In addition  |
    |         |              |               | to a serial number, each helix |
    |         |              |               | is given an alphanumeric       |
    |         |              |               | helix identifier.              |
    +---------+--------------+---------------+--------------------------------+
    | 16-18   | Residue name | init_res_name | Name of the initial residue.   |
    +---------+--------------+---------------+--------------------------------+
    | 20      | Character    | init_chain_id | Chain identifier for the chain |
    |         |              |               | containing this helix.         |
    +---------+--------------+---------------+--------------------------------+
    | 22-25   | Integer      | init_seq_num  | Sequence number of the initial |
    |         |              |               | residue.                       |
    +---------+--------------+---------------+--------------------------------+
    | 26      | AChar        | init_i_code   | Insertion code of the initial  |
    |         |              |               | residue.                       |
    +---------+--------------+---------------+--------------------------------+
    | 28-30   | Residue name | end_res_name  | Name of the terminal residue   |
    |         |              |               | of the helix.                  |
    +---------+--------------+---------------+--------------------------------+
    | 32      | Character    | end_chain_id  | Chain identifier for the chain |
    |         |              |               | containing this helix.         |
    +---------+--------------+---------------+--------------------------------+
    | 34-37   | Integer      | end_seq_num   | Sequence number of the         |
    |         |              |               | terminal residue.              |
    +---------+--------------+---------------+--------------------------------+
    | 38      | AChar        | end_i_code    | Insertion code of the terminal |
    |         |              |               | residue.                       |
    +---------+--------------+---------------+--------------------------------+
    | 39-40   | Integer      | helix_class   | Helix class.                   |
    +---------+--------------+---------------+--------------------------------+
    | 41-70   | String       | comment       | Comment about this helix.      |
    +---------+--------------+---------------+--------------------------------+
    | 72-76   | Integer      | length        | Length of this helix.          |
    +---------+--------------+---------------+--------------------------------+
    """

    def __init__(self):
        super().__init__()
        self.ser_num = None
        self.helix_id = None
        self.init_res_name = None
        self.init_chain_id = None
        self.init_seq_num = None
        self.init_i_code = None
        self.end_res_name = None
        self.end_chain_id = None
        self.end_seq_num = None
        self.end_i_code = None
        self.helix_class = None
        self.comment = None
        self.length = None

    def parse_line(self, line):
        """Parse PDB-format line.

        ``helix_class`` and ``length`` are optional: when their columns are
        blank or not an integer the attribute is set to ``None`` instead of
        raising.

        :param str line:  line to parse
        :raises ValueError:  if a required integer field cannot be converted
        :raises IndexError:  if the line ends before a single-character field
        """
        super().parse_line(line)
        self.ser_num = int(line[7:10].strip())
        self.helix_id = line[11:14].strip()
        self.init_res_name = line[15:18].strip()
        self.init_chain_id = line[19].strip()
        self.init_seq_num = int(line[21:25].strip())
        self.init_i_code = line[25].strip()
        self.end_res_name = line[27:30].strip()
        self.end_chain_id = line[31].strip()
        self.end_seq_num = int(line[33:37].strip())
        self.end_i_code = line[37].strip()
        try:
            self.helix_class = int(line[38:40].strip())
        except ValueError:
            # Optional field: record it as missing rather than keeping
            # whatever value a previous parse left behind.
            self.helix_class = None
        self.comment = line[40:70].strip()
        try:
            self.length = int(line[71:76].strip())
        except ValueError:
            # Optional field, same handling as helix_class.
            self.length = None

    def __str__(self):
        """Return the record as a fixed-column HELIX line.

        ``helix_class`` and ``length`` are written as blanks when they are
        ``None`` so that a record parsed without them can still be printed.
        """
        # An empty string pads to the field width; None cannot be formatted
        # with a width and would raise TypeError.
        helix_class = "" if self.helix_class is None else self.helix_class
        length = "" if self.length is None else self.length
        return (
            f"HELIX  {self.ser_num:3} {self.helix_id:>3}"
            f" {self.init_res_name:3} {self.init_chain_id:1}"
            f" {self.init_seq_num:4}{self.init_i_code:1}"
            f" {self.end_res_name:3} {self.end_chain_id:1}"
            f" {self.end_seq_num:4}{self.end_i_code:1}{helix_class:2}"
            f"{self.comment:30} {length:5}"
        )


class Link(BaseRecord):
    """LINK field

    The LINK records specify connectivity between residues that is not
    implied by the primary structure. Connectivity is expressed in terms of
    the atom names. This record supplements information given in CONECT
    records and is provided here for convenience in searching.

    .. todo::  Clean up output using element/atom information and formatting

    +---------+--------------+-----------+------------------------------------+
    | COLUMNS | DATA TYPE    | FIELD     | DEFINITION                         |
    +=========+==============+===========+====================================+
    | 1-6     | Record name  | "LINK  "  |                                    |
    +---------+--------------+-----------+------------------------------------+
    | 13-16   | Atom         | name1     | Atom name.                         |
    +---------+--------------+-----------+------------------------------------+
    | 17      | Character    | alt_loc1  | Alternate location indicator.      |
    +---------+--------------+-----------+------------------------------------+
    | 18-20   | Residue name | res_name1 | Residue  name.                     |
    +---------+--------------+-----------+------------------------------------+
    | 22      | Character    | chain_id1 | Chain identifier.                  |
    +---------+--------------+-----------+------------------------------------+
    | 23-26   | Integer      | res_seq1  | Residue sequence number.           |
    +---------+--------------+-----------+------------------------------------+
    | 27      | AChar        | ins_code1 | Insertion code.                    |
    +---------+--------------+-----------+------------------------------------+
    | 43-46   | Atom         | name2     | Atom name.                         |
    +---------+--------------+-----------+------------------------------------+
    | 47      | Character    | alt_loc2  | Alternate location indicator.      |
    +---------+--------------+-----------+------------------------------------+
    | 48-50   | Residue name | res_name2 | Residue name.                      |
    +---------+--------------+-----------+------------------------------------+
    | 52      | Character    | chain_id2 | Chain identifier.                  |
    +---------+--------------+-----------+------------------------------------+
    | 53-56   | Integer      | res_seq2  | Residue sequence number.           |
    +---------+--------------+-----------+------------------------------------+
    | 57      | AChar        | ins_code2 | Insertion code.                    |
    +---------+--------------+-----------+------------------------------------+
    | 60-65   | SymOP        | sym1      | Symmetry operator atom 1.          |
    +---------+--------------+-----------+------------------------------------+
    | 67-72   | SymOP        | sym2      | Symmetry operator atom 2.          |
    +---------+--------------+-----------+------------------------------------+
    | 74-78   | Real(5.2)    | length    | Link distance                      |
    +---------+--------------+-----------+------------------------------------+
    """

    def __init__(self):
        super().__init__()
        self.name1 = None
        self.alt_loc1 = None
        self.res_name1 = None
        self.chain_id1 = None
        self.res_seq1 = None
        self.ins_code1 = None
        self.name2 = None
        self.alt_loc2 = None
        self.res_name2 = None
        self.chain_id2 = None
        self.res_seq2 = None
        self.ins_code2 = None
        self.sym1 = None
        self.sym2 = None
        # Declared here so a never-parsed record still has every attribute
        # and __str__ reports the intended ValueError, not AttributeError.
        self.length = None
        # Must be set to True/False before __str__ can be used; the error
        # raised by __str__ names ``annotate_link`` as the step that does so.
        self.is_element1 = None
        self.is_element2 = None

    def parse_line(self, line):
        """Parse PDB-format line.

        The link distance is kept as the raw column text, not converted to a
        number. Parsing resets ``is_element1``/``is_element2`` to ``None``.

        :param str line:  line to parse
        :raises ValueError:  if a residue sequence number is not an integer
        :raises IndexError:  if the line ends before a single-character field
        """
        super().parse_line(line)
        self.name1 = line[12:16].strip()
        self.alt_loc1 = line[16].strip()
        self.res_name1 = line[17:20].strip()
        self.chain_id1 = line[21].strip()
        self.res_seq1 = int(line[22:26].strip())
        self.ins_code1 = line[26].strip()
        self.name2 = line[42:46].strip()
        self.alt_loc2 = line[46].strip()
        self.res_name2 = line[47:50].strip()
        self.chain_id2 = line[51].strip()
        self.res_seq2 = int(line[52:56].strip())
        self.ins_code2 = line[56].strip()
        self.sym1 = line[59:65].strip()
        self.sym2 = line[66:72].strip()
        self.length = line[73:78]
        self.is_element1 = None
        self.is_element2 = None

    @staticmethod
    def _format_atom_name(name, is_element):
        """Return ``name`` laid out in the four-column atom-name field.

        :param str name:  stripped atom name
        :param bool is_element:  flag stored in ``is_element1``/``is_element2``
        :return:  atom name padded for the four-column field
        :rtype:  str
        """
        if is_element:
            return f"{name:>2}  "[:4]
        if len(name) == 2:
            return f" {name} "
        return f"{name:>4}"

    def __str__(self):
        """Return the record as a fixed-column LINK line.

        :raises ValueError:  if ``is_element1`` or ``is_element2`` is still
            ``None``
        """
        # See atom-name formatting rules at
        # https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
        if self.is_element1 is None or self.is_element2 is None:
            err = (
                "Must run annotate_link first before correctly formatted "
                "strings can be produced."
            )
            raise ValueError(err)
        name1 = self._format_atom_name(self.name1, self.is_element1)
        name2 = self._format_atom_name(self.name2, self.is_element2)
        # Record name (columns 1-6) plus the unused columns 7-12.
        string = "LINK        "
        string += (
            f"{name1}{self.alt_loc1:1}{self.res_name1:>3} {self.chain_id1:1}"
            f"{self.res_seq1:4}{self.ins_code1:1}               "
        )
        # chain_id2 is padded to one column like chain_id1 so that a blank
        # chain does not shift every later field to the left.
        string += (
            f"{name2}{self.alt_loc2:1}{self.res_name2:>3} {self.chain_id2:1}"
            f"{self.res_seq2:4}{self.ins_code2:1}  {self.sym1:>6} "
            f"{self.sym2:>6} {self.length:5}"
        )
        return string


class Sheet(BaseRecord):
    """SHEET field

    SHEET records mark where the sheets of the molecule are located. Each
    sheet is both named and numbered, and the residues at which each strand
    starts and stops are recorded.

    +---------+--------------+---------------+--------------------------------+
    | COLUMNS | DATA TYPE    | FIELD         | DEFINITION                     |
    +=========+==============+===============+================================+
    | 1-6     | Record name  | "SHEET "      |                                |
    +---------+--------------+---------------+--------------------------------+
    | 8-10    | Integer      | strand        | Strand number which starts at  |
    |         |              |               | 1 for each strand within a     |
    |         |              |               | sheet and increases by one.    |
    +---------+--------------+---------------+--------------------------------+
    | 12-14   | LString(3)   | sheet_id      | Sheet identifier.              |
    +---------+--------------+---------------+--------------------------------+
    | 15-16   | Integer      | num_strands   | Number of strands in sheet.    |
    +---------+--------------+---------------+--------------------------------+
    | 18-20   | Residue name | init_res_name | Name of initial residue.       |
    +---------+--------------+---------------+--------------------------------+
    | 22      | Character    | init_chain_id | Chain identifier of initial    |
    |         |              |               | residue in strand.             |
    +---------+--------------+---------------+--------------------------------+
    | 23-26   | Integer      | init_seq_num  | Sequence number of initial     |
    |         |              |               | residue in strand.             |
    +---------+--------------+---------------+--------------------------------+
    | 27      | AChar        | init_ins_code | Insertion code of initial      |
    |         |              |               | residue in strand.             |
    +---------+--------------+---------------+--------------------------------+
    | 29-31   | Residue name | end_res_name  | Name of terminal residue       |
    +---------+--------------+---------------+--------------------------------+
    | 33      | Character    | end_chain_id  | Chain identifier of terminal   |
    |         |              |               | residue                        |
    +---------+--------------+---------------+--------------------------------+
    | 34-37   | Integer      | end_seq_num   | Sequence number of terminal    |
    |         |              |               | residue.                       |
    +---------+--------------+---------------+--------------------------------+
    | 38      | AChar        | end_ins_code  | Insertion code of terminal     |
    |         |              |               | residue.                       |
    +---------+--------------+---------------+--------------------------------+
    | 39-40   | Integer      | sense         | Sense of strand with respect   |
    |         |              |               | to previous strand in the      |
    |         |              |               | sheet. 0 if first strand, 1 if |
    |         |              |               | parallel, and -1 if            |
    |         |              |               | anti-parallel.                 |
    +---------+--------------+---------------+--------------------------------+
    | 42-45   | Atom         | cur_atom      | Registration. Atom name in     |
    |         |              |               | current strand.                |
    +---------+--------------+---------------+--------------------------------+
    | 46-48   | Residue name | curr_res_name | Registration. Residue name in  |
    |         |              |               | current strand                 |
    +---------+--------------+---------------+--------------------------------+
    | 50      | Character    | curr_chain_id | Registration. Chain identifier |
    |         |              |               | in current strand.             |
    +---------+--------------+---------------+--------------------------------+
    | 51-54   | Integer      | curr_res_seq  | Registration. Residue sequence |
    |         |              |               | number in current strand.      |
    +---------+--------------+---------------+--------------------------------+
    | 55      | AChar        | curr_ins_code | Registration. Insertion code   |
    |         |              |               | in current strand.             |
    +---------+--------------+---------------+--------------------------------+
    | 57-60   | Atom         | prev_atom     | Registration. Atom name in     |
    |         |              |               | previous strand.               |
    +---------+--------------+---------------+--------------------------------+
    | 61-63   | Residue name | prev_res_name | Registration. Residue name in  |
    |         |              |               | previous strand.               |
    +---------+--------------+---------------+--------------------------------+
    | 65      | Character    | prev_chain_id | Registration. Chain identifier |
    |         |              |               | in previous strand.            |
    +---------+--------------+---------------+--------------------------------+
    | 66-69   | Integer      | prev_res_seq  | Registration. Residue sequence |
    |         |              |               | number in previous strand.     |
    +---------+--------------+---------------+--------------------------------+
    | 70      | AChar        | prev_ins_code | Registration. Insertion code   |
    |         |              |               | in previous strand.            |
    +---------+--------------+---------------+--------------------------------+
    """

    def __init__(self):
        super().__init__()
        # Strand description: unset until parse_line() runs.
        self.strand = None
        self.sheet_id = None
        self.num_strands = None
        self.init_res_name = None
        self.init_chain_id = None
        self.init_seq_num = None
        self.init_ins_code = None
        self.end_res_name = None
        self.end_chain_id = None
        self.end_seq_num = None
        self.end_ins_code = None
        self.sense = None
        # Registration fields default to empty strings so that a record
        # whose line stops early can still be formatted.
        self.cur_atom = ""
        self.curr_res_name = ""
        self.curr_chain_id = ""
        self.curr_res_seq = ""
        self.curr_ins_code = ""
        self.prev_atom = ""
        self.prev_res_name = ""
        self.prev_chain_id = ""
        self.prev_res_seq = ""
        self.prev_ins_code = ""

    def parse_line(self, line):
        """Parse PDB-format line.

        The strand description is mandatory. The registration fields are
        read on a best-effort basis: reading stops quietly where the line
        runs out, and a non-integer residue sequence number is stored as an
        empty string.

        :param str line:  line to parse
        :raises ValueError:  if a mandatory integer field cannot be converted
        :raises IndexError:  if the line ends inside the strand description
        """
        super().parse_line(line)

        def int_or_blank(text):
            # Registration sequence numbers may legitimately be blank.
            try:
                return int(text.strip())
            except ValueError:
                return ""

        # (attribute, zero-based index or slice, converter) in column order.
        layout = (
            ("strand", slice(7, 10), int),
            ("sheet_id", slice(11, 14), str),
            ("num_strands", slice(14, 16), int),
            ("init_res_name", slice(17, 20), str),
            ("init_chain_id", 21, str),
            ("init_seq_num", slice(22, 26), int),
            ("init_ins_code", 26, str),
            ("end_res_name", slice(28, 31), str),
            ("end_chain_id", 32, str),
            ("end_seq_num", slice(33, 37), int),
            ("end_ins_code", 37, str),
            ("sense", slice(38, 40), int),
            ("cur_atom", slice(41, 45), str),
            ("curr_res_name", slice(45, 48), str),
        )
        for attr, where, convert in layout:
            setattr(self, attr, convert(line[where].strip()))

        try:
            self.curr_chain_id = line[49].strip()
            self.curr_res_seq = int_or_blank(line[50:54])
            self.curr_ins_code = line[54].strip()
            self.prev_atom = line[56:60].strip()
            self.prev_res_name = line[60:63].strip()
            self.prev_chain_id = line[64].strip()
            self.prev_res_seq = int_or_blank(line[65:69])
            self.prev_ins_code = line[69].strip()
        except IndexError:
            # Line ended before the remaining registration columns.
            pass

    def __str__(self):
        """Return the record as a fixed-column SHEET line."""

        def pad_atom(atom):
            # One-character names get one extra leading blank.
            return f"  {atom:3}" if len(atom) == 1 else f" {atom:4}"

        pieces = [
            f"SHEET  {self.strand:3} {self.sheet_id:>3}{self.num_strands:2}",
            (
                f" {self.init_res_name:3} {self.init_chain_id:1}"
                f"{self.init_seq_num:4}{self.init_ins_code:1}"
            ),
            (
                f" {self.end_res_name:3} {self.end_chain_id:1}"
                f"{self.end_seq_num:4}{self.end_ins_code:1}"
            ),
            f"{self.sense:2}",
            pad_atom(self.cur_atom),
            (
                f"{self.curr_res_name:3} {self.curr_chain_id:1}"
                f"{self.curr_res_seq:4}{self.curr_ins_code:1}"
            ),
            pad_atom(self.prev_atom),
            (
                f"{self.prev_res_name:3} {self.prev_chain_id:1}"
                f"{self.prev_res_seq:4}{self.prev_ins_code:1}"
            ),
        ]
        return "".join(pieces)