# Source code for pdb_eda.pdbParser

"""
PDB Parser (pdb_eda.pdbParser)
-------------------------------------------------------

This module provides functions that read and parse PDB format files and return PDB objects.
Details of the PDB format can be found at ftp://ftp.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_Letter.pdf.
"""
import re
import numpy as np


def readPDBfile(file, mode='lite'):
    """Creates :class:`pdb_eda.pdbParser.PDBentry` object from file name.

    A string argument is treated as a path: the file is opened in text mode, parsed,
    and closed again. Any other argument is passed to :func:`parse` unchanged and is
    left open for the caller to manage.

    :param file: The name of a PDB formatted file or a file handle.
    :type file: :py:class:`str`, :class:`io.IOBase`
    :param mode: Parsing mode forwarded to :func:`parse`, defaults to 'lite'
    :type mode: :py:class:`str`

    :return: pdbEntry
    :rtype: :class:`pdb_eda.pdbParser.PDBentry`
    """
    if isinstance(file, str):
        with open(file, "r") as fileHandle:
            return parse(fileHandle, mode)
    return parse(file, mode)

def parse(handle, mode='lite'):
    """Creates :class:`pdb_eda.pdbParser.PDBentry` object from file handle object.

    Header fields whose record is not found keep their default value of ``0``; fields
    that are found are stored as stripped strings. Reading stops at the second
    ``MODEL`` record, so at most the first model is parsed.

    :param handle: The file handle of a PDB formatted file, opened in text mode.
    :type handle: :class:`io.IOBase`
    :param mode: Whether or not to parse the atoms. 'lite' stops at the first ``ATOM`` or ``HETATM``
        record and returns the header only; any other value also parses the coordinate records,
        defaults to 'lite'
    :type mode: :py:class:`str`

    :return: pdbEntry
    :rtype: :class:`pdb_eda.pdbParser.PDBentry`
    """
    coordinatePrefixes = ('ATOM', 'HETATM')
    atoms = []
    rotationMats = []
    modelCount = 0
    pdbid = date = method = resolution = rValue = rFree = program = spaceGroup = 0
    # Iterate lazily: in 'lite' mode we stop after the header, so the (much larger)
    # coordinate section never has to be read into memory.
    for record in handle:
        if mode == 'lite' and record.startswith(coordinatePrefixes):
            # The coordinate section has begun; no header records follow it.
            break
        elif record.startswith('HEADER'):
            # Last two characters of the deposition date field (the two-digit year in
            # the standard DD-MMM-YY layout) and the 4-character entry ID.
            date = record[57: 57+2].strip()
            pdbid = record[62: 62+4].strip()
        elif record.startswith('EXPDTA'):
            method = record[6: 6+30]
            method = method.strip().replace(' ', '_')
        elif record.startswith('REMARK   2 RESOLUTION'):
            match = re.search(r'RESOLUTION.(.+)ANGSTROMS', record)
            if match:
                resolution = match.group(1).strip()
        elif record.startswith('REMARK   3   R VALUE'):
            # Several REMARK 3 lines share this prefix; only the "(WORKING SET)" one is used.
            match = re.search(r'^REMARK   3   R VALUE            \(WORKING SET\) : (.+)$', record)
            if match:
                rValue = match.group(1).strip()
        elif record.startswith('REMARK   3   FREE R VALUE'):
            match = re.search(r'^REMARK   3   FREE R VALUE                     : (.+)$', record)
            if match:
                rFree = match.group(1).strip()
        elif record.startswith('REMARK   3   PROGRAM'):
            match = re.search(r'^REMARK   3   PROGRAM     : (.+)$', record)
            if match:
                program = match.group(1).strip().replace(' ', '_')
        elif record.startswith('MODEL'):
            modelCount += 1
            if modelCount > 1:
                break
        elif record.startswith('REMARK 290 SYMMETRY OPERATORS FOR SPACE GROUP:'):
            match = re.search(r'^REMARK 290 SYMMETRY OPERATORS FOR SPACE GROUP: (.+)$', record)
            if match:
                spaceGroup = match.group(1).strip().replace(' ', '_')
        elif record.startswith('REMARK 290   SMTRY'):
            match = re.search(r'^REMARK 290   SMTRY(.+)$', record)
            if match:
                # items: [row number, operator number, three rotation terms, translation]
                items = match.group(1).split()
                rowNumber = int(items[0])
                operatorNumber = int(items[1])
                # Grow the list until this operator has a slot, so a skipped operator
                # number cannot cause an IndexError.
                while len(rotationMats) < operatorNumber:
                    rotationMats.append(np.zeros((3, 4)))
                rotationMats[operatorNumber - 1][rowNumber - 1] = [float(i) for i in items[2:6]]
        elif record.startswith(coordinatePrefixes):
            # Fixed-column slices of the coordinate record; all values are kept as strings.
            keyValues = {'record': record,
                         'recordType': record[0: 0+6],
                         'serial': record[6: 6+5],
                         'atomName': record[12: 12+4],
                         'alternateLocation': record[16: 16+1],
                         'residueName': record[17: 17+3],
                         'chainID': record[21: 21+1],
                         'residueNumber': record[22: 22+4],
                         'x': record[30: 30+8],
                         'y': record[38: 38+8],
                         'z': record[46: 46+8],
                         'occupancy': record[54: 54+6],
                         'bFactor': record[60: 60+6],
                         'element': record[76: 76+2]}

            keyValues = {key: value.strip() for (key, value) in keyValues.items()}
            atoms.append(Atom(keyValues))

    header = PDBheader(pdbid, date, method, resolution, rValue, rFree, program, spaceGroup, rotationMats)
    return PDBentry(header, atoms)


class PDBentry:
    """Container that pairs a :class:`pdb_eda.pdbParser.PDBheader` with a list of :class:`pdb_eda.pdbParser.Atom` objects."""

    def __init__(self, header, atoms):
        """Store the header and the atom list on the new entry without copying them.

        :param header: Header information of the entry.
        :type header: :class:`pdb_eda.pdbParser.PDBheader`
        :param atoms: list of :class:`pdb_eda.pdbParser.Atom` objects
        :type atoms: :py:class:`list`
        """
        self.header, self.atoms = header, atoms


class PDBheader:
    """Plain record of the header-level information of a PDB entry."""

    def __init__(self, PDBid, date, method, resolution, rValue, rFree, program, spaceGroup, rotationMats):
        """Store every argument unchanged on the instance; no conversion or validation is done.

        :param PDBid: PDB entry ID, stored as ``pdbid``.
        :type PDBid: :py:class:`str`
        :param date: PDB structure publish date.
        :type date: :py:class:`str`
        :param method: Experiment method, i.e. X-ray, NMR, etc.
        :type method: :py:class:`str`
        :param resolution: Structure resolution if applicable.
        :param rValue: Structure's R value.
        :param rFree: Structure's R free value.
        :param program: Software for acquiring the structure.
        :type program: :py:class:`str`
        :param spaceGroup: Structure's space group if applicable.
        :type spaceGroup: :py:class:`str`
        :param rotationMats: Structure's rotation matrix and translation matrix if applicable.
        :type rotationMats: :py:class:`list`
        """
        # Identification and experiment description.
        self.pdbid, self.date, self.method = PDBid, date, method
        # Quality indicators.
        self.resolution, self.rValue, self.rFree = resolution, rValue, rFree
        # Refinement software and crystallographic symmetry.
        self.program, self.spaceGroup = program, spaceGroup
        self.rotationMats = rotationMats


class Atom:
    """Record of one atom's fields, copied from a dictionary of key-value pairs."""

    def __init__(self, keyValues):
        """Copy the known atom fields from ``keyValues`` onto same-named attributes.

        Keys other than the ones listed below are ignored; a missing key raises
        :py:class:`KeyError`.

        :param keyValues: key-value pairs for atom information.
        :type keyValues: :py:class:`dict`
        """
        fieldNames = (
            'record', 'recordType', 'serial', 'atomName', 'alternateLocation',
            'residueName', 'chainID', 'residueNumber',
            'x', 'y', 'z', 'occupancy', 'bFactor', 'element',
        )
        for fieldName in fieldNames:
            setattr(self, fieldName, keyValues[fieldName])