Source code for pdb_eda.densityAnalysis

"""
PDB Electron Density Analysis (pdb_eda.densityAnalysis)
-------------------------------------------------------

This module provides methods for the creation of the :class:`pdb_eda.densityAnalysis` class given a PDB id,
along with methods to analyze its 2Fo-Fc and Fo-Fc electron density maps.
"""

import copy
import urllib.request
import os.path
import gzip

import itertools
import collections
import json
import numpy as np
import Bio.PDB as biopdb
import scipy.spatial
from scipy import stats


from . import ccp4
from . import pdbParser

try:
    from . import cutils as utils
except ImportError:
    from . import utils

## Starting data originally from https://arxiv.org/pdf/0804.2488.pdf
paramsPath = os.path.join(os.path.dirname(__file__), 'conf/optimized_params.json')
f000ParamsPath = os.path.join(os.path.dirname(__file__), 'conf/f000_parameters.json.gz')

paramsGlobal = None
with open(paramsPath, 'r') as fh:
    paramsGlobal = json.load(fh)

radiiGlobal = paramsGlobal['radii']
slopesGlobal = paramsGlobal['slopes']
bondedAtomsGlobal = paramsGlobal['bonded_atoms']
elementElectronsGlobal = None
masterFullAtomNameMapElectronsGlobal = None
fullAtomNameMapElectronsGlobal = paramsGlobal['full_atom_name_map_electrons']
fullAtomNameMapAtomTypeGlobal = paramsGlobal['full_atom_name_map_atom_type']
atomTypeLengthGlobal = max(len(atomType) for atomType in fullAtomNameMapAtomTypeGlobal.values()) + 5

[docs]def setGlobals(params): """Sets global parameters. Typically used for optimizing parameters. :param params: dictionary of parameters needed for pdb_eda calculations. :type params: :py:class:`dict` """ global paramsGlobal global radiiGlobal global slopesGlobal global bondedAtomsGlobal global fullAtomNameMapElectronsGlobal global fullAtomNameMapAtomTypeGlobal global atomTypeLengthGlobal paramsGlobal = params radiiGlobal = paramsGlobal['radii'] slopesGlobal = paramsGlobal['slopes'] bondedAtomsGlobal = paramsGlobal['bonded_atoms'] fullAtomNameMapElectronsGlobal = paramsGlobal['full_atom_name_map_electrons'] fullAtomNameMapAtomTypeGlobal = paramsGlobal['full_atom_name_map_atom_type'] atomTypeLengthGlobal = max(len(atomType) for atomType in fullAtomNameMapAtomTypeGlobal.values()) + 5
[docs]def loadF000Parameters(): """Loads and assigns global parameters needed for F000 estimation.""" with gzip.open(f000ParamsPath, 'rt') as gzipFile: f000Params = json.load(gzipFile) global elementElectronsGlobal elementElectronsGlobal = f000Params["element_map_electrons"] global masterFullAtomNameMapElectronsGlobal masterFullAtomNameMapElectronsGlobal = f000Params["full_atom_name_map_electrons"]
ccp4urlPrefix = "http://www.ebi.ac.uk/pdbe/coordinates/files/" ccp4folder = './ccp4_data/' pdbfolder = './pdb_data/' pdburlPrefix = "http://ftp.rcsb.org/pub/pdb/data/structures/all/pdb/" mmcifurlPrefix = "http://ftp.rcsb.org/pub/pdb/data/structures/all/mmCIF/"
[docs]def fromPDBid(pdbid, ccp4density=True, ccp4diff=True, pdbbio=True, pdbi=True, downloadFile=True, mmcif=False): """Creates :class:`pdb_eda.densityAnalysis.DensityAnalysis` object given the PDB id if the id is valid and the structure has electron density file available. :param pdbid: PDB entry ID. :type pdbid: :py:class:`str` :param ccp4density: Whether to generate ccp4 density object, defaults to :py:obj:`True`. :type ccp4density: :py:class:`bool` :param ccp4diff: Whether to generate in default of ccp4 difference density object, defaults to :py:obj:`True`. :type ccp4diff: :py:class:`bool` :param pdbbio: Whether to generate in default of bio.PDB object, defaults to :py:obj:`True`. :type pdbbio: :py:class:`bool` :param pdbi: Whether to generate in default of PDB object, defaults to :py:obj:`True`. :type pdbi: :py:class:`bool` :param downloadFile: Whether to save the downloaded ccp4 density, ccp4 difference density, and PDB file, defaults to :py:obj:`True`. :type downloadFile: :py:class:`bool` :param mmcif: Whether to download the mmCif file, defaults to :py:obj:`False`. :type mmcif: :py:class:`bool` :return: densityAnalysis object :rtype: :class:`pdb_eda.densityAnalysis.DensityAnalysis` """ pdbid = pdbid.lower() densityObj = None diffDensityObj = None pdbObj = None biopdbObj = None try: if ccp4density: ## ccp4 2Fo - Fc map parser if downloadFile: if not os.path.exists(ccp4folder): os.makedirs(ccp4folder) ccp4file = ccp4folder + pdbid + '.ccp4' if not os.path.isfile(ccp4file): url = ccp4urlPrefix + pdbid + '.ccp4' urllib.request.urlretrieve(url, ccp4file) densityObj = ccp4.read(ccp4file, pdbid) else: densityObj = ccp4.readFromPDBID(pdbid) densityObj.densityCutoff = densityObj.meanDensity + 1.5 * densityObj.stdDensity densityObj.densityCutoffFromHeader = densityObj.header.densityMean + 1.5 * densityObj.header.rmsd if ccp4diff: ## ccp4 Fo - Fc map parser if downloadFile: if not os.path.exists(ccp4folder): os.makedirs(ccp4folder) ccp4diffFile = ccp4folder + pdbid + '_diff.ccp4' if not os.path.isfile(ccp4diffFile): url = ccp4urlPrefix + pdbid + '_diff.ccp4' urllib.request.urlretrieve(url, ccp4diffFile) diffDensityObj = ccp4.read(ccp4diffFile, pdbid) else: diffDensityObj = ccp4.readFromPDBID(pdbid + '_diff') diffDensityObj.diffDensityCutoff = diffDensityObj.meanDensity + 3 * diffDensityObj.stdDensity if pdbbio or pdbi: pdbfile = pdbfolder + 'pdb' + pdbid + '.ent.gz' if not os.path.isfile(pdbfile): if not os.path.exists(pdbfolder): os.makedirs(pdbfolder) url = pdburlPrefix + 'pdb' + pdbid + '.ent.gz' urllib.request.urlretrieve(url, pdbfile) if pdbbio: # Bio Python PDB parser with gzip.open(pdbfile, 'rt') as gzipFile: parser = biopdb.PDBParser(QUIET=True) biopdbObj = parser.get_structure(pdbid, gzipFile) if pdbi: with gzip.open(pdbfile, 'rt') as gzipFile: pdbObj = pdbParser.readPDBfile(gzipFile) if mmcif and downloadFile: mmcifFile = pdbfolder + pdbid + '.cif.gz' if not os.path.isfile(mmcifFile): if not os.path.exists(pdbfolder): os.makedirs(pdbfolder) url = mmcifurlPrefix + pdbid + '.cif.gz' urllib.request.urlretrieve(url, mmcifFile) except: return 0 return DensityAnalysis(pdbid, densityObj, diffDensityObj, biopdbObj, pdbObj)
[docs]def fromFile(pdbFile, ccp4DensityFile=None, ccp4DiffDensityFile=None): """Creates :class:`pdb_eda.densityAnalysis.DensityAnalysis` object given the appropriate PDB and CCP4 files. :param pdbFile: PDB entry file. :type pdbFile: :py:class:`str`, :class:`io.IOBase` :param ccp4DensityFile: ccp4 density file. :type ccp4DensityFile: :py:class:`str`, :class:`io.IOBase`, optional :param ccp4DiffDensityFile: ccp4 difference density file. :type ccp4DiffDensityFile: :py:class:`str`, :class:`io.IOBase`, optional :return: densityAnalysis object :rtype: :class:`pdb_eda.densityAnalysis.DensityAnalysis` """ pdbid = "xxxx" densityObj = None diffDensityObj = None try: if ccp4DensityFile != None: if isinstance(ccp4DensityFile,str): densityObj = ccp4.read(ccp4DensityFile, pdbid) else: densityObj = ccp4.parse(ccp4DensityFile, pdbid) densityObj.densityCutoff = densityObj.meanDensity + 1.5 * densityObj.stdDensity densityObj.densityCutoffFromHeader = densityObj.header.densityMean + 1.5 * densityObj.header.rmsd if ccp4DiffDensityFile != None: if isinstance(ccp4DiffDensityFile,str): diffDensityObj = ccp4.read(ccp4DiffDensityFile, pdbid) else: diffDensityObj = ccp4.parse(ccp4DiffDensityFile, pdbid) diffDensityObj.diffDensityCutoff = diffDensityObj.meanDensity + 3 * diffDensityObj.stdDensity if isinstance(pdbFile,str) and pdbFile.endswith(".gz"): with gzip.open(pdbFile, 'rt') as gzipFile: parser = biopdb.PDBParser(QUIET=True) biopdbObj = parser.get_structure(pdbid, gzipFile) with gzip.open(pdbFile, 'rt') as gzipFile: pdbObj = pdbParser.readPDBfile(gzipFile) else: parser = biopdb.PDBParser(QUIET=True) biopdbObj = parser.get_structure(pdbid, pdbFile) pdbObj = pdbParser.readPDBfile(pdbFile) except: return 0 return DensityAnalysis(pdbid, densityObj, diffDensityObj, biopdbObj, pdbObj)
[docs]def cleanPDBid(pdbid): """Removes PDB entry, ccp4, and mmcif files associated with a PDB id. :param pdbid: PDB entry ID. :type pdbid: :py:class:`str` :return: bool whether the operation was successful. :rtype: :py:class:`bool` """ pdbid = pdbid.lower() try: ccp4file = ccp4folder + pdbid + '.ccp4' if os.path.isfile(ccp4file): os.remove(ccp4file) ccp4diffFile = ccp4folder + pdbid + '_diff.ccp4' if os.path.isfile(ccp4diffFile): os.remove(ccp4diffFile) pdbfile = pdbfolder + 'pdb' + pdbid + '.ent.gz' if os.path.isfile(pdbfile): os.remove(pdbfile) mmcifFile = pdbfolder + pdbid + '.cif.gz' if os.path.isfile(mmcifFile): os.remove(mmcifFile) except: return False return True
[docs]def testCCP4URL(pdbid): """Test whether the pdbid has electron density maps by querying if the PDBe API has electron density statistics. :param pdbid: PDB entry ID. :type pdbid: :py:class:`str` :return: bool on test success. :rtype: :py:class:`bool` """ try: url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/electron_density_statistics/" + pdbid request = urllib.request.urlopen(url) except urllib.request.HTTPError: return False return True
[docs]class DensityAnalysis(object): """DensityAnalysis class that stores the density, difference density, bio.PDB, and PDB objects."""
[docs] def __init__(self, pdbid, densityObj=None, diffDensityObj=None, biopdbObj=None, pdbObj=None): """`densityAnalysis` initializer. Leave `densityObj`, `diffDensityObj`, `biopdbObj` and `pdbObj` as :py:obj:`None` to be created. They are not required for initialization but could be required for some methods. :param pdbid: PDB entry ID. :type pdbid: :py:class:`str` :param densityObj: DensityMatrix object. :type densityObj: :class:`pdb_eda.ccp4.DensityMatrix`, optional :param diffDensityObj: optional DensityMatrix object. :type diffDensityObj: :class:`pdb_eda.ccp4.DensityMatrix`, optional :param biopdbObj: optional Bio.PDB Structure object. :type biopdbObj: :class:`Bio.PDB.Structure.Structure`, optional :param pdbObj: optional PDBentry object. :type pdbObj: :class:`pdb_eda.pdbParser.PDBentry`, optional """ self.pdbid = pdbid self.densityObj = densityObj self.diffDensityObj = diffDensityObj self.biopdbObj = biopdbObj self.pdbObj = pdbObj self._symmetryAtoms = None self._symmetryOnlyAtoms = None self._asymmetryAtoms = None self._symmetryAtomCoords = None self._symmetryOnlyAtomCoords = None self._asymmetryAtomCoords = None self._greenBlobList = None self._redBlobList = None self._blueBlobList = None self._fc = None self._medians = None self._atomCloudDescriptions = None self._residueCloudDescriptions = None self._domainCloudDescriptions = None self._F000 = None self._densityElectronRatio = None self._numVoxelsAggregated = None self._totalAggregatedElectrons = None self._totalAggregatedDensity = None self._atomTypeOverlapCompleteness = None self._atomTypeOverlapIncompleteness = None
@property def symmetryAtoms(self): """Returns list of symmetry atoms. :return: symmetryAtoms :rtype: :py:class:`list` """ if self._symmetryAtoms is None: self._calculateSymmetryAtoms() return self._symmetryAtoms @property def symmetryOnlyAtoms(self): """Returns list of non-[0,0,0,0] symmetry atoms. :return: symmetryAtoms :rtype: :py:class:`list` """ if self._symmetryOnlyAtoms is None: self._calculateSymmetryAtoms() return self._symmetryOnlyAtoms @property def asymmetryAtoms(self): """Returns list of [0,0,0,0] symmetry atoms. :return: symmetryAtoms :rtype: :py:class:`list` """ if self._asymmetryAtoms is None: self._calculateSymmetryAtoms() return self._asymmetryAtoms @property def symmetryAtomCoords(self): """Returns list of symmetry atom xyz coordinates. :return: symmetryAtomCoords :rtype: :py:class:`list` """ if self._symmetryAtoms is None: self._calculateSymmetryAtoms() return self._symmetryAtomCoords @property def symmetryOnlyAtomCoords(self): """Returns list of non-[0,0,0,0] symmetry atom xyz coordinates. :return: symmetryAtomCoords :rtype: :py:class:`list` """ if self._symmetryOnlyAtoms is None: self._calculateSymmetryAtoms() return self._symmetryOnlyAtomCoords @property def asymmetryAtomCoords(self): """Returns list of [0,0,0,0] symmetry atom xyz coordinates. :return: symmetryAtomCoords :rtype: :py:class:`list` """ if self._asymmetryAtoms is None: self._calculateSymmetryAtoms() return self._asymmetryAtomCoords @property def greenBlobList(self): """Returns list of all positive significant difference density blobs. :return: greenBlobList :rtype: :py:class:`list` """ if self._greenBlobList is None: self._greenBlobList = self.diffDensityObj.createFullBlobList(self.diffDensityObj.diffDensityCutoff) return self._greenBlobList @property def redBlobList(self): """Returns list of all negative significant difference density blobs. :return: redBlobList :rtype: :py:class:`list` """ if self._redBlobList is None: self._redBlobList = self.diffDensityObj.createFullBlobList(-1 * self.diffDensityObj.diffDensityCutoff) return self._redBlobList @property def blueBlobList(self): """Returns list of all positive significant density blobs. :return: blueBlobList :rtype: :py:class:`list` """ if self._blueBlobList is None: self._blueBlobList = self.densityObj.createFullBlobList(self.densityObj.densityCutoff) return self._blueBlobList @property def fc(self): """Returns the Fc map = 2Fo-Fc - Fo-Fc. :return: fc :rtype: :class:`pdb_eda.ccp4.DensityMatrix` """ if self._fc is None: self._fc = copy.deepcopy(self.densityObj) self._fc.density = self.densityObj.density - self.diffDensityObj.density * 2 return self._fc @property def fo(self): """Returns the Fo map = 2Fo-Fc. :return: fo :rtype: :class:`pdb_eda.ccp4.DensityMatrix` """ return self.densityObj # using the 2Fo-Fc as the Fo map. @property def F000(self): """Returns estimated F000. This estimate may not be that accurate. :return: F000 :rtype: :py:class:`float` """ if self._F000 is None: self._F000 = self.estimateF000() return self._F000 @property def medians(self): """Returns median field values calculated per atom type. :return: medians :rtype: :class:`numpy.array` """ if self._medians is None: self.aggregateCloud() return self._medians @property def atomCloudDescriptions(self): """Returns aggregated atom cloud descriptions. :return: atomCloudDescriptions :rtype: :class:`numpy.array` """ if self._atomCloudDescriptions is None: self.aggregateCloud() return self._atomCloudDescriptions @property def residueCloudDescriptions(self): """Returns aggregated residue cloud descriptions. :return: residueCloudDescriptions :rtype: :py:class:`list` """ if self._residueCloudDescriptions is None: self.aggregateCloud() return self._residueCloudDescriptions @property def domainCloudDescriptions(self): """Returns aggregated domain cloud descriptions. :return: domainCloudDescriptions :rtype: :py:class:`list` """ if self._domainCloudDescriptions is None: self.aggregateCloud() return self._domainCloudDescriptions @property def numVoxelsAggregated(self): """Returns number of aggregated voxels in cloud analysis. :return: numVoxelsAggregated :rtype: :py:class:`int` """ if self._numVoxelsAggregated is None: self.aggregateCloud() return self._numVoxelsAggregated @property def totalAggregatedElectrons(self): """Returns total amount of aggregated electrons in cloud analysis. :return: totalAggregatedElectrons :rtype: :py:class:`float` """ if self._totalAggregatedElectrons is None: self.aggregateCloud() return self._totalAggregatedElectrons @property def totalAggregatedDensity(self): """Returns total amount of aggregated density in cloud analysis. :return: totalAggregatedDensity :rtype: :py:class:`float` """ if self._totalAggregatedDensity is None: self.aggregateCloud() return self._totalAggregatedDensity @property def densityElectronRatio(self): """Returns the density-electron ratio estimated from cloud analysis. :return: densityElectronRatio :rtype: :py:class:`float` """ if self._densityElectronRatio == None: self.aggregateCloud() return self._densityElectronRatio @property def atomTypeOverlapCompleteness(self): """Returns atom-type overlap completeness counts. :return: atomTypeOverlapCompleteness :rtype: :py:class:`dict` """ if self._atomTypeOverlapCompleteness is None: self.aggregateCloud() return self._atomTypeOverlapCompleteness @property def atomTypeOverlapIncompleteness(self): """Returns atom-type overlap incompleteness counts. :return: atomTypeOverlapIncompleteness :rtype: :py:class:`dict` """ if self._atomTypeOverlapIncompleteness is None: self.aggregateCloud() return self._atomTypeOverlapIncompleteness residueCloudHeader = ['chain', 'residue_number', 'residue_name', 'local_density_electron_ratio', 'num_voxels', 'electrons', 'volume', 'centroid_xyz'] domainCloudHeader = residueCloudHeader
[docs] def aggregateCloud(self, minCloudElectrons=25.0, minTotalElectrons=400.0): """Aggregate the electron density map clouds by atom, residue, and domain. Calculate and populate `densityAnalysis.densityElectronRatio` and `densityAnalysis.medians` data members. :param minCloudElectrons: minimum number of electrons needed to aggregate a residue or domain cloud., defaults to 25.0 :type minCloudElectrons: :py:class:`float` :param minTotalElectrons: mininum number of electrons required to calculate a density-electron ratio., defaults to 400.0 :type minTotalElectrons: :py:class:`float` """ densityObj = self.densityObj biopdbObj = self.biopdbObj domainClouds = [] domainPool = [] domainList = [] residueList = [] atomList = [] currentRadii = radiiGlobal currentSlopes = slopesGlobal completelyOverlappedAtomTypes = collections.defaultdict(int) incompletelyOverlappedAtomTypes = collections.defaultdict(int) allAtomClouds = {} centroidDistances = [] for residue in biopdbObj.get_residues(): if residue.id[0] != ' ': # skip HETATOM residues. continue for atom in residue.child_list: resAtom = residueAtomName(atom) if resAtom not in fullAtomNameMapAtomTypeGlobal.keys() or atom.get_occupancy() == 0: continue atomClouds = densityObj.findAberrantBlobs(atom.coord, currentRadii[fullAtomNameMapAtomTypeGlobal[resAtom]], densityObj.densityCutoff) allAtomClouds[tuple(atom.coord)] = atomClouds if len(atomClouds) > 0: centroidDistances.append(min(np.linalg.norm(atom.coord - i.centroid) for i in atomClouds)) centroidDistanceCutoff = np.nanmedian(centroidDistances) + 2.5 * np.nanstd(centroidDistances) # ~99% cutoff, but this is calculated across all atom-types. for residue in biopdbObj.get_residues(): if residue.id[0] != ' ': # skip HETATOM residues. continue residuePool = [] atomCloudIndeces = {} for atom in residue.child_list: resAtom = residueAtomName(atom) if resAtom not in fullAtomNameMapAtomTypeGlobal.keys() or atom.get_occupancy() == 0: continue ## Calculate atom clouds atomClouds = allAtomClouds[tuple(atom.coord)] if len(atomClouds) == 0: continue elif len(atomClouds) == 1: bestAtomCloud = atomClouds[0] else: distances = [np.linalg.norm(atom.coord - i.centroid) for i in atomClouds] minDistance = min(distances) if minDistance > centroidDistanceCutoff: continue index = distances.index(minDistance) bestAtomCloud = atomClouds[index] for aCloud in atomClouds: aCloud.atoms = [atom] atomCloudIndeces[resAtom] = [len(residuePool)+index for index in range(len(atomClouds))] residuePool = residuePool + atomClouds ## For aggregating atom clouds into residue clouds atomList.append([residue.parent.id, residue.id[1], atom.parent.resname, atom.name, fullAtomNameMapAtomTypeGlobal[resAtom], bestAtomCloud.totalDensity / fullAtomNameMapElectronsGlobal[resAtom] / atom.get_occupancy(), len(bestAtomCloud.crsList), fullAtomNameMapElectronsGlobal[resAtom], atom.get_bfactor(), np.linalg.norm(atom.coord - bestAtomCloud.centroid), bestAtomCloud.centroid]) ## End atom loop overlap = np.zeros((len(residuePool), len(residuePool))) for i in range(len(residuePool)): for j in range(i+1, len(residuePool)): overlap[i][j] = overlap[j][i] = utils.testOverlap(residuePool[i],residuePool[j]) ## Calculate atom-type overlap completeness. Needed for parameter optimization. for atom in residue.child_list: resAtom = residueAtomName(atom) if resAtom in atomCloudIndeces: if all(any(overlap[index1][index2] for index1 in atomCloudIndeces[resAtom] for index2 in atomCloudIndeces[resAtom2]) for resAtom2 in bondedAtomsGlobal[resAtom] if resAtom2 in atomCloudIndeces): completelyOverlappedAtomTypes[fullAtomNameMapAtomTypeGlobal[resAtom]] += 1 else: incompletelyOverlappedAtomTypes[fullAtomNameMapAtomTypeGlobal[resAtom]] += 1 ## Group connected residue density clouds together from individual atom clouds resClouds = [] usedIdx = set() for startingIndex in range(len(residuePool)): if startingIndex not in usedIdx: newCluster = {index for index, o in enumerate(overlap[startingIndex]) if o} currCluster = set([startingIndex]) currCluster.update(newCluster) while len(newCluster): newCluster = {index for oldIndex in newCluster for index, o in enumerate(overlap[oldIndex]) if index not in currCluster and o} currCluster.update(newCluster) usedIdx.update(currCluster) resCloud = residuePool[currCluster.pop()].clone() for idx in currCluster: resCloud.merge(residuePool[idx]) resClouds.append(resCloud) for cloud in resClouds: resElectrons = sum([fullAtomNameMapElectronsGlobal[residueAtomName(atom)] * atom.get_occupancy() for atom in cloud.atoms]) if resElectrons >= minCloudElectrons: residueList.append([residue.parent.id, residue.id[1], residue.resname, cloud.totalDensity / resElectrons, len(cloud.crsList), resElectrons, len(cloud.crsList) * densityObj.header.unitVolume, cloud.centroid]) domainPool = domainPool + resClouds ## For aggregating residue clouds into domain clouds ## End residue ## Group connected domain density clouds together from individual residue clouds overlap = np.zeros((len(domainPool), len(domainPool))) for i in range(len(domainPool)): for j in range(i+1, len(domainPool)): overlap[i][j] = overlap[j][i] = utils.testOverlap(domainPool[i],domainPool[j]) usedIdx = set() for startingIndex in range(len(domainPool)): if startingIndex not in usedIdx: newCluster = {index for index, o in enumerate(overlap[startingIndex]) if o} currCluster = set([startingIndex]) currCluster.update(newCluster) while len(newCluster): newCluster = {index for oldIndex in newCluster for index, o in enumerate(overlap[oldIndex]) if index not in currCluster and o} currCluster.update(newCluster) usedIdx.update(currCluster) domainCloud = domainPool[currCluster.pop()].clone() for idx in currCluster: domainCloud.merge(domainPool[idx]) domainClouds.append(domainCloud) ##End domain ## Calculate densityElectronRatio, which is technically a weighted mean value now. numVoxels = 0 totalElectrons = 0 totalDensity = 0 for cloud in domainClouds: atom = cloud.atoms[0] domainElectrons = sum([fullAtomNameMapElectronsGlobal[residueAtomName(atom)] * atom.get_occupancy() for atom in cloud.atoms]) totalElectrons += domainElectrons numVoxels += len(cloud.crsList) totalDensity += cloud.totalDensity if domainElectrons >= minCloudElectrons: domainList.append([atom.parent.parent.id, atom.parent.id[1], atom.parent.resname, cloud.totalDensity / domainElectrons, len(cloud.crsList), domainElectrons, len(cloud.crsList) * densityObj.header.unitVolume, cloud.centroid]) if totalElectrons < minTotalElectrons: return else: densityElectronRatio = totalDensity / totalElectrons domainList.sort(key=lambda x: x[3]) ## End calculate densityElectronRatio def calcSlope(data, atom_type): if len(data['chain']) <= 2 or len(np.unique(data['bfactor'])) == 1: ## Less than three data points or all b factors are the same. Should change this to something more robust like 15 unique values. return currentSlopes[atom_type] slope, intercept, r_vanue, p_value, std_err = stats.linregress(np.log(data['bfactor']), (data['adj_density_electron_ratio']-densityElectronRatio)/densityElectronRatio) return currentSlopes[atom_type] if p_value > 0.05 else slope try: dataType = np.dtype([('chain', np.dtype(('U', 20))), ('residue_number', int), ('residue_name', np.dtype(('U', 10)) ), ('atom_name', np.dtype(('U', 10)) ), ('atom_type', np.dtype(('U', atomTypeLengthGlobal)) ), ('density_electron_ratio', float), ('num_voxels', int), ('electrons', int), ('bfactor', float), ('centroid_distance', float), ('centroid_xyz', float, (3,)), ('adj_density_electron_ratio', float), ('domain_fraction', float), ('corrected_fraction', float), ('corrected_density_electron_ratio', float), ('volume', float)]) atoms = np.asarray([tuple(atom+[0.0 for x in range(5)]) for atom in atomList],dataType) # must pass in list of tuples to create ndarray correctly. if not np.isnan(atoms['centroid_distance']).all(): centroidCutoff = np.nanmedian(atoms['centroid_distance']) + np.nanstd(atoms['centroid_distance']) * 2 atoms = atoms[atoms['centroid_distance'] < centroidCutoff] atom_types = np.unique(atoms['atom_type']) medians = { column : {atom_type : np.nanmedian(atoms[column][atoms['atom_type'] == atom_type]) for atom_type in atom_types} for column in ['num_voxels'] } medians_translator = np.vectorize(lambda column,atom_type : medians[column][atom_type]) ## Normalize by volume (numVoxels) atoms['adj_density_electron_ratio'] = atoms['density_electron_ratio'] / atoms['num_voxels'] * medians_translator('num_voxels', atoms['atom_type']) atoms['volume'] = atoms['num_voxels'] * densityObj.header.unitVolume medians.update({column : {atom_type : np.nanmedian(atoms[column][atoms['atom_type'] == atom_type]) for atom_type in atom_types} for column in ['density_electron_ratio', 'centroid_distance', 'adj_density_electron_ratio', 'volume']}) medians['bfactor'] = {atom_type : np.nanmedian(atoms['bfactor'][(atoms['atom_type'] == atom_type) & (atoms['bfactor'] > 0)]) for atom_type in atom_types} atoms['bfactor'][atoms['bfactor'] <= 0] = medians_translator('bfactor', atoms['atom_type'])[atoms['bfactor'] <= 0] medians['slopes'] = {atom_type : calcSlope(atoms[atoms['atom_type'] == atom_type], atom_type) for atom_type in atom_types} ## Correct by b-factor atoms['domain_fraction'] = (atoms['adj_density_electron_ratio'] - densityElectronRatio) / densityElectronRatio atoms['corrected_fraction'] = atoms['domain_fraction'] - (np.log(atoms['bfactor']) - np.log(medians_translator('bfactor', atoms['atom_type']))) * medians_translator('slopes', atoms['atom_type']) atoms['corrected_density_electron_ratio'] = atoms['corrected_fraction'] * densityElectronRatio + densityElectronRatio medians.update({column : {atom_type : np.nanmedian(atoms[column][atoms['atom_type'] == atom_type]) for atom_type in atom_types} for column in ['domain_fraction', 'corrected_fraction', 'corrected_density_electron_ratio']}) except: return self._densityElectronRatio = densityElectronRatio self._numVoxelsAggregated = numVoxels self._totalAggregatedElectrons = totalElectrons self._totalAggregatedDensity = totalDensity self._medians = medians self._atomCloudDescriptions = atoms self._residueCloudDescriptions = residueList self._domainCloudDescriptions = domainList self._atomTypeOverlapCompleteness = completelyOverlappedAtomTypes self._atomTypeOverlapIncompleteness = incompletelyOverlappedAtomTypes
[docs] def medianAbsFoFc(self): """Calculates median absolute values for the Fo and Fc maps less than 1 sigma. These values should be comparable, i.e. low relative difference, for RSCC and RSR metric calculations. :return: tuple of median abs values from fo and fc maps respectively. :rtype: :py:class:`tuple` """ fo = self.fo fc = self.fc foDensityCutoff = fo.meanDensity + 1.0 * fo.stdDensity fcDensityCutoff = fc.meanDensity + 1.0 * fc.stdDensity ncrs = fo.header.uniqueNcrs densityPairs = [ (density, diffDensity) for density, diffDensity in ((utils.getPointDensityFromCrs(fo, crs),utils.getPointDensityFromCrs(fc, crs)) for crs in itertools.product(range(ncrs[0]),range(ncrs[1]),range(ncrs[2]))) if abs(density) < foDensityCutoff and abs(diffDensity) < fcDensityCutoff ] foValues = np.asarray([pair[0] for pair in densityPairs]) fcValues = np.asarray([pair[1] for pair in densityPairs]) return (np.median(np.abs(foValues)),np.median(np.abs(fcValues)))
residueMetricsHeaderList = ['chain', 'residue_number', 'residue_name', "rscc", "rsr", "mean_occupancy", "occupancy_weighted_mean_bfactor"]
[docs] def residueMetrics(self, residueList=None): """RETURNS rscc and rsr statistics for each residue using the Fo and Fc density maps. :param residueList: :type residueList: :py:class:`list`, optional :return: results :rtype: :py:class:`list` """ resolution = self.biopdbObj.header['resolution'] radius = 0.7 if 0.6 <= resolution <= 3: radius = (resolution - 0.6) / 3 + 0.7 elif resolution > 3: radius = resolution * 0.5 if residueList == None: residueList = list(self.biopdbObj.get_residues()) residueResults = [] for residue in residueList: crsList = set() bfactorWeightedSum = occupancySum = 0.0 for atom in residue.child_list: crsList.update(utils.getSphereCrsFromXyz(self.fo, atom.coord, radius, 0.0)) bfactorWeightedSum += atom.get_bfactor() * atom.get_occupancy() occupancySum += atom.get_occupancy() (rscc, rsr) = self.calculateRsccRsrMetrics(crsList) residueResults.append([residue.parent.id, residue.id[1], residue.resname, rscc, rsr, occupancySum / len(residue.child_list), bfactorWeightedSum / occupancySum]) return residueResults
atomMetricsHeaderList = ['chain', 'residue_number', 'residue_name', "atom_name", "symmetry", "xyz", "rscc", "rsr", "occupancy", "bfactor"]
[docs] def atomMetrics(self, atomList=None): """RETURNS rscc and rsr statistics for each residue using the Fo and Fc density maps. :param atomList: :type atomList: :py:class:`list`, optional :return: results :rtype: :py:class:`list` """ resolution = self.biopdbObj.header['resolution'] radius = 0.7 if 0.6 <= resolution <= 3: radius = (resolution - 0.6) / 3 + 0.7 elif resolution > 3: radius = resolution * 0.5 if atomList == None: atomList = self.asymmetryAtoms atomResults = [] for atom in atomList: crsList = set(utils.getSphereCrsFromXyz(self.fo, atom.coord, radius, 0.0)) (rscc, rsr) = self.calculateRsccRsrMetrics(crsList) atomResults.append([atom.parent.parent.id, atom.parent.id[1], atom.parent.resname, atom.name, atom.symmetry, atom.coord, rscc, rsr, atom.get_occupancy(), atom.get_bfactor()]) return atomResults
[docs] def calculateRsccRsrMetrics(self, crsList): """Calculates and returns RSCC and RSR metrics. This method of calculating RSCC and RSR assumes that the Fo and Fc maps are appropriately scaled. Comparison of median absolute values below one sigma should be quite similar between Fo and Fc maps. :param crsList: :type crsList: :py:class:`list`, :py:class:`set` :return: rscc_rsr_arrays_tuple :rtype: :py:obj:`tuple` """ fo = self.fo fc = self.fc foDensity = np.asarray([utils.getPointDensityFromCrs(fo,i) for i in crsList]) fcDensity = np.asarray([utils.getPointDensityFromCrs(fc, i) for i in crsList]) rscc = stats.stats.pearsonr(foDensity, fcDensity)[0] rsr = sum(abs(foDensity - fcDensity)) / sum(abs(foDensity + fcDensity)) return (rscc,rsr)
[docs] def _calculateSymmetryAtoms(self): """Calculate all the symmetry and nearby cells and keep those have at least on atom within 5 grid points of the non-repeating crs boundary. Ref: Biomolecular Crystallography: Principles, Practice, and Application to Structural Biology by Bernhard Rupp. Orthogonalization matrix O and deororthogonalization matrix O' are from :class:`pdb_eda.ccp4` object. Rotation matrix R and Translation matrix T is from :class:`pdb_eda.pdbParser` object. The neighbouring cells can be calculated using formula, X' = O(O'(RX + T) + T') = OO'(RX+T) + OT' = RX + T + O[-1/0/1,-1/0/1,-1/0/1]. Assign the list of :class:`pdb_eda.densityAnalysis.symAtom` instances to `densityAnalysis.symmetryAtoms` data member """ densityObj = self.densityObj biopdbObj = self.biopdbObj pdbObj = self.pdbObj ## For inRangeAtoms, the min/max range of xyz axes (the circumscribed box) ncrs = densityObj.header.ncrs orginalDensityBox = [densityObj.header.crs2xyzCoord(i) for i in [[c, r, s] for c in [0, ncrs[0]-1] for r in [0, ncrs[1]-1] for s in [0, ncrs[2]-1]]] xs = sorted([i[0] for i in orginalDensityBox]) ys = sorted([i[1] for i in orginalDensityBox]) zs = sorted([i[2] for i in orginalDensityBox]) allAtoms = utils.createSymmetryAtoms(list(biopdbObj.get_atoms()), pdbObj.header.rotationMats, densityObj.header.orthoMat, xs,ys,zs) self._symmetryAtoms = allAtoms self._symmetryAtomCoords = np.asarray([atom.coord for atom in allAtoms]) self._symmetryOnlyAtoms = [atom for atom in allAtoms if atom.symmetry != (0,0,0,0)] self._symmetryOnlyAtomCoords = np.asarray([atom.coord for atom in self._symmetryOnlyAtoms]) self._asymmetryAtoms = [atom for atom in allAtoms if atom.symmetry == (0,0,0,0)] self._asymmetryAtomCoords = np.asarray([atom.coord for atom in self._asymmetryAtoms])
blobStatisticsHeader = ['distance_to_atom', 'sign', 'electrons_of_discrepancy', 'num_voxels', 'volume', 'chain', 'residue_number', 'residue_name', 'atom_name', 'atom_symmetry', 'atom_xyz', 'centroid_xyz']
[docs] def calculateAtomSpecificBlobStatistics(self, blobList): """Calculate atom-specific blob statistics. :param blobList: list of blobs to calculate statistics for. :type blobList: :py:class:`list` :return blobStats: Difference density map statistics. :rtype: :py:class:`list` """ symmetryAtoms = self.symmetryAtoms symmetryAtomCoords = self.symmetryAtomCoords if not self.densityElectronRatio: raise RuntimeError("Failed to calculate densityElectronRatio, probably due to total aggregated electrons less than the minimum.") densityElectronRatio = self.densityElectronRatio blobStats = [] for blob in blobList: centroid = np.array(blob.centroid).reshape((1, 3)) symmetryDistances = scipy.spatial.distance.cdist(centroid, symmetryAtomCoords) atom = symmetryAtoms[np.argmin(symmetryDistances[0])] # closest atom sign = '+' if blob.totalDensity >= 0 else '-' blobStats.append([symmetryDistances.min(), sign, abs(blob.totalDensity / densityElectronRatio), len(blob.crsList), blob.volume, atom.parent.parent.id, atom.parent.id[1], atom.parent.resname, atom.name, atom.symmetry, atom.coord, blob.centroid]) return blobStats
# Headers that match the order of the results regionDensityHeader = [ "actual_significant_regional_density", "num_electrons_actual_significant_regional_density" ] atomRegionDensityHeader = ['model', 'chain', 'residue_number', 'residue_name', "atom_name", "occupancy"] + regionDensityHeader symmetryAtomRegionDensityHeader = ['model', 'chain', 'residue_number', 'residue_name', "atom_name", "symmetry", "atom_xyz", "fully_within_density_map"] + regionDensityHeader residueRegionDensityHeader = ['model', 'chain', 'residue_number', 'residue_name', "mean_occupancy"] + regionDensityHeader
[docs] def calculateAtomRegionDensity(self, radius, numSD=1.5, type="", useOptimizedRadii=False): """Calculates significant region density in a given radius of each atom. :param radius: the search radius. :type radius: :py:class:`float` :param numSD: number of standard deviations of significance, defaults to 1.5 :type numSD: py:class:`float` :param type: atom type to filter on., defaults to "" :type type: :py:class:`str` :return diffMapRegionStats: density map region statistics per atom. :rtype: :py:class:`list` """ biopdbObj = self.biopdbObj atoms = list(biopdbObj.get_atoms()) if type: atoms = [atom for atom in atoms if atom.name == type] results = [] for atom in atoms: resAtom = residueAtomName(atom) testRadius = radiiGlobal[fullAtomNameMapAtomTypeGlobal[resAtom]] if useOptimizedRadii and resAtom in fullAtomNameMapAtomTypeGlobal.keys() else radius result = self.calculateRegionDensity([atom.coord], testRadius, numSD) results.append([atom.parent.parent.parent.id, atom.parent.parent.id, atom.parent.id[1], atom.parent.resname, atom.name, atom.get_occupancy()] + result) return results
[docs] def calculateSymmetryAtomRegionDensity(self, radius, numSD=1.5, type="", useOptimizedRadii=False): """Calculates significant region density in a given radius of each symmetry atom. :param radius: the search radius. :type radius: :py:class:`float` :param numSD: number of standard deviations of significance., defaults to 1.5 :type numSD: :py:class:`float` :param type: atom type to filter on., defaults to "" :type type: :py:class:`str` :return diffMapRegionStats: density map region statistics per atom. :rtype: :py:class:`list` """ atoms = self.symmetryAtoms if type: atoms = [atom for atom in atoms if atom.name == type] results = [] for atom in atoms: resAtom = residueAtomName(atom) testRadius = radiiGlobal[fullAtomNameMapAtomTypeGlobal[resAtom]] if useOptimizedRadii and resAtom in fullAtomNameMapAtomTypeGlobal.keys() else radius (result,valid) = self.calculateRegionDensity([atom.coord], testRadius, numSD, testValidCrs=True) results.append([atom.parent.parent.parent.id, atom.parent.parent.id, atom.parent.id[1], atom.parent.resname, atom.name, atom.symmetry, atom.coord, valid] + result) return results
[docs] def calculateResidueRegionDensity(self, radius, numSD=1.5, type="", atomMask=None, useOptimizedRadii=False): """Calculates significant region density in a given radius of each residue. :param radius: the search radius. :type radius: :py:class:`float` :param numSD: number of standard deviations of significance., defaults to 1.5 :type numSD: :py:class:`float` :param type: atom type to filter on., defaults to "" :type type: :py:class:`str` :param atomMask: residue specific atom mask. :type type: :py:class:`dict`, optional :return diffMapRegionStats: density map region statistics per residue. :rtype: :py:class:`list` """ biopdbObj = self.biopdbObj results = [] residues = list(biopdbObj.get_residues()) if type: residues = [residue for residue in residues if residue.resname == type] for residue in residues: atoms = [atom for atom in residue.get_atoms() if not atomMask or residue.resname not in atomMask or atom.name in atomMask[residue.resname]] if atoms: xyzCoordList = [atom.coord for atom in atoms] meanOccupancy = np.mean([atom.get_occupancy() for atom in atoms]) if useOptimizedRadii: resAtoms = [residueAtomName(atom) for atom in atoms] radii = [radiiGlobal[fullAtomNameMapAtomTypeGlobal[resAtom]] if resAtom in fullAtomNameMapAtomTypeGlobal.keys() else radius for resAtom in resAtoms] result = self.calculateRegionDensity(xyzCoordList, radii, numSD) else: result = self.calculateRegionDensity(xyzCoordList, radius, numSD) results.append([residue.parent.parent.id, residue.parent.id, residue.id[1], residue.resname, meanOccupancy ] + result) return results
[docs] def calculateRegionDensity(self, xyzCoordList, radius, numSD=1.5, testValidCrs=False): """Calculate region-specific density from the electron density matrix. :param xyzCoordLists: single xyz coordinate or a list of xyz coordinates. :type xyzCoordList: :py:class:`list` :param radius: the search radius or list of search radii. :type radius: :py:class:`float` or :py:class:`list` :param numSD: number of standard deviations of significance., defaults to 1.5 :type numSD: :py:class:`float` :param testValidCrs: whether to test crs are valid and return the results., defaults to :py:obj:`False` :type testValidCrs: :py:class:`bool` :return diffMapRegionStats: density map region statistics and optional validCrs result. :rtype: :py:class:`list`, :py:class:`tuple` """ if not self.densityElectronRatio: raise RuntimeError("Failed to calculate densityElectronRatio, probably due to total aggregated electrons less than the minimum.") densityElectronRatio = self.densityElectronRatio densityObj = self.densityObj densityCutoff = densityObj.meanDensity + numSD * densityObj.stdDensity # observed significant regional density blue = densityObj.findAberrantBlobs(xyzCoordList, radius, densityCutoff) actual_sig_regional_density = sum([blob.totalDensity for blob in blue]) num_electrons_actual_sig_regional_density = actual_sig_regional_density / densityElectronRatio result = [ actual_sig_regional_density, num_electrons_actual_sig_regional_density ] if testValidCrs: return (result, utils.testValidXyzList(densityObj, xyzCoordList, radius)) else: return result
# Headers that match the order of the results regionDiscrepancyHeader = [ "actual_abs_significant_regional_discrepancy", "num_electrons_actual_abs_significant_regional_discrepancy", "expected_abs_significant_regional_discrepancy", "num_electrons_expected_abs_significant_regional_discrepancy", "actual_significant_regional_discrepancy", "num_electrons_actual_significant_regional_discrepancy", "actual_positive_significant_regional_discrepancy", "num_electrons_actual_positive_significant_regional_discrepancy", "actual_negative_significant_regional_discrepancy", "num_electrons_actual_negative_significant_regional_discrepancy" ] atomRegionDiscrepancyHeader = ['model', 'chain', 'residue_number', 'residue_name', "atom_name", "occupancy"] + regionDiscrepancyHeader symmetryAtomRegionDiscrepancyHeader = ['model', 'chain', 'residue_number', 'residue_name', "atom_name", "symmetry", "atom_xyz", "fully_within_density_map"] + regionDiscrepancyHeader residueRegionDiscrepancyHeader = ['model', 'chain', 'residue_number', 'residue_name', "mean_occupancy"] + regionDiscrepancyHeader
[docs] def calculateAtomRegionDiscrepancies(self, radius, numSD=3.0, type=""): """Calculates significant region discrepancies in a given radius of each atom. :param radius: the search radius. :type radius: :py:class:`float` :param numSD: number of standard deviations of significance, defaults to 3.0 :type numSD: py:class:`float` :param type: atom type to filter on., defaults to "" :type type: :py:class:`str` :return diffMapRegionStats: Difference density map region statistics per atom. :rtype: :py:class:`list` """ biopdbObj = self.biopdbObj atoms = list(biopdbObj.get_atoms()) if type: atoms = [atom for atom in atoms if atom.name == type] results = [] for atom in atoms: result = self.calculateRegionDiscrepancy([atom.coord], radius, numSD) results.append([atom.parent.parent.parent.id, atom.parent.parent.id, atom.parent.id[1], atom.parent.resname, atom.name, atom.get_occupancy()] + result) return results
[docs] def calculateSymmetryAtomRegionDiscrepancies(self, radius, numSD=3.0, type=""): """Calculates significant region discrepancies in a given radius of each symmetry atom. :param radius: the search radius. :type radius: :py:class:`float` :param numSD: number of standard deviations of significance., defaults to 3.0 :type numSD: :py:class:`float` :param type: atom type to filter on., defaults to "" :type type: :py:class:`str` :return diffMapRegionStats: Difference density map region statistics per atom. :rtype: :py:class:`list` """ atoms = self.symmetryAtoms if type: atoms = [atom for atom in atoms if atom.name == type] results = [] for atom in atoms: (result,valid) = self.calculateRegionDiscrepancy([atom.coord], radius, numSD, testValidCrs=True) results.append([atom.parent.parent.parent.id, atom.parent.parent.id, atom.parent.id[1], atom.parent.resname, atom.name, atom.symmetry, atom.coord, valid] + result) return results
[docs] def calculateResidueRegionDiscrepancies(self, radius, numSD=3.0, type="", atomMask=None): """Calculates significant region discrepancies in a given radius of each residue. :param radius: the search radius. :type radius: :py:class:`float` :param numSD: number of standard deviations of significance., defaults to 3.0 :type numSD: :py:class:`float` :param type: atom type to filter on., defaults to "" :type type: :py:class:`str` :param atomMask: residue specific atom mask. :type type: :py:class:`dict`, optional :return diffMapRegionStats: Difference density map region statistics per residue. :rtype: :py:class:`list` """ biopdbObj = self.biopdbObj results = [] residues = list(biopdbObj.get_residues()) if type: residues = [residue for residue in residues if residue.resname == type] for residue in residues: atoms = [atom for atom in residue.get_atoms() if not atomMask or (residue.resname in atomMask and atom.name in atomMask[residue.resname])] xyzCoordList = [atom.coord for atom in atoms] meanOccupancy = np.mean([atom.get_occupancy() for atom in atoms]) result = self.calculateRegionDiscrepancy(xyzCoordList, radius, numSD) results.append([residue.parent.parent.id, residue.parent.id, residue.id[1], residue.resname, meanOccupancy ] + result) return results
[docs] def calculateRegionDiscrepancy(self, xyzCoordList, radius, numSD=3.0, testValidCrs=False): """Calculate region-specific discrepancy from the difference density matrix. :param xyzCoordLists: single xyz coordinate or a list of xyz coordinates. :type xyzCoordList: :py:class:`list` :param radius: the search radius. :type radius: :py:class:`float` :param numSD: number of standard deviations of significance., defaults to 3.0 :type numSD: :py:class:`float` :param testValidCrs: whether to test crs are valid and return the results., defaults to :py:obj:`False` :type testValidCrs: :py:class:`bool` :return diffMapRegionStats: Difference density map region statistics and optional validCrs result. :rtype: :py:class:`list`, :py:class:`tuple` """ if not self.densityElectronRatio: raise RuntimeError("Failed to calculate densityElectronRatio, probably due to total aggregated electrons less than the minimum.") densityElectronRatio = self.densityElectronRatio diffDensityObj = self.diffDensityObj diffDensityCutoff = diffDensityObj.meanDensity + numSD * diffDensityObj.stdDensity # observed significant regional discrepancy green = diffDensityObj.findAberrantBlobs(xyzCoordList, radius, diffDensityCutoff) red = diffDensityObj.findAberrantBlobs(xyzCoordList, radius, -1.0 * diffDensityCutoff) actual_positive_sig_regional_discrep = sum([blob.totalDensity for blob in green]) num_electrons_actual_positive_sig_regional_discrep = actual_positive_sig_regional_discrep / densityElectronRatio actual_negative_sig_regional_discrep = sum([blob.totalDensity for blob in red]) num_electrons_actual_negative_sig_regional_discrep = actual_negative_sig_regional_discrep / densityElectronRatio actual_sig_regional_discrep = actual_positive_sig_regional_discrep + actual_negative_sig_regional_discrep num_electrons_actual_sig_regional_discrep = actual_sig_regional_discrep / densityElectronRatio actual_abs_sig_regional_discrep = abs(actual_positive_sig_regional_discrep) + abs(actual_negative_sig_regional_discrep) num_electrons_actual_abs_sig_regional_discrep = actual_abs_sig_regional_discrep / densityElectronRatio # expected absolute significant regional discrepancy total_abs_sig_discrep = diffDensityObj.getTotalAbsDensity(diffDensityCutoff) total_voxel_count = len(diffDensityObj.densityArray) avg_abs_vox_discrep = total_abs_sig_discrep / total_voxel_count regional_voxel_count = len(utils.getSphereCrsFromXyzList(diffDensityObj, xyzCoordList, radius)) expected_abs_sig_regional_discrep = avg_abs_vox_discrep * regional_voxel_count num_electrons_expected_abs_sig_regional_discrep = expected_abs_sig_regional_discrep / densityElectronRatio result = [ actual_abs_sig_regional_discrep, num_electrons_actual_abs_sig_regional_discrep, expected_abs_sig_regional_discrep, num_electrons_expected_abs_sig_regional_discrep, actual_sig_regional_discrep, num_electrons_actual_sig_regional_discrep, actual_positive_sig_regional_discrep, num_electrons_actual_positive_sig_regional_discrep, actual_negative_sig_regional_discrep, num_electrons_actual_negative_sig_regional_discrep ] if testValidCrs: return (result, utils.testValidXyzList(diffDensityObj, xyzCoordList, radius)) else: return result
[docs] def estimateF000(self): """Estimate the F000 term as sum of all electrons over the unit cell volume :return: estimatedF000 :rtype: :py:class:`float` """ densityObj = self.densityObj biopdbObj = self.biopdbObj pdbObj = self.pdbObj if not elementElectronsGlobal: loadF000Parameters() totalElectrons = 0 allAtoms = list(biopdbObj.get_atoms()) for atom in allAtoms: fullAtomName = residueAtomName(atom) if fullAtomName in masterFullAtomNameMapElectronsGlobal: totalElectrons += masterFullAtomNameMapElectronsGlobal[fullAtomName] elif atom.element in elementElectronsGlobal: totalElectrons += elementElectronsGlobal[atom.element] + 1 # + 1 as an estimate for the number of H. totalElectrons *= len(pdbObj.header.rotationMats) asuVolume = densityObj.header.unitVolume * densityObj.header.nintervalX * densityObj.header.nintervalY * densityObj.header.nintervalZ return totalElectrons/asuVolume
[docs]def residueAtomName(atom): """Returns a combined residue and atom name used to select an atom type. :param atom: :type atom: :class:`Bio.PDB.atom` :return: fullAtomName :rtype: :py:class:`str` """ return atom.parent.resname.strip() + '_' + atom.name