Populate a Table with Microspecies¶

This script cycles through a structures table and decides if a structure has a pka that lies between pH 4.5 and 9. If so, it generates the microspecies and inserts it into another table, as well as the original. It also inserts the pH of the maximum occupation of that microspecies, and which pH range it fell in (pH 4.5 - 9.5 split into different ranges).
This script shows the use of chemJEP and Evaluator for calling chemical terms from Groovy.
/* Microspecies Populating Script
*
* Usage: This script needs two tables connected via a one to many relationship, with the microspecies table as the child.
*
* @author Erin Bolstad (ebolstad@chemaxon.com)
* Dec 2011
*/

import com.im.commons.progress.*
import com.im.df.api.chem.MarvinStructure
import chemaxon.jep.Evaluator
import chemaxon.jep.context.MolContext
import chemaxon.formats.MolExporter

def ety = dataTree.rootVertex.entity
def rs = ety.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
def parentVS = rs.getVertexState(dataTree.rootVertex)
def molIDs = parentVS.getIds()
def molFld = ety.fields.items.find { it.name == "Structure" }
def protoEdge = dataTree.rootVertex.edges.find { it.destination.entity.name == 'Microspecies' }
def protoEntity = protoEdge.destination.entity
def protoEdp = protoEntity.schema.dataProvider.getEntityDataProvider(protoEntity)
def protoIdFld = protoEntity.fields.items.find { it.name == 'ID' }
def protoHiFld = protoEntity.fields.items.find { it.name == 'PH_HI' }
def protoMidFld = protoEntity.fields.items.find { it.name == 'PH_MID' }
def protoLoFld = protoEntity.fields.items.find { it.name == 'PH_LO' }
def protoMaxOcFld = protoEntity.fields.items.find { it.name == 'PH_MAX_OCCUPANCY' }
def protoStFld = protoEntity.fields.items.find { it.name == 'Structure' }
def protoRefFld = protoEntity.fields.items.find { it.name == 'PH_REF' }

def lock = protoEdp.lockable.withLock('inserting data'){ envRW ->
    i=0
    molIDs.each { id ->

        //Fetch in the molecule from the structures table
        def molData = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)
        getMol = molData[id][molFld.id]
        nativeMol = getMol.getNative()
        def nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")

        def pkaArray = []

        // Calculate the first acidic pKa
        Evaluator evaluator = new Evaluator()
        chemJEP = evaluator.compile("pKa('acidic','1')", MolContext.class)
        MolContext context = new MolContext()
        context.setMolecule(nativeMol)
        aresult = chemJEP.evaluate(context)
        String nanCheck = aresult

        n=2
        // pKa evaluator will return NaN if no more pKas to calculate, cycle through pKas till 9.5 hit or till NaN is returned
        while ((aresult < 9.5) && (nanCheck !="NaN")){
            pkaArray << aresult
            chemJEP = evaluator.compile("pKa('acidic','$n')", MolContext.class)
            context.setMolecule(nativeMol)
            aresult = chemJEP.evaluate(context)
            nanCheck = aresult
            n++
        }

        //Calculate the first basic pKa
        chemJEP = evaluator.compile("pKa('basic','1')", MolContext.class)
        context.setMolecule(nativeMol)
        bresult = chemJEP.evaluate(context)
        nanCheck = bresult

        n=2
        //Cycle through basic pKas until 4.5 hit or Nan is returned
        while ((nanCheck != "NaN") && (bresult > 4.5)) {
            pkaArray << bresult
            chemJEP = evaluator.compile("pKa('basic','$n')", MolContext.class)
            context.setMolecule(nativeMol)
            bresult = chemJEP.evaluate(context)
            nanCheck = bresult
            n++
        }

        i++
        vals = [:]

        // Build insert array with original molecule for insert
        vals = [(protoStFld.id):getMol]
        vals.putAt(protoRefFld.id, true)
        vals.putAt(protoIdFld.id, id)

        //Insert the row into Microspecies table
        protoEdp.insert(vals, null, envRW)
        print "Inserted native structure \n"

        arraySize = pkaArray.size()

        // If there is a pKa in the array of pKas calculated between 4.5 and 9, enter the loop.
        if (arraySize > 0) {

            //Sort array of pKas
            sortedArray = pkaArray.sort()

            // Get the pKas at the halfway points for 4.5 and pKa1, pKax and 9 for microspecies calculation
            speciesPkas = [:]
            m=0
            s= arraySize - 1
            msLo = ((sortedArray[0] - 4.5) / 2) + 4.5
            msHi = ((9.5 - sortedArray[s]) / 2) + sortedArray[s]

            // Calculate the SMILES for the microspecies at the pKa
            chemJEP = evaluator.compile("majorMicrospecies('$msLo')", MolContext.class)
            context.setMolecule(nativeMol)
            mLoresult = chemJEP.evaluate(context)
            nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
            def loMolStr = MolExporter.exportToFormat(mLoresult, "smiles")

            chemJEP = evaluator.compile("majorMicrospecies('$msHi')", MolContext.class)
            context.setMolecule(nativeMol)
            msHiresult = chemJEP.evaluate(context)
            nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
            hiMolStr = MolExporter.exportToFormat(msHiresult, "smiles")

            // Check that this microspecies is not the same one as the input molecule
            if (nativeStr != loMolStr) {
                def loMvMol = new MarvinStructure(loMolStr, false)
                speciesPkas.put(msLo, loMvMol)
            }

            if (nativeStr != hiMolStr) {
                def hiMvMol = new MarvinStructure(hiMolStr, false)
                speciesPkas.put(msHi, hiMvMol)
            }
            m++

            // For arrays with more than one pKa, calculate the species at the halfway points of each pKa
            while (m < arraySize) {
                r = m - 1
                msX = ((sortedArray[m] - sortedArray[r]) / 2) + sortedArray[r]

                chemJEP = evaluator.compile("majorMicrospecies('$msX')", MolContext.class)
                context.setMolecule(nativeMol)
                msXresult = chemJEP.evaluate(context)
                nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
                xMolStr = MolExporter.exportToFormat(msXresult, "smiles")

                if (nativeStr != xMolStr) {
                    def xMvMol = new MarvinStructure(xMolStr, false)
                    speciesPkas.put(msX, xMvMol)
                }
                m++
            }

            //For each microspecies, determine which pH range it falls in and flag it. Put the max occupancy pH in the array.
            speciesPkas.each { maxph, struc ->
                vals = [(protoStFld.id):struc]
                vals.putAt(protoIdFld.id, id)
                def flMaxph = maxph.floatValue()

                if ((4.5 < maxph) && (maxph < 7)){
                    vals.putAt(protoLoFld.id, true)
                }
                if ((5.75 < maxph) && (maxph < 8.25)){
                    vals.putAt(protoMidFld.id, true)
                }
                if ((7 < maxph) && (maxph < 9.5)){
                    vals.putAt(protoHiFld.id, true)
                }
                vals.putAt(protoMaxOcFld.id, flMaxph)

                // Insert the microspecies
                protoEdp.insert(vals, null, envRW)
                print "Inserted microspecies \n"
            }
        }
    }
}