Populate a Table with Microspecies

    This script cycles through a structures table and decides if a structure has a pka that lies between pH 4.5 and 9. If so, it generates the microspecies and inserts it into another table, as well as the original. It also inserts the pH of the maximum occupation of that microspecies, and which pH range it fell in (pH 4.5 - 9.5 split into different ranges).

    This script shows the use of chemJEP and Evaluator for calling chemical terms from Groovy.

    
    /* Microspecies Populating Script
    *
    * Usage: This script needs two tables connected via a one to many relationship, with the microspecies table as the child.
    *
    * @author Erin Bolstad (ebolstad@chemaxon.com)
    * Dec 2011
    */
    
    import com.im.commons.progress.*
    import com.im.df.api.chem.MarvinStructure
    import chemaxon.jep.Evaluator
    import chemaxon.jep.context.MolContext
    import chemaxon.formats.MolExporter
    
    def ety = dataTree.rootVertex.entity
    def rs = ety.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
    def parentVS = rs.getVertexState(dataTree.rootVertex)
    def molIDs = parentVS.getIds()
    def molFld = ety.fields.items.find { it.name == "Structure" }
    def protoEdge = dataTree.rootVertex.edges.find { it.destination.entity.name == 'Microspecies' }
    def protoEntity = protoEdge.destination.entity
    def protoEdp = protoEntity.schema.dataProvider.getEntityDataProvider(protoEntity)
    def protoIdFld = protoEntity.fields.items.find { it.name == 'ID' }
    def protoHiFld = protoEntity.fields.items.find { it.name == 'PH_HI' }
    def protoMidFld = protoEntity.fields.items.find { it.name == 'PH_MID' }
    def protoLoFld = protoEntity.fields.items.find { it.name == 'PH_LO' }
    def protoMaxOcFld = protoEntity.fields.items.find { it.name == 'PH_MAX_OCCUPANCY' }
    def protoStFld = protoEntity.fields.items.find { it.name == 'Structure' }
    def protoRefFld = protoEntity.fields.items.find { it.name == 'PH_REF' }
    
    def lock = protoEdp.lockable.withLock('inserting data'){ envRW ->
        i=0
        molIDs.each { id ->
    
            //Fetch in the molecule from the structures table
            def molData = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)
            getMol = molData[id][molFld.id]
            nativeMol = getMol.getNative()
            def nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
    
            def pkaArray = []
    
            // Calculate the first acidic pKa
            Evaluator evaluator = new Evaluator()
            chemJEP = evaluator.compile("pKa('acidic','1')", MolContext.class)
            MolContext context = new MolContext()
            context.setMolecule(nativeMol)
            aresult = chemJEP.evaluate(context)
            String nanCheck = aresult
    
            n=2
            // pKa evaluator will return NaN if no more pKas to calculate, cycle through pKas till 9.5 hit or till NaN is returned
            while ((aresult < 9.5) && (nanCheck !="NaN")){
                pkaArray << aresult
                chemJEP = evaluator.compile("pKa('acidic','$n')", MolContext.class)
                context.setMolecule(nativeMol)
                aresult = chemJEP.evaluate(context)
                nanCheck = aresult
                n++
            }
    
            //Calculate the first basic pKa
            chemJEP = evaluator.compile("pKa('basic','1')", MolContext.class)
            context.setMolecule(nativeMol)
            bresult = chemJEP.evaluate(context)
            nanCheck = bresult
    
            n=2
            //Cycle through basic pKas until 4.5 hit or Nan is returned
            while ((nanCheck != "NaN") && (bresult > 4.5)) {
                pkaArray << bresult
                chemJEP = evaluator.compile("pKa('basic','$n')", MolContext.class)
                context.setMolecule(nativeMol)
                bresult = chemJEP.evaluate(context)
                nanCheck = bresult
                n++
            }
    
            i++
            vals = [:]
    
            // Build insert array with original molecule for insert
            vals = [(protoStFld.id):getMol]
            vals.putAt(protoRefFld.id, true)
            vals.putAt(protoIdFld.id, id)
    
            //Insert the row into Microspecies table
            protoEdp.insert(vals, null, envRW)
            print "Inserted native structure \n"
    
            arraySize = pkaArray.size()
    
            // If there is a pKa in the array of pKas calculated between 4.5 and 9, enter the loop.
            if (arraySize > 0) {
    
                //Sort array of pKas
                sortedArray = pkaArray.sort()
    
                // Get the pKas at the halfway points for 4.5 and pKa1, pKax and 9 for microspecies calculation
                speciesPkas = [:]
                m=0
                s= arraySize - 1
                msLo = ((sortedArray[0] - 4.5) / 2) + 4.5
                msHi = ((9.5 - sortedArray[s]) / 2) + sortedArray[s]
    
                // Calculate the SMILES for the microspecies at the pKa
                chemJEP = evaluator.compile("majorMicrospecies('$msLo')", MolContext.class)
                context.setMolecule(nativeMol)
                mLoresult = chemJEP.evaluate(context)
                nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
                def loMolStr = MolExporter.exportToFormat(mLoresult, "smiles")
    
                chemJEP = evaluator.compile("majorMicrospecies('$msHi')", MolContext.class)
                context.setMolecule(nativeMol)
                msHiresult = chemJEP.evaluate(context)
                nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
                hiMolStr = MolExporter.exportToFormat(msHiresult, "smiles")
    
                // Check that this microspecies is not the same one as the input molecule
                if (nativeStr != loMolStr) {
                    def loMvMol = new MarvinStructure(loMolStr, false)
                    speciesPkas.put(msLo, loMvMol)
                }
    
                if (nativeStr != hiMolStr) {
                    def hiMvMol = new MarvinStructure(hiMolStr, false)
                    speciesPkas.put(msHi, hiMvMol)
                }
                m++
    
                // For arrays with more than one pKa, calculate the species at the halfway points of each pKa
                while (m < arraySize) {
                    r = m - 1
                    msX = ((sortedArray[m] - sortedArray[r]) / 2) + sortedArray[r]
    
                    chemJEP = evaluator.compile("majorMicrospecies('$msX')", MolContext.class)
                    context.setMolecule(nativeMol)
                    msXresult = chemJEP.evaluate(context)
                    nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
                    xMolStr = MolExporter.exportToFormat(msXresult, "smiles")
    
                    if (nativeStr != xMolStr) {
                        def xMvMol = new MarvinStructure(xMolStr, false)
                        speciesPkas.put(msX, xMvMol)
                    }
                    m++
                }
    
                //For each microspecies, determine which pH range it falls in and flag it. Put the max occupancy pH in the array.
                speciesPkas.each { maxph, struc ->
                    vals = [(protoStFld.id):struc]
                    vals.putAt(protoIdFld.id, id)
                    def flMaxph = maxph.floatValue()
    
                    if ((4.5 < maxph) && (maxph < 7)){
                        vals.putAt(protoLoFld.id, true)
                    }
                    if ((5.75 < maxph) && (maxph < 8.25)){
                        vals.putAt(protoMidFld.id, true)
                    }
                    if ((7 < maxph) && (maxph < 9.5)){
                        vals.putAt(protoHiFld.id, true)
                    }
                    vals.putAt(protoMaxOcFld.id, flMaxph)
    
                    // Insert the microspecies
                    protoEdp.insert(vals, null, envRW)
                    print "Inserted microspecies \n"
                }
            }
        }
    }