This script cycles through a structures table and decides if a structure has a pka that lies between pH 4.5 and 9. If so, it generates the microspecies and inserts it into another table, as well as the original. It also inserts the pH of the maximum occupation of that microspecies, and which pH range it fell in (pH 4.5 - 9.5 split into different ranges).
This script shows the use of chemJEP and Evaluator for calling chemical terms from Groovy.
/* Microspecies Populating Script
*
* Usage: This script needs two tables connected via a one to many relationship, with the microspecies table as the child.
*
* @author Erin Bolstad (ebolstad@chemaxon.com)
* Dec 2011
*/
import com.im.commons.progress.*
import com.im.df.api.chem.MarvinStructure
import chemaxon.jep.Evaluator
import chemaxon.jep.context.MolContext
import chemaxon.formats.MolExporter
def ety = dataTree.rootVertex.entity
def rs = ety.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
def parentVS = rs.getVertexState(dataTree.rootVertex)
def molIDs = parentVS.getIds()
def molFld = ety.fields.items.find { it.name == "Structure" }
def protoEdge = dataTree.rootVertex.edges.find { it.destination.entity.name == 'Microspecies' }
def protoEntity = protoEdge.destination.entity
def protoEdp = protoEntity.schema.dataProvider.getEntityDataProvider(protoEntity)
def protoIdFld = protoEntity.fields.items.find { it.name == 'ID' }
def protoHiFld = protoEntity.fields.items.find { it.name == 'PH_HI' }
def protoMidFld = protoEntity.fields.items.find { it.name == 'PH_MID' }
def protoLoFld = protoEntity.fields.items.find { it.name == 'PH_LO' }
def protoMaxOcFld = protoEntity.fields.items.find { it.name == 'PH_MAX_OCCUPANCY' }
def protoStFld = protoEntity.fields.items.find { it.name == 'Structure' }
def protoRefFld = protoEntity.fields.items.find { it.name == 'PH_REF' }
def lock = protoEdp.lockable.withLock('inserting data'){ envRW ->
i=0
molIDs.each { id ->
//Fetch in the molecule from the structures table
def molData = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)
getMol = molData[id][molFld.id]
nativeMol = getMol.getNative()
def nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
def pkaArray = []
// Calculate the first acidic pKa
Evaluator evaluator = new Evaluator()
chemJEP = evaluator.compile("pKa('acidic','1')", MolContext.class)
MolContext context = new MolContext()
context.setMolecule(nativeMol)
aresult = chemJEP.evaluate(context)
String nanCheck = aresult
n=2
// pKa evaluator will return NaN if no more pKas to calculate, cycle through pKas till 9.5 hit or till NaN is returned
while ((aresult < 9.5) && (nanCheck !="NaN")){
pkaArray << aresult
chemJEP = evaluator.compile("pKa('acidic','$n')", MolContext.class)
context.setMolecule(nativeMol)
aresult = chemJEP.evaluate(context)
nanCheck = aresult
n++
}
//Calculate the first basic pKa
chemJEP = evaluator.compile("pKa('basic','1')", MolContext.class)
context.setMolecule(nativeMol)
bresult = chemJEP.evaluate(context)
nanCheck = bresult
n=2
//Cycle through basic pKas until 4.5 hit or Nan is returned
while ((nanCheck != "NaN") && (bresult > 4.5)) {
pkaArray << bresult
chemJEP = evaluator.compile("pKa('basic','$n')", MolContext.class)
context.setMolecule(nativeMol)
bresult = chemJEP.evaluate(context)
nanCheck = bresult
n++
}
i++
vals = [:]
// Build insert array with original molecule for insert
vals = [(protoStFld.id):getMol]
vals.putAt(protoRefFld.id, true)
vals.putAt(protoIdFld.id, id)
//Insert the row into Microspecies table
protoEdp.insert(vals, null, envRW)
print "Inserted native structure \n"
arraySize = pkaArray.size()
// If there is a pKa in the array of pKas calculated between 4.5 and 9, enter the loop.
if (arraySize > 0) {
//Sort array of pKas
sortedArray = pkaArray.sort()
// Get the pKas at the halfway points for 4.5 and pKa1, pKax and 9 for microspecies calculation
speciesPkas = [:]
m=0
s= arraySize - 1
msLo = ((sortedArray[0] - 4.5) / 2) + 4.5
msHi = ((9.5 - sortedArray[s]) / 2) + sortedArray[s]
// Calculate the SMILES for the microspecies at the pKa
chemJEP = evaluator.compile("majorMicrospecies('$msLo')", MolContext.class)
context.setMolecule(nativeMol)
mLoresult = chemJEP.evaluate(context)
nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
def loMolStr = MolExporter.exportToFormat(mLoresult, "smiles")
chemJEP = evaluator.compile("majorMicrospecies('$msHi')", MolContext.class)
context.setMolecule(nativeMol)
msHiresult = chemJEP.evaluate(context)
nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
hiMolStr = MolExporter.exportToFormat(msHiresult, "smiles")
// Check that this microspecies is not the same one as the input molecule
if (nativeStr != loMolStr) {
def loMvMol = new MarvinStructure(loMolStr, false)
speciesPkas.put(msLo, loMvMol)
}
if (nativeStr != hiMolStr) {
def hiMvMol = new MarvinStructure(hiMolStr, false)
speciesPkas.put(msHi, hiMvMol)
}
m++
// For arrays with more than one pKa, calculate the species at the halfway points of each pKa
while (m < arraySize) {
r = m - 1
msX = ((sortedArray[m] - sortedArray[r]) / 2) + sortedArray[r]
chemJEP = evaluator.compile("majorMicrospecies('$msX')", MolContext.class)
context.setMolecule(nativeMol)
msXresult = chemJEP.evaluate(context)
nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
xMolStr = MolExporter.exportToFormat(msXresult, "smiles")
if (nativeStr != xMolStr) {
def xMvMol = new MarvinStructure(xMolStr, false)
speciesPkas.put(msX, xMvMol)
}
m++
}
//For each microspecies, determine which pH range it falls in and flag it. Put the max occupancy pH in the array.
speciesPkas.each { maxph, struc ->
vals = [(protoStFld.id):struc]
vals.putAt(protoIdFld.id, id)
def flMaxph = maxph.floatValue()
if ((4.5 < maxph) && (maxph < 7)){
vals.putAt(protoLoFld.id, true)
}
if ((5.75 < maxph) && (maxph < 8.25)){
vals.putAt(protoMidFld.id, true)
}
if ((7 < maxph) && (maxph < 9.5)){
vals.putAt(protoHiFld.id, true)
}
vals.putAt(protoMaxOcFld.id, flMaxph)
// Insert the microspecies
protoEdp.insert(vals, null, envRW)
print "Inserted microspecies \n"
}
}
}
}