/* Microspecies Populating Script
*
* Usage: This script needs two tables connected via a one to many relationship, with the microspecies table as the child.
*
* @author Erin Bolstad (ebolstad@chemaxon.com)
* Dec 2011
*/
import com.im.commons.progress.*
import com.im.df.api.chem.MarvinStructure
import chemaxon.jep.Evaluator
import chemaxon.jep.context.MolContext
import chemaxon.formats.MolExporter
def ety = dataTree.rootVertex.entity
def rs = ety.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
def parentVS = rs.getVertexState(dataTree.rootVertex)
def molIDs = parentVS.getIds()
def molFld = ety.fields.items.find { it.name == "Structure" }
def protoEdge = dataTree.rootVertex.edges.find { it.destination.entity.name == 'Microspecies' }
def protoEntity = protoEdge.destination.entity
def protoEdp = protoEntity.schema.dataProvider.getEntityDataProvider(protoEntity)
def protoIdFld = protoEntity.fields.items.find { it.name == 'ID' }
def protoHiFld = protoEntity.fields.items.find { it.name == 'PH_HI' }
def protoMidFld = protoEntity.fields.items.find { it.name == 'PH_MID' }
def protoLoFld = protoEntity.fields.items.find { it.name == 'PH_LO' }
def protoMaxOcFld = protoEntity.fields.items.find { it.name == 'PH_MAX_OCCUPANCY' }
def protoStFld = protoEntity.fields.items.find { it.name == 'Structure' }
def protoRefFld = protoEntity.fields.items.find { it.name == 'PH_REF' }
def lock = protoEdp.lockable.withLock('inserting data'){ envRW ->
i=0
molIDs.each { id ->
//Fetch in the molecule from the structures table
def molData = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)
getMol = molData[id][molFld.id]
nativeMol = getMol.getNative()
def nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
def pkaArray = []
// Calculate the first acidic pKa
Evaluator evaluator = new Evaluator()
chemJEP = evaluator.compile("pKa('acidic','1')", MolContext.class)
MolContext context = new MolContext()
context.setMolecule(nativeMol)
aresult = chemJEP.evaluate(context)
String nanCheck = aresult
n=2
// pKa evaluator will return NaN if no more pKas to calculate, cycle through pKas till 9.5 hit or till NaN is returned
while ((aresult < 9.5) && (nanCheck !="NaN")){
pkaArray << aresult
chemJEP = evaluator.compile("pKa('acidic','$n')", MolContext.class)
context.setMolecule(nativeMol)
aresult = chemJEP.evaluate(context)
nanCheck = aresult
n++
}
//Calculate the first basic pKa
chemJEP = evaluator.compile("pKa('basic','1')", MolContext.class)
context.setMolecule(nativeMol)
bresult = chemJEP.evaluate(context)
nanCheck = bresult
n=2
//Cycle through basic pKas until 4.5 hit or Nan is returned
while ((nanCheck != "NaN") && (bresult > 4.5)) {
pkaArray << bresult
chemJEP = evaluator.compile("pKa('basic','$n')", MolContext.class)
context.setMolecule(nativeMol)
bresult = chemJEP.evaluate(context)
nanCheck = bresult
n++
}
i++
vals = [:]
// Build insert array with original molecule for insert
vals = [(protoStFld.id):getMol]
vals.putAt(protoRefFld.id, true)
vals.putAt(protoIdFld.id, id)
//Insert the row into Microspecies table
protoEdp.insert(vals, null, envRW)
print "Inserted native structure \n"
arraySize = pkaArray.size()
// If there is a pKa in the array of pKas calculated between 4.5 and 9, enter the loop.
if (arraySize > 0) {
//Sort array of pKas
sortedArray = pkaArray.sort()
// Get the pKas at the halfway points for 4.5 and pKa1, pKax and 9 for microspecies calculation
speciesPkas = [:]
m=0
s= arraySize - 1
msLo = ((sortedArray[0] - 4.5) / 2) + 4.5
msHi = ((9.5 - sortedArray[s]) / 2) + sortedArray[s]
// Calculate the SMILES for the microspecies at the pKa
chemJEP = evaluator.compile("majorMicrospecies('$msLo')", MolContext.class)
context.setMolecule(nativeMol)
mLoresult = chemJEP.evaluate(context)
nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
def loMolStr = MolExporter.exportToFormat(mLoresult, "smiles")
chemJEP = evaluator.compile("majorMicrospecies('$msHi')", MolContext.class)
context.setMolecule(nativeMol)
msHiresult = chemJEP.evaluate(context)
nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
hiMolStr = MolExporter.exportToFormat(msHiresult, "smiles")
// Check that this microspecies is not the same one as the input molecule
if (nativeStr != loMolStr) {
def loMvMol = new MarvinStructure(loMolStr, false)
speciesPkas.put(msLo, loMvMol)
}
if (nativeStr != hiMolStr) {
def hiMvMol = new MarvinStructure(hiMolStr, false)
speciesPkas.put(msHi, hiMvMol)
}
m++
// For arrays with more than one pKa, calculate the species at the halfway points of each pKa
while (m < arraySize) {
r = m - 1
msX = ((sortedArray[m] - sortedArray[r]) / 2) + sortedArray[r]
chemJEP = evaluator.compile("majorMicrospecies('$msX')", MolContext.class)
context.setMolecule(nativeMol)
msXresult = chemJEP.evaluate(context)
nativeStr = MolExporter.exportToFormat(nativeMol, "smiles")
xMolStr = MolExporter.exportToFormat(msXresult, "smiles")
if (nativeStr != xMolStr) {
def xMvMol = new MarvinStructure(xMolStr, false)
speciesPkas.put(msX, xMvMol)
}
m++
}
//For each microspecies, determine which pH range it falls in and flag it. Put the max occupancy pH in the array.
speciesPkas.each { maxph, struc ->
vals = [(protoStFld.id):struc]
vals.putAt(protoIdFld.id, id)
def flMaxph = maxph.floatValue()
if ((4.5 < maxph) && (maxph < 7)){
vals.putAt(protoLoFld.id, true)
}
if ((5.75 < maxph) && (maxph < 8.25)){
vals.putAt(protoMidFld.id, true)
}
if ((7 < maxph) && (maxph < 9.5)){
vals.putAt(protoHiFld.id, true)
}
vals.putAt(protoMaxOcFld.id, flMaxph)
// Insert the microspecies
protoEdp.insert(vals, null, envRW)
print "Inserted microspecies \n"
}
}
}
}