Relational SDF Exporter

    This script exports relational data in which the structure is held in a child entity that has a one-to-one or many-to-one relationship with the parent entity. In the many-to-one case, the same structure is written to the resulting SD file once for every parent row that refers to it, so structures will be duplicated.

    /** Export relational data to a SD file
     *
     * Usage:
     * 1. create this script for a data tree in the project explorer. The parent entity
     * will be a standard entity and it will contain a child structure entity.
     * 2. edit the variables in the 'edit these settings' section
     * 3. run a query to locate the rows you want to export (or do a 'Show All')
     * 4. execute the script
     *
     * Defaults for this script are set for the Wombat (activities view) data tree in the
     * sample project included in IJC. The variables in the 'edit these settings' section
     * will need changing for your data.
     *
     * @author Tim Dudgeon (tdudgeon@chemaxon.com)
     */
    
    import com.im.commons.progress.*
    import chemaxon.formats.MolExporter
    import chemaxon.struc.Molecule
    
    // ---------- edit these settings ----------------------------------------------------
    
    // Names of the parent-entity fields to export.
    def FIELDS_FROM_PARENT = ['TYPE', 'VALUE', 'TARGET.NAME']

    // Name of the child entity that holds the structures.
    def MOL_ENTITY_NAME = 'Wombat structures'

    // Name of the structure field within the child entity.
    def STRUCTURE_FIELD = 'Structure'

    // Names of the child-entity fields to export (do not list the structure field here).
    def FIELDS_FROM_CHILD = ['CdId', 'Formula']

    // Path of the SD file to create.
    def FILE_NAME = 'C:/tmp/export.sdf'

    // Optional renames: field name in the data tree -> property name written to the SD file.
    def FIELD_NAMES = ['TARGET.NAME': 'Target', 'CdId': 'CPD_ID']
    
    // ------------probably no need to edit anything below here ---------------------------
    
    // Resolve the entities and key fields the export works with. Each lookup that
    // depends on a user-edited setting fails fast with a message naming that setting,
    // instead of surfacing later as a NullPointerException.

    // root entity
    def parent = dataTree.rootVertex.entity

    // ID field
    def fldId = parent.idField
    println "found ID field ${fldId.id}"

    // find the child entity with the mols: the edge from the root vertex whose
    // destination entity is named MOL_ENTITY_NAME
    def molEdge = dataTree.rootVertex.edges.find { it.destination.entity.name == MOL_ENTITY_NAME }
    if (molEdge == null) {
        throw new IllegalStateException(
            "Child entity '${MOL_ENTITY_NAME}' not found below the root of the data tree - check MOL_ENTITY_NAME".toString())
    }
    def molEntity = molEdge.destination.entity
    def fldFK = molEdge.relationshipDir.srcField
    println "Found child entity: ${molEntity}"
    println "Found FK field ${fldFK.id}  ${fldFK.name}"

    // mol field
    def fldMol = molEntity.fields.items.find { it.name == STRUCTURE_FIELD }
    if (fldMol == null) {
        throw new IllegalStateException(
            "Structure field '${STRUCTURE_FIELD}' not found in entity '${MOL_ENTITY_NAME}' - check STRUCTURE_FIELD".toString())
    }
    println "found MOL field ${fldMol.id}"
    
    // Look up each configured parent field name on the parent entity. Names that
    // do not match any field are reported and left out of the export.
    def fieldsFromParent = []
    for (name in FIELDS_FROM_PARENT) {
        def fld = parent.fields.items.find { candidate -> candidate.name == name }
        if (!fld) {
            println "WARNING: field $name not found"
            continue
        }
        fieldsFromParent << fld
        println "Found parent field ${fld.id} for $name"
    }

    // Same lookup for the configured child field names, against the structure entity.
    def fieldsFromChild = []
    for (name in FIELDS_FROM_CHILD) {
        def fld = molEntity.fields.items.find { candidate -> candidate.name == name }
        if (!fld) {
            println "WARNING: field $name not found"
            continue
        }
        fieldsFromChild << fld
        println "Found child field ${fld.id} for $name"
    }
    
    // ResultSet and VertexStates
    // Obtain the default result set for the data tree, then the vertex state of the
    // root (parent) vertex and of the structure (child) vertex.
    def rs = parent.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
    def parentVS = rs.getVertexState(dataTree.rootVertex)
    def molVS = rs.getVertexState(molEdge.destination)

    // Copy the parent IDs held by the parent vertex state into a list to iterate over.
    def ids = parentVS.ids.toList()

    // Call size() explicitly: property-style access (ids.size) on a List is treated by
    // Groovy as a lookup of 'size' on every element rather than the list's length.
    println "Found ${ids.size()} parent IDs to export"
    
    // now read the data
    // Writes one SD record per parent ID: the structure of the first related child row
    // (or an empty molecule when there is none) plus the selected parent and child
    // field values as SD properties. A failure on one row is logged and counted; the
    // export then carries on with the next row.
    def good = 0   // records written
    def bad = 0    // rows that failed

    def exporter = new MolExporter(FILE_NAME, 'sdf')

    try {
        ids.each { id ->

            // stop if the script is terminated
            // (checked outside the per-row try below so the exception ends the whole export)
            if (env.getFeedback().isCancelled()) {
                def msg = "Exporting data to file $FILE_NAME interupted!"
                println msg
                throw new InterruptedException(msg)
            }

            try {
                // values to write, keyed by field object
                def data = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)
                def values = [ : ]
                fieldsFromParent.each {
                    values.put(it, data[id][it.id])
                }

                // only the first child row is used for the structure and child fields
                def mol = null
                def childIDs = molVS.getIdsForParentId(id, DFEnvironmentRO.DEV_NULL)
                if (childIDs.size() > 0) {
                    def childID = childIDs[0]
                    def molData = molVS.getData([childID], DFEnvironmentRO.DEV_NULL)
                    mol = molData[childID][fldMol.id]
                    fieldsFromChild.each {
                        values.put(it, molData[childID][it.id])
                    }
                }

                println "Exporting ID $id"

                def expMol
                // work with a clone so we don't alter the original
                if (!mol || !mol.native ) {
                    expMol = new Molecule()
                } else {
                    expMol = mol.native.cloneMolecule()
                }
                values.each { k, v ->
                    // Test the text form rather than the value itself: Groovy truth treats
                    // 0 and false as "no value", which would silently drop them from the file.
                    // Null and empty text are still skipped.
                    def text = v?.toString()
                    if (text) {
                        // use the configured rename if there is one, else the field's own name
                        def pName = FIELD_NAMES[k.name] ?: k.name
                        expMol.setProperty(pName, text)
                    }
                }
                exporter.write(expMol)

                good++

            } catch (Exception exc) {
                println "ERROR Failed to load ID $id ${exc.toString()}"
                bad++
            }
        }
    } finally {
        // close the file even if the final flush fails
        try {
            exporter.flush()
        } finally {
            exporter.close()
        }
    }

    println "Finished exporting data to file $FILE_NAME"
    println "good: $good bad: $bad"