Importing Multiple SDF Files

    This script finds and imports all SDF files in a directory in one run. You can specify the path and the filename mask (.sdf by default) in the "--- edit these settings ---" section of the script. There is also a dedicated forum topic.

    
    /** Multiple SDF file importer
    * 
    * Imports all SDF files in a given source directory and all of its subdirectories.
    * Automatically reads all fields in each SDF file and creates new String fields if necessary.
    * All fields are saved as Strings regardless of their actual contents (integer, decimal number etc.)
    *
    * Usage:
    * 1. Edit source directory containing SDF files
    * 2. Run script
    *
    * @author Filip Zimandl <fzimandl@chemaxon.com>
    */
     
    import chemaxon.formats.MolImporter
    import chemaxon.struc.Molecule
    import chemaxon.marvin.io.MPropHandler
    import groovy.io.FileType
     
    // ---------- edit these settings ----------------------------------------------------
    String structureFieldName = 'Structure' // the name for the structure field
    // Inside a slashy string a backslash is not an escape character (except before '/'),
    // so a single '\.' is what produces a literal dot in the regular expression.
    def pattern = ~/.*\.sdf/ // pattern for file to process
    def sourceDir = new File('/Users/filip/Downloads/testScript') // dir to start at
    // ---------- edit end section -------------------------------------------------------
    // Forward declaration: the closure assigned to this variable later calls itself by name.
    def traverse

    // Objects looked up from the data tree the script is run on.
    def schema = dataTree.schema
    def entity = dataTree.rootVertex.entity
    def edp = entity.schema.dataProvider.getEntityDataProvider(entity)
    // Existing field of the root entity whose name equals the configured structure field name
    // (null when there is no such field).
    def structureField = entity.fields.items.find { candidate -> candidate.name == structureFieldName }

    // Schema lock and read/write environment handed to DFFields.createTextField further down.
    def lock = schema.lockable.obtainLock('create the new field')
    def envRW = EnvUtils.createDefaultEnvironmentRW(lock, 'creating the new field', true)
     
    /*
     * Imports every record of a single SDF file as a new row of the root entity.
     *
     * For each record the grabbed molecule string is stored under the structure field and
     * every SDF property is stored as text under a field named after the property key.
     * A text field (created with length 1024) is added to the entity when no field with
     * that name exists yet.
     *
     * file - the SDF file to import; its toString() value is handed to MolImporter
     */
    def loadFile = { file ->
        MolImporter importer = new MolImporter(file.toString())
        importer.grabbingEnabled = true
        Molecule mol = new Molecule()
        // fields already resolved while reading this file, keyed by property name
        def fieldsByName = [:]

        try {
            // The loop condition performs the only read() calls, so the first record of
            // the file is imported as well instead of being consumed before the loop.
            while (importer.read(mol)) {
                // Fresh map per record: values of an earlier record must not leak into a
                // row whose record lacks that property.
                def values = [:]
                values[structureField.id] = importer.grabbedMoleculeString

                for (int i = 0; i < mol.getPropertyCount(); i++) {
                    // Use this record's own key; records may list properties in a
                    // different order or carry different properties altogether.
                    String key = mol.getPropertyKey(i)
                    println key

                    //create field in entity if it does not exist
                    def textField = fieldsByName[key]
                    if (textField == null) {
                        textField = entity.fields.items.find { it.name == key }
                        if (textField == null) {
                            textField = DFFields.createTextField(entity, key, key, 1024, envRW)
                        }
                        fieldsByName[key] = textField
                    }

                    //read value from file and add it to the row being built
                    values[textField.id] = MPropHandler.convertToString(mol.properties(), key)
                }

                //inserting row
                def insLock = edp.lockable.obtainLock('Inserting')
                def insEnvRW = EnvUtils.createDefaultEnvironmentRW(insLock, 'Inserting', true)
                try {
                    edp.insert(values, null, insEnvRW)
                } finally {
                    insLock?.release()
                    insEnvRW?.feedback.finish()
                }
            }
        } finally {
            // close the importer so the underlying file is not left open
            importer.close()
        }
    }
     
    // Loads the matching files located directly in the given directory, then applies
    // itself to each of that directory's sub-directories.
    traverse = { dir ->
        dir.eachFileMatch(FileType.FILES, pattern, loadFile)
        dir.eachDir { subDir -> traverse(subDir) }
    }
     
    // Run the import starting at the configured directory; afterwards always finish the
    // feedback of the field-creation environment and release the schema lock.
    try {
        traverse(sourceDir)
    } finally {
        try {
            envRW?.feedback?.finish()
        } finally {
            // released even when finishing the feedback throws
            lock?.release()
        }
    }