Skip to content

Importing Multiple SDF Files

This script finds and imports all SDF files in a directory in one run. You can specify the path and the filename mask (.sdf by default) in the "--- edit these settings ---" section of the script. There is also a dedicated forum topic.

/** Multiple SDF file importer
*
* Imports all SDF files in a given source directory and all of its subdirectories.
* Automatically reads all fields in each SDF file and creates new String fields if necessary.
* All fields are saved as Strings regardless of their actual contents (integer, decimal number etc.)
*
* Usage:
* 1. Edit source directory containing SDF files
* 2. Run script
*
* @author Filip Zimandl <fzimandl@chemaxon.com>
*/

import chemaxon.formats.MolImporter
import chemaxon.struc.Molecule
import chemaxon.marvin.io.MPropHandler
import groovy.io.FileType

// ---------- edit these settings ----------------------------------------------------
String structureFieldName = 'Structure' // the name for the structure field
def pattern = ~/.*\.sdf/ // pattern for file to process
def sourceDir = new File('C:/Documents/chemaxon/sdfs') // dir to start at
// ---------- edit end section -------------------------------------------------------

// Fail fast on a wrong path, before any lock is taken on the schema.
if (!sourceDir.isDirectory()) {
    throw new IllegalArgumentException("Source directory does not exist or is not a directory: ${sourceDir}")
}

// Target entity (root of the current data tree) and the data provider used to insert rows into it.
def entity = dataTree.rootVertex.entity
def edp = entity.schema.dataProvider.getEntityDataProvider(entity)

// Field that receives the molecule text of every imported record.
def structureField = entity.fields.items.find { it.name == structureFieldName }
if (structureField == null) {
    // Without this check the script would only fail later, inside the import loop,
    // with a null dereference on structureField.id.
    throw new IllegalStateException("Entity has no field named '${structureFieldName}'")
}
def schema = dataTree.schema

// Declared up front so the closure assigned to it later can refer to itself recursively.
def traverse

// Schema-level lock and environment passed to DFFields.createTextField when new fields are created;
// both are released by the script's final try/finally.
def lock = schema.lockable.obtainLock('create the new field')
def envRW = EnvUtils.createDefaultEnvironmentRW(lock, 'creating the new field', true)

// Loads a single SDF file: inserts one row per record (including the first one) into the entity.
//
// For every record the grabbed molecule text goes into the structure field and each SDF
// property goes into the text field of the same name; a missing text field is created
// on the fly (length 1024) using the script-level envRW.
//
// file - the SDF file to import; its toString() value is handed to MolImporter as the path.
def loadFile = { file ->
    MolImporter importer = new MolImporter(file.toString())
    // grabbing must be on for importer.grabbedMoleculeString to be available below
    importer.grabbingEnabled = true
    Molecule mol = new Molecule()

    try {
        // The loop condition is the only place a record is read, so no record is skipped.
        while (importer.read(mol)) {
            // A fresh map per record: values of a previous molecule must not leak into
            // this row when the current record lacks some property.
            def values = [:]
            values[structureField.id] = importer.grabbedMoleculeString

            for (int i = 0; i < mol.getPropertyCount(); i++) {
                // Take the key straight from the current molecule so field name and value
                // always belong to the same property, even when records differ in layout.
                String key = mol.getPropertyKey(i)
                println key

                //create fields in entity if does not exist
                def textField = entity.fields.items.find { it.name == key }
                if (textField == null) {
                    textField = DFFields.createTextField(entity, key, key, 1024, envRW)
                }

                //read value from file and add it to the row being built
                values[textField.id] = MPropHandler.convertToString(mol.properties(), key)
            }

            //inserting row
            def insLock = edp.lockable.obtainLock('Inserting')
            def insEnvRW = EnvUtils.createDefaultEnvironmentRW(insLock, 'Inserting', true)
            try {
                edp.insert(values, null, insEnvRW)
            } finally {
                insLock?.release()
                insEnvRW?.feedback.finish()
            }
        }
    } finally {
        // release the underlying file handle even when reading or inserting fails
        importer.close()
    }
}

// Recursive directory walk: imports every file of the given directory whose name matches
// `pattern`, then repeats the same for each subdirectory.
traverse = { currentDir ->
    // loadFile is invoked once per matching regular file
    currentDir.eachFileMatch(FileType.FILES, pattern, loadFile)
    // descend only after the files of this level have been handled
    currentDir.eachDir { subDir ->
        traverse(subDir)
    }
}

// Entry point: run the import from the configured source directory and, whatever happens,
// finish the feedback of the field-creation environment and release the schema lock.
try {
    traverse.call(sourceDir)
} finally {
    envRW?.feedback.finish()
    lock?.release()
}