Importing Multiple SDF Files

This script finds and imports all SDF files in a directory in one run. You can specify the path and the filename mask (*.sdf by default) in the "edit these settings" section of the script. There is also a dedicated forum topic.

/** Multiple SDF file importer
*
* Imports all SDF files in a given source directory and all its subdirectories.
* Automatically reads all fields in each SDF file and creates new String fields if necessary.
* All fields are saved as Strings regardless of their actual contents (integer, decimal number etc.)
*
* Usage:
* 1. Edit source directory containing SDF files
* 2. Run script
*
* @author Filip Zimandl <[email protected]>
*/
 
import chemaxon.formats.MolImporter
import chemaxon.struc.Molecule
import chemaxon.marvin.io.MPropHandler
import groovy.io.FileType
 
// ---------- edit these settings ----------------------------------------------------
String structureFieldName = 'Structure' // the name for the structure field
def pattern = ~/.*\.sdf/ // pattern for file to process
def sourceDir = new File('/Users/filip/Downloads/testScript') // dir to start at
// ---------- edit end section -------------------------------------------------------

// `dataTree` is not defined in this script; presumably it is supplied by the
// host application's script binding - TODO confirm.
def entity = dataTree.rootVertex.entity
def edp = entity.schema.dataProvider.getEntityDataProvider(entity)

// Look up the field that receives the structure of every imported record.
def structureField = entity.fields.items.find { it.name == structureFieldName }
if (structureField == null) {
    // Fail fast, before any lock is taken: without this check the script would
    // only fail later with a NullPointerException on `structureField.id`.
    throw new IllegalStateException(
        "No field named '${structureFieldName}' was found in the root entity of the data tree")
}
def schema = dataTree.schema

// Declared before it is assigned so that the closure can call itself recursively.
def traverse

// Schema-level lock and environment, passed to DFFields.createTextField when
// missing fields are created; both are released at the end of the script.
def lock = schema.lockable.obtainLock('create the new field')
def envRW = EnvUtils.createDefaultEnvironmentRW(lock, 'creating the new field', true)
 
// Loads a single SDF file: every record in the file is inserted as one new row.
//
// For each record the structure (as grabbed by the importer) is stored in the
// structure field, and every property of the record is stored in the text
// field of the same name. A text field that does not exist yet is created
// (length 1024) under the schema lock held by the script.
//
// file - the SDF file to import; its path string is handed to MolImporter
def loadFile = { file ->
    MolImporter importer = new MolImporter(file.toString())
    // Needed so that grabbedMoleculeString is available after each read.
    importer.grabbingEnabled = true
    int imported = 0
    try {
        Molecule mol = new Molecule()
        // Every record is read inside the loop condition. There must be no
        // read before the loop, otherwise the first record of the file is
        // consumed and never inserted.
        while (importer.read(mol)) {
            // Fresh map per record, so a property missing from this record
            // cannot inherit the value of the previous record.
            def values = [:]
            values[structureField.id] = importer.grabbedMoleculeString

            for (int i = 0; i < mol.getPropertyCount(); i++) {
                // Take the key from the current record; records in one file
                // may carry different property sets.
                String key = mol.getPropertyKey(i)

                // Create the field in the entity if it does not exist yet.
                def textField = entity.fields.items.find { it.name == key }
                if (textField == null) {
                    textField = DFFields.createTextField(entity, key, key, 1024, envRW)
                }

                // All values are stored as strings regardless of their content.
                values[textField.id] = MPropHandler.convertToString(mol.properties(), key)
            }

            // Insert the row under its own short-lived lock.
            def insLock = edp.lockable.obtainLock('Inserting')
            try {
                def insEnvRW = EnvUtils.createDefaultEnvironmentRW(insLock, 'Inserting', true)
                try {
                    edp.insert(values, null, insEnvRW)
                } finally {
                    insEnvRW?.feedback.finish()
                }
            } finally {
                // Released even when creating the environment or inserting fails.
                insLock?.release()
            }
            imported++
        }
    } finally {
        // Always release the underlying file handle.
        importer.close()
    }
    println "Imported ${imported} record(s) from ${file}"
}
 
// Walks a directory tree: imports every regular file in `dir` whose name
// matches `pattern`, then repeats the same for each direct subdirectory.
traverse = { dir ->
    dir.eachFileMatch(FileType.FILES, pattern) { sdfFile -> loadFile(sdfFile) }
    dir.eachDir { subDir -> traverse(subDir) }
}
 
// Entry point: import everything below the configured source directory.
// The schema-level environment and lock are cleaned up whether or not the
// import succeeds.
try {
    traverse.call(sourceDir)
} finally {
    envRW?.feedback.finish()
    lock?.release()
}