# Source code for ilastik.applets.dataSelection.dataSelectionApplet

###############################################################################
#   ilastik: interactive learning and segmentation toolkit
#
#       Copyright (C) 2011-2014, the ilastik developers
#                                <team@ilastik.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# In addition, as a special exception, the copyright holders of
# ilastik give you permission to combine ilastik with applets,
# workflows and plugins which are not covered under the GNU
# General Public License.
#
# See the LICENSE file for details. License information is also available
# on the ilastik web site at:
#		   http://ilastik.org/license.html
###############################################################################
from __future__ import division
import os
import sys
import glob
import argparse
import collections
import logging
logger = logging.getLogger(__name__)

import vigra
from lazyflow.utility import PathComponents, isUrl, make_absolute
from ilastik.applets.base.applet import Applet
from opDataSelection import OpMultiLaneDataSelectionGroup, DatasetInfo
from dataSelectionSerializer import DataSelectionSerializer, Ilastik05DataSelectionDeserializer

class DataSelectionApplet( Applet ):
    """
    This applet allows the user to select sets of input data,
    which are provided as outputs in the corresponding top-level applet operator.
    """
    DEFAULT_INSTRUCTIONS = "Use the controls shown to the right to add image files to this workflow."

    def __init__(self, workflow, title, projectFileGroupName, supportIlastik05Import=False,
                 batchDataGui=False, forceAxisOrder=None, instructionText=DEFAULT_INSTRUCTIONS,
                 max_lanes=None, show_axis_details=False):
        self.__topLevelOperator = OpMultiLaneDataSelectionGroup(parent=workflow, forceAxisOrder=forceAxisOrder)
        super(DataSelectionApplet, self).__init__( title, syncWithImageIndex=False )

        self._serializableItems = [ DataSelectionSerializer(self.topLevelOperator, projectFileGroupName) ]
        if supportIlastik05Import:
            self._serializableItems.append(Ilastik05DataSelectionDeserializer(self.topLevelOperator))

        self._instructionText = instructionText
        self._gui = None
        self._batchDataGui = batchDataGui
        self._title = title
        self._max_lanes = max_lanes
        self.busy = False
        self.show_axis_details = show_axis_details

    #
    # GUI
    #
    def getMultiLaneGui( self ):
        if self._gui is None:
            from dataSelectionGui import DataSelectionGui, GuiMode
            guiMode = { True: GuiMode.Batch, False: GuiMode.Normal }[self._batchDataGui]
            self._gui = DataSelectionGui( self,
                                          self.topLevelOperator,
                                          self._serializableItems[0],
                                          self._instructionText,
                                          guiMode,
                                          self._max_lanes,
                                          self.show_axis_details )
        return self._gui

    #
    # Top-level operator
    #
    @property
    def topLevelOperator(self):
        return self.__topLevelOperator

    #
    # Project serialization
    #
    @property
    def dataSerializers(self):
        return self._serializableItems

    @classmethod
    def parse_known_cmdline_args(cls, cmdline_args, role_names):
        """
        Helper function for headless workflows.
        Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator
        and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``.

        Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
        not according to the project file location, since this is more likely to be what headless users expect.

        .. note:: If the top-level operator was configured with multiple 'roles', then the input files
                  for each role can be configured separately:

                  $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png

                  If the workflow has only one role (or only one required role), then the role-name flag can be omitted:

                  $ python ilastik.py [other workflow options] input1.png input2.png

        See also: :py:meth:`configure_operator_with_parsed_args()`.
        """
        arg_parser = argparse.ArgumentParser()
        if role_names:
            for role_name in role_names:
                arg_name = cls._role_name_to_arg_name(role_name)
                arg_parser.add_argument('--' + arg_name, nargs='+', help='List of input files for the {} role'.format( role_name ))

        # Finally, a catch-all for role 0 (if the workflow only has one role, there's no need to provide role names)
        arg_parser.add_argument('unspecified_input_files', nargs='*', help='List of input files to process.')

        arg_parser.add_argument('--preconvert_stacks', help="Convert image stacks to temporary hdf5 files before loading them.", action='store_true', default=False)
        arg_parser.add_argument('--input_axes', help="Explicitly specify the axes of your dataset.", required=False)

        parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

        if parsed_args.unspecified_input_files:
            # We allow the file list to go to the 'default' role,
            # but only if no other roles were explicitly configured.
            arg_names = map(cls._role_name_to_arg_name, role_names)
            for arg_name in arg_names:
                if getattr(parsed_args, arg_name):
                    # FIXME: This error message could be more helpful.
                    role_args = map( cls._role_name_to_arg_name, role_names )
                    role_args = map( lambda s: '--' + s, role_args )
                    role_args_str = ", ".join( role_args )
                    raise Exception("Invalid command line arguments: All roles must be configured explicitly.\n"
                                    "Use the following flags to specify which files are matched with which inputs:\n"
                                    + role_args_str )

            # Relocate to the 'default' role
            arg_name = cls._role_name_to_arg_name(role_names[0])
            setattr(parsed_args, arg_name, parsed_args.unspecified_input_files)
            parsed_args.unspecified_input_files = None

        # Replace '~' with home dir
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            paths_for_role = getattr(parsed_args, arg_name)
            if paths_for_role:
                for i, path in enumerate( paths_for_role ):
                    paths_for_role[i] = os.path.expanduser( path )

        # Check for errors: Do all input files exist?
        all_input_paths = []
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            role_paths = getattr(parsed_args, arg_name)
            if role_paths:
                all_input_paths += role_paths

        error = False
        for p in all_input_paths:
            if isUrl(p):
                # Don't error-check urls in advance.
                continue
            p = PathComponents(p).externalPath
            if '*' in p:
                if len(glob.glob(p)) == 0:
                    logger.error("Could not find any files for globstring: {}".format(p))
                    logger.error("Check your quotes!")
                    error = True
            elif not os.path.exists(p):
                logger.error("Input file does not exist: " + p)
                error = True

        if error:
            raise RuntimeError("Could not find one or more input files. See logged errors.")

        return parsed_args, unused_args
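    # Illustrative sketch (not part of the original module): how a headless script might call
    # parse_known_cmdline_args(). The role names and file names below are hypothetical, and the
    # files would have to exist on disk, since the parser checks for them.
    #
    #   role_names = ["Raw Data", "Prediction Mask"]
    #   cmdline = ["--raw_data", "imgA.png", "imgB.png",
    #              "--prediction_mask", "maskA.png", "maskB.png",
    #              "--some-other-flag"]
    #   parsed_args, unused_args = DataSelectionApplet.parse_known_cmdline_args(cmdline, role_names)
    #   # parsed_args.raw_data == ["imgA.png", "imgB.png"]
    #   # unused_args == ["--some-other-flag"]  (left for other applets' parsers)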
    @classmethod
    def _role_name_to_arg_name(cls, role_name):
        arg_name = role_name
        arg_name = arg_name.lower()
        arg_name = arg_name.replace(' ', '_').replace('-', '_')
        return arg_name

    @classmethod
    def role_paths_from_parsed_args(cls, parsed_args, role_names):
        role_paths = collections.OrderedDict()
        for role_index, role_name in enumerate(role_names):
            arg_name = cls._role_name_to_arg_name(role_name)
            input_paths = getattr(parsed_args, arg_name)
            role_paths[role_index] = input_paths or []

        # As far as this parser is concerned, all roles except the first are optional.
        # (Workflows that require the other roles are responsible for raising an error themselves.)
        for role_index in range(1, len(role_names)):
            # Fill in None for missing files
            if role_index not in role_paths:
                role_paths[role_index] = []
            num_missing = len(role_paths[0]) - len(role_paths[role_index])
            role_paths[role_index] += [None] * num_missing

        return role_paths
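    # Illustrative sketch (not part of the original module): role_paths_from_parsed_args() pads the
    # optional roles with None so each role list matches the length of role 0. Continuing the
    # hypothetical example above, if only one mask were supplied for two raw images:
    #
    #   role_paths = DataSelectionApplet.role_paths_from_parsed_args(parsed_args, role_names)
    #   # role_paths == OrderedDict([(0, ["imgA.png", "imgB.png"]),
    #   #                            (1, ["maskA.png", None])])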
    def configure_operator_with_parsed_args(self, parsed_args):
        """
        Helper function for headless workflows.
        Configures this applet's top-level operator according to the settings provided in ``parsed_args``.

        :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
        """
        role_names = self.topLevelOperator.DatasetRoles.value
        role_paths = self.role_paths_from_parsed_args(parsed_args, role_names)

        for role_index, input_paths in role_paths.items():
            # If the user doesn't want image stacks to be copied into the project file,
            # we generate hdf5 volumes in a temporary directory and use those files instead.
            if parsed_args.preconvert_stacks:
                import tempfile
                input_paths = self.convertStacksToH5( input_paths, tempfile.gettempdir() )

            input_infos = [DatasetInfo(p) if p else None for p in input_paths]
            if parsed_args.input_axes:
                for info in filter(None, input_infos):
                    info.axistags = vigra.defaultAxistags(parsed_args.input_axes)

            opDataSelection = self.topLevelOperator
            existing_lanes = len(opDataSelection.DatasetGroup)
            opDataSelection.DatasetGroup.resize( max(len(input_infos), existing_lanes) )
            for lane_index, info in enumerate(input_infos):
                if info:
                    opDataSelection.DatasetGroup[lane_index][role_index].setValue( info )

            need_warning = False
            for lane_index in range(len(input_infos)):
                output_slot = opDataSelection.ImageGroup[lane_index][role_index]
                if output_slot.ready() and output_slot.meta.prefer_2d and 'z' in output_slot.meta.axistags:
                    need_warning = True
                    break

            if need_warning:
                logger.warn("*******************************************************************************************")
                logger.warn("Some of your input data is stored in a format that is not efficient for 3D access patterns.")
                logger.warn("Performance may suffer as a result. For best performance, use a chunked HDF5 volume.")
                logger.warn("*******************************************************************************************")
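    # Illustrative sketch (not part of the original module): a typical headless flow pairs
    # parse_known_cmdline_args() with configure_operator_with_parsed_args(). The applet instance
    # name below is hypothetical.
    #
    #   role_names = data_selection_applet.topLevelOperator.DatasetRoles.value
    #   parsed_args, unused_args = DataSelectionApplet.parse_known_cmdline_args(sys.argv[1:], role_names)
    #   data_selection_applet.configure_operator_with_parsed_args(parsed_args)
    #   # Afterwards, one lane exists per input file, and each lane's DatasetGroup slot for the
    #   # given role holds a DatasetInfo created from the corresponding parsed path.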
    @classmethod
    def convertStacksToH5(cls, filePaths, stackVolumeCacheDir):
        """
        If any of the files in filePaths appear to be globstrings for a stack,
        convert the given stack to hdf5 format.

        Return the filePaths list with globstrings replaced by the paths to the new hdf5 volumes.
        """
        import hashlib
        import pickle
        import h5py
        from lazyflow.graph import Graph
        from lazyflow.operators.ioOperators import OpStackToH5Writer

        filePaths = list(filePaths)
        for i, path in enumerate(filePaths):
            if not path or '*' not in path:
                continue
            globstring = path

            # Embrace paranoia:
            # We want to make sure we never re-use a stale cache file for a new dataset,
            # even if the dataset is located in the same location as a previous one and has the same globstring!
            # Create a sha-1 of the file name and modification date.
            sha = hashlib.sha1()
            files = sorted([k.replace('\\', '/') for k in glob.glob( path )])
            for f in files:
                sha.update(f)
                sha.update(pickle.dumps(os.stat(f).st_mtime))
            stackFile = sha.hexdigest() + '.h5'
            stackPath = os.path.join( stackVolumeCacheDir, stackFile ).replace('\\', '/')

            # Overwrite original path
            filePaths[i] = stackPath + "/volume/data"

            # Generate the hdf5 if it doesn't already exist
            if os.path.exists(stackPath):
                logger.info( "Using previously generated hdf5 volume for stack {}".format(path) )
                logger.info( "Volume path: {}".format(filePaths[i]) )
            else:
                logger.info( "Generating hdf5 volume for stack {}".format(path) )
                logger.info( "Volume path: {}".format(filePaths[i]) )

                if not os.path.exists( stackVolumeCacheDir ):
                    os.makedirs( stackVolumeCacheDir )

                with h5py.File(stackPath) as f:
                    # Configure the conversion operator
                    opWriter = OpStackToH5Writer( graph=Graph() )
                    opWriter.hdf5Group.setValue(f)
                    opWriter.hdf5Path.setValue("volume/data")
                    opWriter.GlobString.setValue(globstring)

                    # Initiate the write
                    success = opWriter.WriteImage.value
                    assert success, "Something went wrong when generating an hdf5 file from an image sequence."

        return filePaths
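    # Illustrative sketch (not part of the original module): convertStacksToH5() only touches
    # entries that contain a '*' globstring; plain file paths pass through unchanged. The cache
    # directory and globstring below are hypothetical.
    #
    #   paths = ["volume.h5/data", "/tmp/slices/*.png"]
    #   new_paths = DataSelectionApplet.convertStacksToH5(paths, "/tmp/stack_cache")
    #   # new_paths[0] is unchanged; new_paths[1] now points to
    #   # "/tmp/stack_cache/<sha1>.h5/volume/data", generated (or re-used) on disk.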
    def configureRoleFromJson(self, lane, role, dataset_info_namespace):
        assert sys.version_info.major == 2, "Alert! This function has not been tested "\
            "under python 3. Please remove this assertion and be wary of any strange behavior you encounter"
        opDataSelection = self.topLevelOperator

        logger.debug( "Configuring dataset for role {}".format( role ) )
        logger.debug( "Params: {}".format(dataset_info_namespace) )

        datasetInfo = DatasetInfo()
        datasetInfo.updateFromJson( dataset_info_namespace )

        # Check for globstring, which means we need to import the stack first.
        if '*' in datasetInfo.filePath:
            totalProgress = [-100]
            def handleStackImportProgress( progress ):
                if progress // 10 != totalProgress[0] // 10:
                    totalProgress[0] = progress
                    logger.info( "Importing stack: {}%".format( totalProgress[0] ) )
            serializer = self.dataSerializers[0]
            serializer.progressSignal.connect( handleStackImportProgress )
            serializer.importStackAsLocalDataset( datasetInfo )

        opDataSelection.DatasetGroup[lane][role].setValue( datasetInfo )
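# Illustrative sketch (not part of the original module): a workflow typically instantiates this
# applet once, exposes its top-level operator to downstream applets, and declares the dataset
# roles it expects. The workflow object and names below are hypothetical.
#
#   data_selection_applet = DataSelectionApplet(
#       workflow, "Input Data", "DataSelection",
#       supportIlastik05Import=False, instructionText="Select your input images." )
#   opDataSelection = data_selection_applet.topLevelOperator
#   opDataSelection.DatasetRoles.setValue( ["Raw Data"] )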