###############################################################################
# ilastik: interactive learning and segmentation toolkit
#
# Copyright (C) 2011-2014, the ilastik developers
# <team@ilastik.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# In addition, as a special exception, the copyright holders of
# ilastik give you permission to combine ilastik with applets,
# workflows and plugins which are not covered under the GNU
# General Public License.
#
# See the LICENSE file for details. License information is also available
# on the ilastik web site at:
# http://ilastik.org/license.html
###############################################################################
import os
import glob
import uuid
import numpy
import vigra
from lazyflow.graph import Operator, InputSlot, OutputSlot, OperatorWrapper
from lazyflow.utility.jsonConfig import RoiTuple
from lazyflow.operators.ioOperators import OpStreamingHdf5Reader, OpInputDataReader
from lazyflow.operators.valueProviders import OpMetadataInjector
from lazyflow.operators.opArrayPiper import OpArrayPiper
from ilastik.applets.base.applet import DatasetConstraintError
from ilastik.utility import OpMultiLaneWrapper
from lazyflow.utility import PathComponents, isUrl, make_absolute
from lazyflow.operators.opReorderAxes import OpReorderAxes
class DatasetInfo(object):
"""
Struct-like class for describing dataset info.
"""
class Location():
FileSystem = 0
ProjectInternal = 1
PreloadedArray = 2
def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None):
"""
filepath: may be a globstring or a full hdf5 path+dataset
jsonNamespace: If provided, overrides default settings after filepath is applied
cwd: The working directory for interpeting relative paths. If not provided, os.getcwd() is used.
preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
In that case, you'll probably want to configure the axistags member, or provide a tagged vigra.VigraArray.
"""
assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
cwd = cwd or os.getcwd()
self.preloaded_array = preloaded_array # See description above.
Location = DatasetInfo.Location
self._filePath = "" # The original path to the data (also used as a fallback if the data isn't in the project yet)
self._datasetId = "" # The name of the data within the project file (if it is stored locally)
self.allowLabels = True # OBSOLETE: Whether or not this dataset should be used for training a classifier.
self.drange = None
self.normalizeDisplay = True
self.fromstack = False
self.nickname = ""
self.axistags = None
self.subvolume_roi = None
self.location = Location.FileSystem
self.display_mode = 'default' # choices: default, grayscale, rgba, random-colortable, binary-mask.
if self.preloaded_array is not None:
self.filePath = "" # set property to ensure unique _datasetId
self.location = Location.PreloadedArray
self.fromstack = False
self.nickname = "preloaded-{}-array".format( self.preloaded_array.dtype.name )
if hasattr(self.preloaded_array, 'axistags'):
self.axistags = self.preloaded_array.axistags
# Set defaults for location, nickname, filepath, and fromstack
if filepath:
# Check for sequences (either globstring or separated paths),
file_list = None
if '*' in filepath:
file_list = glob.glob(filepath)
file_list = sorted(file_list)
if not isUrl(filepath) and os.path.pathsep in filepath:
file_list = filepath.split(os.path.pathsep)
# For stacks, choose nickname based on a common prefix
if file_list:
fromstack = True
# Convert all paths to absolute
file_list = map(lambda f: make_absolute(f, cwd), file_list)
if '*' in filepath:
filepath = make_absolute(filepath, cwd)
else:
filepath = os.path.pathsep.join( file_list )
# Add an underscore for each wildcard digit
prefix = os.path.commonprefix(file_list)
num_wildcards = len(file_list[-1]) - len(prefix) - len( os.path.splitext(file_list[-1])[1] )
nickname = PathComponents(prefix).filenameBase + ("_"*num_wildcards)
else:
fromstack = False
if not isUrl(filepath):
# Convert all (non-url) paths to absolute
filepath = make_absolute(filepath, cwd)
nickname = PathComponents(filepath).filenameBase
self.location = DatasetInfo.Location.FileSystem
self.nickname = nickname
self.filePath = filepath
self.fromstack = fromstack
if jsonNamespace is not None:
self.updateFromJson( jsonNamespace )
@property
def filePath(self):
return self._filePath
@filePath.setter
def filePath(self, newPath):
self._filePath = newPath
# Reset our id any time the filepath changes
self._datasetId = str(uuid.uuid1())
@property
def datasetId(self):
return self._datasetId
DatasetInfoSchema = \
{
"_schema_name" : "dataset-info",
"_schema_version" : 0.1,
"filepath" : str,
"drange" : tuple,
"nickname" : str,
"axistags" : str,
"subvolume_roi" : RoiTuple()
}
def __str__(self):
s = "{ "
s += "filepath: {},\n".format(self.filePath)
s += "location: {}\n".format( { DatasetInfo.Location.FileSystem: "FileSystem",
DatasetInfo.Location.ProjectInternal: "ProjectInternal",
DatasetInfo.Location.PreloadedArray: "PreloadedArray"
}[self.location] )
s += "nickname: {},\n".format( self.nickname )
if self.axistags:
s +="axistags: {},\n".format(self.axistags)
if self.drange:
s += "drange: {},\n".format( self.drange )
s += "normalizeDisplay: {}\n".format( self.normalizeDisplay )
if self.fromstack:
s += "fromstack: {}\n".format( self.fromstack )
if self.subvolume_roi:
s += "subvolume_roi: {},\n".format( self.subvolume_roi )
s += " }\n"
return s
def updateFromJson(self, namespace):
"""
Given a namespace object returned by a JsonConfigParser,
update the corresponding non-None fields of this DatasetInfo.
"""
self.filePath = namespace.filepath or self.filePath
self.drange = namespace.drange or self.drange
self.nickname = namespace.nickname or self.nickname
if namespace.axistags is not None:
self.axistags = vigra.defaultAxistags(namespace.axistags)
self.subvolume_roi = namespace.subvolume_roi or self.subvolume_roi
[docs]class OpDataSelection(Operator):
"""
The top-level operator for the data selection applet, implemented as a single-image operator.
The applet uses an OperatorWrapper to make it suitable for use in a workflow.
"""
name = "OpDataSelection"
category = "Top-level"
SupportedExtensions = OpInputDataReader.SupportedExtensions
# Inputs
RoleName = InputSlot(stype='string', value='')
ProjectFile = InputSlot(stype='object', optional=True) #: The project hdf5 File object (already opened)
ProjectDataGroup = InputSlot(stype='string', optional=True) #: The internal path to the hdf5 group where project-local datasets are stored within the project file
WorkingDirectory = InputSlot(stype='filestring') #: The filesystem directory where the project file is located
Dataset = InputSlot(stype='object') #: A DatasetInfo object
# Outputs
Image = OutputSlot() #: The output image
AllowLabels = OutputSlot(stype='bool') #: A bool indicating whether or not this image can be used for training
_NonTransposedImage = OutputSlot() #: The output slot, in the data's original axis ordering (regardless of forceAxisOrder)
ImageName = OutputSlot(stype='string') #: The name of the output image
[docs] class InvalidDimensionalityError(Exception):
"""Raised if the user tries to replace the dataset with a new one of differing dimensionality."""
def __init__(self, message ):
super( OpDataSelection.InvalidDimensionalityError, self ).__init__()
self.message = message
def __str__(self):
return self.message
def __init__(self, forceAxisOrder=False, *args, **kwargs):
super(OpDataSelection, self).__init__(*args, **kwargs)
self.forceAxisOrder = forceAxisOrder
self._opReaders = []
# If the gui calls disconnect() on an input slot without replacing it with something else,
# we still need to clean up the internal operator that was providing our data.
self.ProjectFile.notifyUnready(self.internalCleanup)
self.ProjectDataGroup.notifyUnready(self.internalCleanup)
self.WorkingDirectory.notifyUnready(self.internalCleanup)
self.Dataset.notifyUnready(self.internalCleanup)
def internalCleanup(self, *args):
if len(self._opReaders) > 0:
self.Image.disconnect()
self._NonTransposedImage.disconnect()
for reader in reversed(self._opReaders):
reader.cleanUp()
self._opReaders = []
def setupOutputs(self):
self.internalCleanup()
datasetInfo = self.Dataset.value
try:
# Data only comes from the project file if the user said so AND it exists in the project
datasetInProject = (datasetInfo.location == DatasetInfo.Location.ProjectInternal)
datasetInProject &= self.ProjectFile.ready()
if datasetInProject:
internalPath = self.ProjectDataGroup.value + '/' + datasetInfo.datasetId
datasetInProject &= internalPath in self.ProjectFile.value
# If we should find the data in the project file, use a dataset reader
if datasetInProject:
opReader = OpStreamingHdf5Reader(parent=self)
opReader.Hdf5File.setValue(self.ProjectFile.value)
opReader.InternalPath.setValue(internalPath)
providerSlot = opReader.OutputImage
elif datasetInfo.location == DatasetInfo.Location.PreloadedArray:
preloaded_array = datasetInfo.preloaded_array
assert preloaded_array is not None
if not hasattr(preloaded_array, 'axistags'):
# Guess the axis order, since one was not provided.
axisorders = { 2 : 'yx',
3 : 'zyx',
4 : 'zyxc',
5 : 'tzyxc' }
shape = preloaded_array.shape
ndim = preloaded_array.ndim
assert ndim != 0, "Support for 0-D data not yet supported"
assert ndim != 1, "Support for 1-D data not yet supported"
assert ndim <= 5, "No support for data with more than 5 dimensions."
axisorder = axisorders[ndim]
if ndim == 3 and shape[2] <= 4:
# Special case: If the 3rd dim is small, assume it's 'c', not 'z'
axisorder = 'yxc'
preloaded_array = vigra.taggedView(preloaded_array, axisorder)
opReader = OpArrayPiper(parent=self)
opReader.Input.setValue( preloaded_array )
providerSlot = opReader.Output
else:
# Use a normal (filesystem) reader
opReader = OpInputDataReader(parent=self)
if datasetInfo.subvolume_roi is not None:
opReader.SubVolumeRoi.setValue( datasetInfo.subvolume_roi )
opReader.WorkingDirectory.setValue( self.WorkingDirectory.value )
opReader.FilePath.setValue(datasetInfo.filePath)
providerSlot = opReader.Output
self._opReaders.append(opReader)
# Inject metadata if the dataset info specified any.
# Also, inject if if dtype is uint8, which we can reasonably assume has drange (0,255)
metadata = {}
metadata['display_mode'] = datasetInfo.display_mode
role_name = self.RoleName.value
if 'c' not in providerSlot.meta.getTaggedShape():
num_channels = 0
else:
num_channels = providerSlot.meta.getTaggedShape()['c']
if num_channels > 1:
metadata['channel_names'] = ["{}-{}".format(role_name, i) for i in range(num_channels)]
else:
metadata['channel_names'] = [role_name]
if datasetInfo.drange is not None:
metadata['drange'] = datasetInfo.drange
elif providerSlot.meta.dtype == numpy.uint8:
# SPECIAL case for uint8 data: Provide a default drange.
# The user can always override this herself if she wants.
metadata['drange'] = (0,255)
if datasetInfo.normalizeDisplay is not None:
metadata['normalizeDisplay'] = datasetInfo.normalizeDisplay
if datasetInfo.axistags is not None:
if len(datasetInfo.axistags) != len(providerSlot.meta.shape):
# This usually only happens when we copied a DatasetInfo from another lane,
# and used it as a 'template' to initialize this lane.
# This happens in the BatchProcessingApplet when it attempts to guess the axistags of
# batch images based on the axistags chosen by the user in the interactive images.
# If the interactive image tags don't make sense for the batch image, you get this error.
raise Exception( "Your dataset's provided axistags ({}) do not have the "
"correct dimensionality for your dataset, which has {} dimensions."
.format( "".join(tag.key for tag in datasetInfo.axistags), len(providerSlot.meta.shape) ) )
metadata['axistags'] = datasetInfo.axistags
if datasetInfo.subvolume_roi is not None:
metadata['subvolume_roi'] = datasetInfo.subvolume_roi
# FIXME: We are overwriting the axistags metadata to intentionally allow
# the user to change our interpretation of which axis is which.
# That's okay, but technically there's a special corner case if
# the user redefines the channel axis index.
# Technically, it invalidates the meaning of meta.ram_usage_per_requested_pixel.
# For most use-cases, that won't really matter, which is why I'm not worrying about it right now.
opMetadataInjector = OpMetadataInjector( parent=self )
opMetadataInjector.Input.connect( providerSlot )
opMetadataInjector.Metadata.setValue( metadata )
providerSlot = opMetadataInjector.Output
self._opReaders.append( opMetadataInjector )
self._NonTransposedImage.connect(providerSlot)
if self.forceAxisOrder:
# Before we re-order, make sure no non-singleton
# axes would be dropped by the forced order.
output_order = "".join(self.forceAxisOrder)
provider_order = "".join(providerSlot.meta.getAxisKeys())
tagged_provider_shape = providerSlot.meta.getTaggedShape()
dropped_axes = set(provider_order) - set(output_order)
if any(tagged_provider_shape[a] > 1 for a in dropped_axes):
msg = "The axes of your dataset ({}) are not compatible with the axes used by this workflow ({}). Please fix them."\
.format(provider_order, output_order)
raise DatasetConstraintError("DataSelection", msg)
op5 = OpReorderAxes(parent=self)
op5.AxisOrder.setValue(self.forceAxisOrder)
op5.Input.connect(providerSlot)
providerSlot = op5.Output
self._opReaders.append(op5)
# If the channel axis is not last (or is missing),
# make sure the axes are re-ordered so that channel is last.
if providerSlot.meta.axistags.index('c') != len( providerSlot.meta.axistags )-1:
op5 = OpReorderAxes( parent=self )
keys = providerSlot.meta.getTaggedShape().keys()
try:
# Remove if present.
keys.remove('c')
except ValueError:
pass
# Append
keys.append('c')
op5.AxisOrder.setValue( "".join( keys ) )
op5.Input.connect( providerSlot )
providerSlot = op5.Output
self._opReaders.append( op5 )
# Connect our external outputs to the internal operators we chose
self.Image.connect(providerSlot)
# Set the image name and usage flag
self.AllowLabels.setValue( datasetInfo.allowLabels )
# If the reading operator provides a nickname, use it.
if self.Image.meta.nickname is not None:
datasetInfo.nickname = self.Image.meta.nickname
imageName = datasetInfo.nickname
if imageName == "":
imageName = datasetInfo.filePath
self.ImageName.setValue(imageName)
except:
self.internalCleanup()
raise
def propagateDirty(self, slot, subindex, roi):
# Output slots are directly connected to internal operators
pass
@classmethod
def getInternalDatasets(cls, filePath):
return OpInputDataReader.getInternalDatasets( filePath )
class OpDataSelectionGroup( Operator ):
# Inputs
ProjectFile = InputSlot(stype='object', optional=True)
ProjectDataGroup = InputSlot(stype='string', optional=True)
WorkingDirectory = InputSlot(stype='filestring')
DatasetRoles = InputSlot(stype='object')
DatasetGroup = InputSlot(stype='object', level=1, optional=True) # Must mark as optional because not all subslots are required.
# Outputs
ImageGroup = OutputSlot(level=1)
# These output slots are provided as a convenience, since otherwise it is tricky to create a lane-wise multislot of level-1 for only a single role.
# (It can be done, but requires OpTransposeSlots to invert the level-2 multislot indexes...)
Image = OutputSlot() # The first dataset. Equivalent to ImageGroup[0]
Image1 = OutputSlot() # The second dataset. Equivalent to ImageGroup[1]
Image2 = OutputSlot() # The third dataset. Equivalent to ImageGroup[2]
AllowLabels = OutputSlot(stype='bool') # Pulled from the first dataset only.
_NonTransposedImageGroup = OutputSlot(level=1)
# Must be the LAST slot declared in this class.
# When the shell detects that this slot has been resized,
# it assumes all the others have already been resized.
ImageName = OutputSlot() # Name of the first dataset is used. Other names are ignored.
def __init__(self, forceAxisOrder=None, *args, **kwargs):
super(OpDataSelectionGroup, self).__init__(*args, **kwargs)
self._opDatasets = None
self._roles = []
self._forceAxisOrder = forceAxisOrder
def handleNewRoles(*args):
self.DatasetGroup.resize( len(self.DatasetRoles.value) )
self.DatasetRoles.notifyReady( handleNewRoles )
def setupOutputs(self):
# Create internal operators
if self.DatasetRoles.value != self._roles:
self._roles = self.DatasetRoles.value
# Clean up the old operators
self.ImageGroup.disconnect()
self.Image.disconnect()
self.Image1.disconnect()
self.Image2.disconnect()
self._NonTransposedImageGroup.disconnect()
if self._opDatasets is not None:
self._opDatasets.cleanUp()
self._opDatasets = OperatorWrapper( OpDataSelection, parent=self, operator_kwargs={ 'forceAxisOrder' : self._forceAxisOrder },
broadcastingSlotNames=['ProjectFile', 'ProjectDataGroup', 'WorkingDirectory'] )
self.ImageGroup.connect( self._opDatasets.Image )
self._NonTransposedImageGroup.connect( self._opDatasets._NonTransposedImage )
self._opDatasets.Dataset.connect( self.DatasetGroup )
self._opDatasets.ProjectFile.connect( self.ProjectFile )
self._opDatasets.ProjectDataGroup.connect( self.ProjectDataGroup )
self._opDatasets.WorkingDirectory.connect( self.WorkingDirectory )
for role_index, opDataSelection in enumerate(self._opDatasets):
opDataSelection.RoleName.setValue(self._roles[role_index])
if len( self._opDatasets.Image ) > 0:
self.Image.connect( self._opDatasets.Image[0] )
if len(self._opDatasets.Image) >= 2:
self.Image1.connect( self._opDatasets.Image[1] )
else:
self.Image1.disconnect()
self.Image1.meta.NOTREADY = True
if len(self._opDatasets.Image) >= 3:
self.Image2.connect( self._opDatasets.Image[2] )
else:
self.Image2.disconnect()
self.Image2.meta.NOTREADY = True
self.ImageName.connect( self._opDatasets.ImageName[0] )
self.AllowLabels.connect( self._opDatasets.AllowLabels[0] )
else:
self.Image.disconnect()
self.Image1.disconnect()
self.Image2.disconnect()
self.ImageName.disconnect()
self.AllowLabels.disconnect()
self.Image.meta.NOTREADY = True
self.Image1.meta.NOTREADY = True
self.Image2.meta.NOTREADY = True
self.ImageName.meta.NOTREADY = True
self.AllowLabels.meta.NOTREADY = True
def execute(self, slot, subindex, rroi, result):
assert False, "Unknown or unconnected output slot: {}".format( slot.name )
def propagateDirty(self, slot, subindex, roi):
# Output slots are directly connected to internal operators
pass
class OpMultiLaneDataSelectionGroup( OpMultiLaneWrapper ):
# TODO: Provide output slots DatasetsByRole and ImagesByRole as a convenience
# to save clients the trouble of instantiating/using OpTransposeSlots.
def __init__(self, forceAxisOrder=False, *args, **kwargs):
kwargs.update( { 'operator_kwargs' : {'forceAxisOrder' : forceAxisOrder},
'broadcastingSlotNames' : ['ProjectFile', 'ProjectDataGroup', 'WorkingDirectory', 'DatasetRoles'] } )
super( OpMultiLaneDataSelectionGroup, self ).__init__(OpDataSelectionGroup, *args, **kwargs )
# 'value' slots
assert self.ProjectFile.level == 0
assert self.ProjectDataGroup.level == 0
assert self.WorkingDirectory.level == 0
assert self.DatasetRoles.level == 0
# Indexed by [lane][role]
assert self.DatasetGroup.level == 2, "DatasetGroup is supposed to be a level-2 slot, indexed by [lane][role]"
def addLane(self, laneIndex):
"""Reimplemented from base class."""
numLanes = len(self.innerOperators)
# Only add this lane if we don't already have it
# We might be called from within the context of our own insertSlot signal.
if numLanes == laneIndex:
super( OpMultiLaneDataSelectionGroup, self ).addLane( laneIndex )
def removeLane(self, laneIndex, finalLength):
"""Reimplemented from base class."""
numLanes = len(self.innerOperators)
if numLanes > finalLength:
super( OpMultiLaneDataSelectionGroup, self ).removeLane( laneIndex, finalLength )