Source code for dials.algorithms.spot_finding.factory

import functools
import logging
import pickle
import time

import numpy as np

from dxtbx.imageset import ImageSequence
from iotbx.phil import parse

import dials.extensions
import dials.util.masking
from dials.algorithms.background.simple import Linear2dModeller
from dials.algorithms.spot_finding.finder import SpotFinder
from dials.array_family import flex

logger = logging.getLogger(__name__)


def generate_phil_scope():
    phil_scope = parse(
        """
  spotfinder
    .help = "Parameters used in the spot finding algorithm."
  {
    include scope dials.data.lookup.phil_scope

    write_hot_mask = False
      .type = bool
      .help = "Write the hot mask"

    hot_mask_prefix = 'hot_mask'
      .type = str
      .help = "Prefix for the hot mask pickle file"

    force_2d = False
      .type = bool
      .help = "Do spot finding in 2D"

    scan_range = None
      .help = "The range of images to use in finding spots. The ranges are"
              "inclusive (e.g. j0 <= j < j1)."
              "For sequences the scan range is interpreted as the literal scan"
              "range. Whereas for imagesets the scan range is interpreted as"
              "the image number in the imageset. Multiple ranges can be"
              "specified by repeating the scan_range= parameter."
      .type = ints(size=2)
      .multiple = True

    region_of_interest = None
      .type = ints(size=4)
      .help = "A region of interest to look for spots."
              "Specified as: x0,x1,y0,y1"
              "The pixels x0 and y0 are included in the range but the pixels x1 and y1"
              "are not. To specify an ROI covering the whole image set"
              "region_of_interest=0,width,0,height."

    compute_mean_background = False
      .type = bool
      .help = "Compute the mean background for each image"

    filter
      .help = "Parameters used in the spot finding filter strategy."
    {
      min_spot_size = Auto
        .help = "The minimum number of contiguous pixels for a spot"
                "to be accepted by the filtering algorithm."
        .type = int(value_min=1)

      max_spot_size = 1000
        .help = "The maximum number of contiguous pixels for a spot"
                "to be accepted by the filtering algorithm."
        .type = int(value_min=1, allow_none=False)

      max_separation = 2
        .help = "The maximum peak-to-centroid separation (in pixels)"
                "for a spot to be accepted by the filtering algorithm."
        .type = float(value_min=0)
        .expert_level = 1

      max_strong_pixel_fraction = 0.25
        .help = "If the fraction of pixels in an image marked as strong is"
                "greater than this value, throw an exception"
        .type = float(value_min=0, value_max=1)

      background_gradient
        .expert_level=2
      {
        filter = False
          .type = bool
        background_size = 2
          .type = int(value_min=1)
        gradient_cutoff = 4
          .type = float(value_min=0)
      }

      spot_density
        .expert_level=2
      {
        filter = False
          .type = bool
      }

      include scope dials.util.masking.phil_scope
    }

    mp {
      method = *none drmaa sge lsf pbs
        .type = choice
        .help = "The cluster method to use"

      njobs = 1
        .type = int(value_min=1)
        .help = "The number of cluster jobs to use"

      nproc = 1
        .type = int(value_min=1)
        .help = "The number of processes to use per cluster job"

      chunksize = auto
        .type = int(value_min=1)
        .help = "The number of jobs to process per process"

      min_chunksize = 20
        .type = int(value_min=1)
        .help = "When chunksize is auto, this is the minimum chunksize"
    }
  }
""",
        process_includes=True,
    )

    main_scope = phil_scope.get_without_substitution("spotfinder")
    assert len(main_scope) == 1
    main_scope = main_scope[0]
    main_scope.adopt_scope(dials.extensions.SpotFinderThreshold.phil_scope())
    return phil_scope


phil_scope = generate_phil_scope()
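
# Illustrative sketch (not part of the module): default spot-finding parameters
# can be obtained by extracting this scope, mirroring what
# SpotFinderFactory.from_parameters does when params is None, e.g.:
#
#     params = phil_scope.fetch(source=parse("")).extract()
#     print(params.spotfinder.filter.max_spot_size)  # 1000 by default
#
# Individual values can then be overridden before constructing a spot finder.
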
class FilterRunner:
    """
    A class to run multiple filters in succession.
    """

    def __init__(self, filters=None):
        """
        Initialise with a list of filters.

        :param filters: The list of filters
        """
        if filters is None:
            self.filters = []
        else:
            self.filters = filters

    def __call__(self, flags, **kwargs):
        """
        Call the filters one by one.

        :param flags: The input flags
        :returns: The filtered flags
        """
        flags = self.check_flags(flags, **kwargs)
        for f in self.filters:
            flags = f(flags, **kwargs)
        return flags

    def check_flags(
        self,
        flags,
        predictions=None,
        observations=None,
        shoeboxes=None,
        **kwargs,  # noqa: U100
    ):
        """
        Check the flags are set, if they're not then create a list
        of Trues equal to the number of items given.

        :param flags: The input flags
        :param predictions: The predictions
        :param observations: The observations
        :param shoeboxes: The shoeboxes
        :return: The filtered flags
        """
        # If flags are not set then create a list of Trues
        if flags is None:
            length = 0
            if predictions:
                length = len(predictions)
            if observations:
                if length > 0:
                    assert length == len(observations)
                else:
                    length = len(observations)
            if shoeboxes:
                if length > 0:
                    assert length == len(shoeboxes)
                else:
                    length = len(shoeboxes)

            # Create an array of flags
            flags = flex.bool(length, True)

        # Return the flags
        return flags

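
# Illustrative sketch: FilterRunner composes filters the same way
# SpotFinderFactory.configure_filter does; the observations and shoeboxes
# objects are assumed to come from the spot-finding pipeline elsewhere, e.g.:
#
#     runner = FilterRunner([PeakCentroidDistanceFilter(maxd=2)])
#     flags = runner(None, observations=observations, shoeboxes=shoeboxes)
#
# Passing flags=None lets check_flags() seed an all-True flex.bool array of the
# right length before each filter is applied in turn.
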
class PeakCentroidDistanceFilter:
    def __init__(self, maxd):
        """
        Initialise

        :param maxd: The maximum distance allowed
        """
        self.maxd = maxd

    def run(self, flags, observations=None, shoeboxes=None, **kwargs):  # noqa: U100
        """
        Run the filtering.
        """
        # Get the peak locations and the centroids and return the flags of
        # those closer than the min distance
        peak = shoeboxes.peak_coordinates()
        cent = observations.centroids().px_position()
        return flags.__and__((peak - cent).norms() <= self.maxd)

    def __call__(self, flags, **kwargs):
        """Call the filter and print information."""
        num_before = flags.count(True)
        flags = self.run(flags, **kwargs)
        num_after = flags.count(True)
        logger.info(
            "Filtered %d of %d spots by peak-centroid distance", num_after, num_before
        )
        return flags

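
# Illustrative sketch: on its own, this filter keeps only spots whose peak
# pixel lies within maxd pixels of the centroid; observations and shoeboxes
# are assumed to be the flex objects produced during spot finding, e.g.:
#
#     keep = PeakCentroidDistanceFilter(maxd=2)(
#         flex.bool(len(observations), True),
#         observations=observations,
#         shoeboxes=shoeboxes,
#     )
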
class BackgroundGradientFilter:
    def __init__(self, background_size=2, gradient_cutoff=4):
        self.background_size = background_size
        self.gradient_cutoff = gradient_cutoff

    def run(self, flags, sequence=None, shoeboxes=None, **kwargs):  # noqa: U100
        modeller = Linear2dModeller()
        detector = sequence.get_detector()

        # sort shoeboxes by centroid z
        frame = shoeboxes.centroid_all().position_frame()
        perm = flex.sort_permutation(frame)
        shoeboxes = shoeboxes.select(perm)
        buffer_size = 1
        bg_plus_buffer = self.background_size + buffer_size

        t0 = time.time()
        for i, shoebox in enumerate(shoeboxes):
            if not flags[perm[i]]:
                continue
            panel = detector[shoebox.panel]
            max_x, max_y = panel.get_image_size()
            bbox = shoebox.bbox
            x1, x2, y1, y2, z1, z2 = bbox
            # expand the bbox with a background region around the spotfinder shoebox
            # perhaps also should use a buffer zone between the shoebox and the
            # background region
            expanded_bbox = (
                max(0, x1 - bg_plus_buffer),
                min(max_x, x2 + bg_plus_buffer),
                max(0, y1 - bg_plus_buffer),
                min(max_y, y2 + bg_plus_buffer),
                z1,
                z2,
            )
            shoebox.bbox = expanded_bbox
        t1 = time.time()
        logger.info("Time expand_shoebox: %s", t1 - t0)

        rlist = flex.reflection_table()
        rlist["shoebox"] = shoeboxes
        rlist["shoebox"].allocate()
        rlist["panel"] = shoeboxes.panels()
        rlist["bbox"] = shoeboxes.bounding_boxes()
        rlist.extract_shoeboxes(sequence)

        shoeboxes = rlist["shoebox"]
        shoeboxes.flatten()

        for i, shoebox in enumerate(shoeboxes):
            if not flags[perm[i]]:
                continue
            panel = detector[shoebox.panel]
            trusted_range = panel.get_trusted_range()
            ex1, ex2, ey1, ey2, ez1, ez2 = shoebox.bbox
            data = shoebox.data
            mask = flex.bool(data.accessor(), False)
            for i_y, y in enumerate(range(ey1, ey2)):
                for i_x, x in enumerate(range(ex1, ex2)):
                    value = data[0, i_y, i_x]
                    if (
                        y >= (ey1 + buffer_size)
                        and y < (ey2 - buffer_size)
                        and x >= (ex1 + buffer_size)
                        and x < (ex2 - buffer_size)
                    ):
                        mask[0, i_y, i_x] = False  # foreground
                    elif value > trusted_range[0] and value < trusted_range[1]:
                        mask[0, i_y, i_x] = True  # background
            model = modeller.create(data.as_double(), mask)
            d, a, b = model.params()[:3]
            if abs(a) > self.gradient_cutoff or abs(b) > self.gradient_cutoff:
                flags[perm[i]] = False

        return flags

    def __call__(self, flags, **kwargs):
        """Call the filter and print information."""
        num_before = flags.count(True)
        flags = self.run(flags, **kwargs)
        num_after = flags.count(True)
        logger.info(
            "Filtered %d of %d spots by background gradient", num_after, num_before
        )
        return flags

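
# Illustrative sketch: this filter additionally needs the imageset (sequence)
# so that shoebox pixel data can be re-extracted; the defaults shown match the
# background_gradient PHIL parameters above, e.g.:
#
#     bg_filter = BackgroundGradientFilter(background_size=2, gradient_cutoff=4)
#     flags = bg_filter(flags, sequence=sequence, shoeboxes=shoeboxes)
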
class SpotDensityFilter:
    def __init__(self, nbins=50, gradient_cutoff=0.002):
        self.nbins = nbins
        self.gradient_cutoff = gradient_cutoff

    def run(self, flags, sequence=None, observations=None, **kwargs):  # noqa: U100
        obs_x, obs_y = observations.centroids().px_position_xy().parts()

        H, xedges, yedges = np.histogram2d(
            obs_x.as_numpy_array(), obs_y.as_numpy_array(), bins=self.nbins
        )

        H_flex = flex.double(H.flatten().astype(np.float64))
        n_slots = min(int(flex.max(H_flex)), 30)
        hist = flex.histogram(H_flex, n_slots=n_slots)

        slots = hist.slots()
        cumulative_hist = flex.long(len(slots))
        for i, slot in enumerate(slots):
            cumulative_hist[i] = slot
            if i > 0:
                cumulative_hist[i] += cumulative_hist[i - 1]

        cumulative_hist = cumulative_hist.as_double() / flex.max(
            cumulative_hist.as_double()
        )

        cutoff = None
        gradients = flex.double()
        for i in range(len(slots) - 1):
            x1 = cumulative_hist[i]
            x2 = cumulative_hist[i + 1]
            g = (x2 - x1) / hist.slot_width()
            gradients.append(g)
            if (
                cutoff is None
                and i > 0
                and g < self.gradient_cutoff
                and gradients[i - 1] < self.gradient_cutoff
            ):
                cutoff = hist.slot_centers()[i - 1] - 0.5 * hist.slot_width()

        sel = np.column_stack(np.where(H > cutoff))
        for (ix, iy) in sel:
            flags.set_selected(
                (
                    (obs_x > xedges[ix])
                    & (obs_x < xedges[ix + 1])
                    & (obs_y > yedges[iy])
                    & (obs_y < yedges[iy + 1])
                ),
                False,
            )

        return flags

    def __call__(self, flags, **kwargs):
        """Call the filter and print information."""
        num_before = flags.count(True)
        flags = self.run(flags, **kwargs)
        num_after = flags.count(True)
        logger.info("Filtered %d of %d spots by spot density", num_after, num_before)
        return flags

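
# Illustrative sketch: the density filter only needs the observed centroids;
# nbins=50 and gradient_cutoff=0.002 are the constructor defaults, e.g.:
#
#     flags = SpotDensityFilter(nbins=50, gradient_cutoff=0.002)(
#         flags, observations=observations
#     )
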
class SpotFinderFactory:
    """
    Factory class to create spot finders
    """

    @staticmethod
    def from_parameters(params=None, experiments=None, is_stills=False):
        """
        Given a set of parameters, construct the spot finder

        :param params: The input parameters
        :param is_stills: [ADVANCED] Force still-handling of experiment
                          ID remapping for dials.stills_process.
        :returns: The spot finder instance
        """
        if params is None:
            params = phil_scope.fetch(source=parse("")).extract()

        if params.spotfinder.force_2d and params.output.shoeboxes is False:
            no_shoeboxes_2d = True
        elif experiments is not None and params.output.shoeboxes is False:
            no_shoeboxes_2d = False
            all_stills = True
            for experiment in experiments:
                if isinstance(experiment.imageset, ImageSequence):
                    all_stills = False
                    break
            if all_stills:
                no_shoeboxes_2d = True
        else:
            no_shoeboxes_2d = False

        # Read in the lookup files
        mask = SpotFinderFactory.load_image(params.spotfinder.lookup.mask)
        params.spotfinder.lookup.mask = mask

        # Configure the filter options
        filter_spots = SpotFinderFactory.configure_filter(params)

        # Create the threshold strategy
        threshold_function = SpotFinderFactory.configure_threshold(params)

        mask_generator = functools.partial(
            dials.util.masking.generate_mask, params=params.spotfinder.filter
        )

        # Make sure 'none' is interpreted as None
        if params.spotfinder.mp.method == "none":
            params.spotfinder.mp.method = None

        # Setup the spot finder
        return SpotFinder(
            threshold_function=threshold_function,
            mask=params.spotfinder.lookup.mask,
            filter_spots=filter_spots,
            scan_range=params.spotfinder.scan_range,
            write_hot_mask=params.spotfinder.write_hot_mask,
            hot_mask_prefix=params.spotfinder.hot_mask_prefix,
            mp_method=params.spotfinder.mp.method,
            mp_nproc=params.spotfinder.mp.nproc,
            mp_njobs=params.spotfinder.mp.njobs,
            mp_chunksize=params.spotfinder.mp.chunksize,
            max_strong_pixel_fraction=params.spotfinder.filter.max_strong_pixel_fraction,
            compute_mean_background=params.spotfinder.compute_mean_background,
            region_of_interest=params.spotfinder.region_of_interest,
            mask_generator=mask_generator,
            min_spot_size=params.spotfinder.filter.min_spot_size,
            max_spot_size=params.spotfinder.filter.max_spot_size,
            no_shoeboxes_2d=no_shoeboxes_2d,
            min_chunksize=params.spotfinder.mp.min_chunksize,
            is_stills=is_stills,
        )

    @staticmethod
    def configure_threshold(params):
        """
        Get the threshold strategy

        :param params: The input parameters
        :return: The threshold algorithm
        """
        # Configure the algorithm
        Algorithm = dials.extensions.SpotFinderThreshold.load(
            params.spotfinder.threshold.algorithm
        )
        return Algorithm(params)

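
    # Illustrative sketch: the available algorithm names come from the
    # SpotFinderThreshold extension scope adopted in generate_phil_scope();
    # "dispersion" is assumed here as a typical choice, e.g.:
    #
    #     params = phil_scope.fetch(source=parse("")).extract()
    #     params.spotfinder.threshold.algorithm = "dispersion"
    #     threshold = SpotFinderFactory.configure_threshold(params)
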
    @staticmethod
    def configure_filter(params):
        """
        Get the filter strategy.

        :param params: The input parameters
        :return: The filter algorithm
        """
        # Initialise an empty list of filters
        filters = []

        # Add a peak-centroid distance filter
        if params.spotfinder.filter.max_separation is not None:
            filters.append(
                PeakCentroidDistanceFilter(params.spotfinder.filter.max_separation)
            )

        if params.spotfinder.filter.background_gradient.filter:
            bg_filter_params = params.spotfinder.filter.background_gradient
            filters.append(
                BackgroundGradientFilter(
                    background_size=bg_filter_params.background_size,
                    gradient_cutoff=bg_filter_params.gradient_cutoff,
                )
            )

        if params.spotfinder.filter.spot_density.filter:
            filters.append(SpotDensityFilter())

        # Return the filter runner with the list of filters
        return FilterRunner(filters)

    @staticmethod
    def load_image(filename_or_data):
        """
        Given a filename, load an image. If the data is already loaded, return it.

        :param filename_or_data: The input filename (or data)
        :return: The image or None
        """
        # If no filename is set then return None
        if not filename_or_data:
            return None

        # If it's already loaded, return early
        if isinstance(filename_or_data, tuple):
            return filename_or_data

        # Read the image and return the image data
        with open(filename_or_data, "rb") as fh:
            image = pickle.load(fh, encoding="bytes")
        if not isinstance(image, tuple):
            image = (image,)
        return image
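
# Illustrative sketch: typical end-to-end use of the factory, assuming
# experiments have been loaded elsewhere (e.g. via dxtbx's ExperimentListFactory)
# and that the returned SpotFinder exposes find_spots() as in
# dials.algorithms.spot_finding.finder:
#
#     params = phil_scope.fetch(source=parse("")).extract()
#     finder = SpotFinderFactory.from_parameters(
#         params=params, experiments=experiments
#     )
#     reflections = finder.find_spots(experiments)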