Source code for dials.algorithms.refinement.target

"""Contains classes used to construct a target function for refinement,
principally Target and ReflectionManager."""


import math

from libtbx.phil import parse
from scitbx import sparse
from scitbx.array_family import flex

phil_str = """
    rmsd_cutoff = *fraction_of_bin_size absolute
      .help = "Method to choose rmsd cutoffs. This is currently either as a"
              "fraction of the discrete units of the spot positional data, i.e."
              "(pixel width, pixel height, image thickness in phi), or a tuple"
              "of absolute values to use as the cutoffs"
      .type = choice

    bin_size_fraction = 0.0
      .help = "Set this to a fractional value, say 0.2, to make a cut off in"
              "the natural discrete units of positional data, viz.,"
              "(pixel width, pixel height, image thickness in phi). This would"
              "then determine when the RMSD target is achieved. Only used if"
              "rmsd_cutoff = fraction_of_bin_size."
      .type = float(value_min=0.)

    absolute_cutoffs = None
      .help = "Absolute Values for the RMSD target achieved cutoffs in X, Y and"
              "Phi. The units are (mm, mm, rad)."
      .type = floats(size=3, value_min=0.)

    gradient_calculation_blocksize = None
      .help = "Maximum number of reflections to use for gradient calculation."
              "If there are more reflections than this in the manager then"
              "the minimiser must do the full calculation in blocks."
      .type = int(value_min=1)
"""
phil_scope = parse(phil_str)

# constants
RAD_TO_DEG = 180.0 / math.pi


[docs]class TargetFactory:
[docs] @staticmethod def from_parameters_and_experiments( params, experiments, reflection_manager, predictor, pred_param, restraints_param, do_stills=False, do_sparse=False, ): """Given a set of parameters, configure a factory to build a target function Params: params The input parameters Returns: The target factory instance """ if params.rmsd_cutoff == "fraction_of_bin_size": absolute_cutoffs = None elif params.rmsd_cutoff == "absolute": absolute_cutoffs = params.absolute_cutoffs else: raise RuntimeError( "Target function rmsd_cutoff option" + params.rmsd_cutoff + " not recognised" ) # Determine whether the target is in X, Y, Phi space or just X, Y to choose # the right Target to instantiate if do_stills: if do_sparse: from dials.algorithms.refinement.target_stills import ( LeastSquaresStillsResidualWithRmsdCutoffSparse as targ, ) else: from dials.algorithms.refinement.target_stills import ( LeastSquaresStillsResidualWithRmsdCutoff as targ, ) else: if do_sparse: targ = LeastSquaresPositionalResidualWithRmsdCutoffSparse else: targ = LeastSquaresPositionalResidualWithRmsdCutoff # Here we pass in None for prediction_parameterisation and # restraints_parameterisation, as these are not required for initialisation # and will be linked to the object later return targ( experiments=experiments, predictor=predictor, reflection_manager=reflection_manager, prediction_parameterisation=pred_param, restraints_parameterisation=restraints_param, frac_binsize_cutoff=params.bin_size_fraction, absolute_cutoffs=absolute_cutoffs, gradient_calculation_blocksize=params.gradient_calculation_blocksize, )
[docs]class Target: """Abstract interface for a target function class A Target object will be used by a Refinery. It will refer to a Reflection Manager to get a list of observations. It will perform reflection prediction on those observations and update the reflection manager with those predictions. It will then query the reflection manager to get a list of accepted reflections with observed and calculated positions. These are the reflections for use in refinement. It obtains the gradients of reflection positions from a relevant prediction parameterisation object. With all of this information in place, it calculates the value of the target function, the gradients of the target function and auxiliary information (e.g. RMSDs). Concrete instances of this class implement the actual target function calculation. The base class should not determine what type of target function is used (e.g. least squares target), or limit whether the object is used for a detector space & phi residual, or a reciprocal space residual. This should all be set by a derived class. """ _grad_names = ["dX_dp", "dY_dp", "dphi_dp"] rmsd_names = ["RMSD_X", "RMSD_Y", "RMSD_Phi"] rmsd_units = ["mm", "mm", "rad"]
[docs] def __init__( self, experiments, predictor, reflection_manager, prediction_parameterisation, restraints_parameterisation=None, gradient_calculation_blocksize=None, ): self._experiments = experiments self._reflection_predictor = predictor self._reflection_manager = reflection_manager self._prediction_parameterisation = prediction_parameterisation self._restraints_parameterisation = restraints_parameterisation # Quantities to cache each step self._rmsds = None self._matches = None # Keep maximum number of reflections used for Jacobian calculation, if # a cutoff is required self._gradient_calculation_blocksize = gradient_calculation_blocksize
@property def dim(self): """Get the number of dimensions of the target function""" return len(self._grad_names) def _predict_core(self, reflections, skip_derivatives=False): """perform prediction for the specified reflections""" # If the prediction parameterisation has a compose method (true for the scan # varying case) then call it. Prefer hasattr to try-except duck typing to # avoid masking AttributeErrors that could be raised within the method. if hasattr(self._prediction_parameterisation, "compose"): self._prediction_parameterisation.compose(reflections, skip_derivatives) # do prediction (updates reflection table in situ). Scan-varying prediction # is done automatically if the crystal has scan-points (assuming reflections # have ub_matrix set) self._reflection_predictor(reflections) x_obs, y_obs, phi_obs = reflections["xyzobs.mm.value"].parts() x_calc, y_calc, phi_calc = reflections["xyzcal.mm"].parts() # calculate residuals and assign columns reflections["x_resid"] = x_calc - x_obs reflections["x_resid2"] = reflections["x_resid"] ** 2 reflections["y_resid"] = y_calc - y_obs reflections["y_resid2"] = reflections["y_resid"] ** 2 reflections["phi_resid"] = phi_calc - phi_obs reflections["phi_resid2"] = reflections["phi_resid"] ** 2 return reflections
[docs] def predict(self): """perform reflection prediction for the working reflections and update the reflection manager""" # get the matches reflections = self._reflection_manager.get_obs() # reset the 'use' flag for all observations self._reflection_manager.reset_accepted_reflections() # predict reflections = self._predict_core(reflections) # set used_in_refinement flag to all those that had predictions mask = reflections.get_flags(reflections.flags.predicted) reflections.set_flags(mask, reflections.flags.used_in_refinement) # collect the matches self.update_matches(force=True)
[docs] def predict_for_free_reflections(self): """perform prediction for the reflections not used for refinement""" refs = self._reflection_manager.get_free_reflections() if len(refs) == 0: return refs return self._predict_core(refs)
[docs] def predict_for_reflection_table(self, reflections, skip_derivatives=False): """perform prediction for all reflections in the supplied table""" if "entering" not in reflections: reflections.calculate_entering_flags(self._experiments) # can only predict for experiments that exist and within the scan range # any other reflections will be left unchanged inc = flex.size_t_range(len(reflections)) to_keep = flex.bool(len(inc), False) for iexp, exp in enumerate(self._experiments): sel = reflections["id"] == iexp # keep all reflections if there is no rotation axis if exp.goniometer is None: to_keep.set_selected(sel, True) continue # trim reflections outside the scan range phi = reflections["xyzobs.mm.value"].parts()[2] phi_min, phi_max = exp.scan.get_oscillation_range(deg=False) passed = (phi >= phi_min) & (phi <= phi_max) to_keep.set_selected(sel, passed) # determine indices to include and predict on the subset inc = inc.select(to_keep) sub_refl = reflections.select(inc) preds = self._predict_core(sub_refl, skip_derivatives) # set updated subset back into place reflections.set_selected(inc, preds) return reflections
[docs] def calculate_gradients(self, reflections=None, callback=None): """delegate to the prediction_parameterisation object to calculate gradients for all the matched reflections, or just for those specified""" self.update_matches() if reflections: gradients = self._prediction_parameterisation.get_gradients( reflections, callback ) else: gradients = self._prediction_parameterisation.get_gradients( self._matches, callback ) return gradients
[docs] def get_num_matches(self): """return the number of reflections currently used in the calculation""" self.update_matches() return len(self._matches)
[docs] def get_num_matches_for_experiment(self, iexp=0): """return the number of reflections currently used in the calculation""" self.update_matches() sel = self._matches["id"] == iexp return sel.count(True)
[docs] def get_num_matches_for_panel(self, ipanel=0): """return the number of reflections currently used in the calculation""" self.update_matches() sel = self._matches["panel"] == ipanel return sel.count(True)
[docs] def update_matches(self, force=False): """ensure the observations matched to predictions are up to date""" if not self._matches or force: self._matches = self._reflection_manager.get_matches()
[docs] def compute_functional_gradients_and_curvatures(self, block=None): """calculate the value of the target function and its gradients. Set approximate curvatures as a side-effect""" self.update_matches() if block is not None: matches = block else: matches = self._matches nref = len(matches) # This is a hack for the case where nref=0. This should not be necessary # if bounds are provided for parameters to stop the algorithm exploring # unreasonable regions of parameter space where no predictions exist. # Unfortunately the L-BFGS line search does make such extreme trials. if nref == 0: self._curv = [1.0] * len(self._prediction_parameterisation) return 1.0e12, [1.0] * len(self._prediction_parameterisation) residuals, weights = self._extract_residuals_and_weights(matches) w_resid = weights * residuals residuals2 = self._extract_squared_residuals(matches) # calculate target function L = 0.5 * flex.sum(weights * residuals2) def process_one_gradient(result): # copy gradients out of the result in the right order grads = [result[key] for key in self._grad_names] # reset result for k in result: result[k] = None # add new keys grads = self._concatenate_gradients(grads) result["dL_dp"] = flex.sum(w_resid * grads) result["curvature"] = flex.sum(weights * grads * grads) return result results = self.calculate_gradients(matches, callback=process_one_gradient) dL_dp = [result["dL_dp"] for result in results] curvs = [result["curvature"] for result in results] return (L, dL_dp, curvs)
[docs] def compute_restraints_functional_gradients_and_curvatures(self): """use the restraints_parameterisation object, if present, to calculate the least squares restraints objective plus gradients and approximate curvatures""" if not self._restraints_parameterisation: return None ( residuals, jacobian, weights, ) = self._restraints_parameterisation.get_residuals_gradients_and_weights() w_resid = weights * residuals residuals2 = residuals * residuals # calculate target function L = 0.5 * flex.sum(weights * residuals2) # calculate gradients using the scalar product of sparse vector col with # dense vector w_resid dL_dp = [col * w_resid for col in jacobian.cols()] # calculate lsq approximation to curvatures using weighted dot product curvs = [sparse.weighted_dot(col, weights, col) for col in jacobian.cols()] return (L, dL_dp, curvs)
[docs] def compute_residuals(self): """return the vector of residuals plus their weights""" self.update_matches() return self._extract_residuals_and_weights(self._matches)
[docs] def split_matches_into_blocks(self, nproc=1): """Return a list of the matches, split into blocks according to the gradient_calculation_blocksize parameter and the number of processes (if relevant). The number of blocks will be set such that the total number of reflections being processed by concurrent processes does not exceed gradient_calculation_blocksize""" self.update_matches() # Need to be able to track the indices of the original matches table for # scan-varying gradient calculations. A simple and robust (but slightly # expensive) way to do this is to add an index column to the matches table self._matches["imatch"] = flex.size_t_range(len(self._matches)) if self._gradient_calculation_blocksize: nblocks = int( math.floor( len(self._matches) * nproc / self._gradient_calculation_blocksize ) ) else: nblocks = nproc # ensure at least 100 reflections per block nblocks = min(nblocks, int(len(self._matches) / 100)) nblocks = max(nblocks, 1) blocksize = int(math.floor(len(self._matches) / nblocks)) blocks = [] for block_num in range(nblocks - 1): start = block_num * blocksize end = (block_num + 1) * blocksize blocks.append(self._matches[start:end]) start = (nblocks - 1) * blocksize end = len(self._matches) blocks.append(self._matches[start:end]) return blocks
[docs] def compute_residuals_and_gradients(self, block=None): """return the vector of residuals plus their gradients and weights for non-linear least squares methods""" self.update_matches() if block is not None: matches = block else: matches = self._matches # The gradients are provided as a list of dictionaries, where the # dictionaries have keys corresponding to gradients of the different types # of residual involved. For example, for scans the keys are 'dX_dp', # 'dY_dp', 'dphi_dp'. Reshape this data structure so that all the gradients # of a particular type of residual are kept together gradients = self.calculate_gradients(matches) reshaped = [] for key in self._grad_names: reshaped.append([g[key] for g in gradients]) residuals, weights = self._extract_residuals_and_weights(matches) nelem = len(matches) * len(self._grad_names) nparam = len(self._prediction_parameterisation) jacobian = self._build_jacobian(reshaped, nelem=nelem, nparam=nparam) return (residuals, jacobian, weights)
[docs] def compute_restraints_residuals_and_gradients(self): """delegate to the restraints_parameterisation object, if present, to calculate the vector of restraints residuals plus their gradients and weights for non-linear least squares methods""" if self._restraints_parameterisation: ( residuals, jacobian, weights, ) = self._restraints_parameterisation.get_residuals_gradients_and_weights() return (residuals, jacobian, weights) else: return None
@staticmethod def _build_jacobian(grads_each_dim, nelem=None, nparam=None): """construct Jacobian from lists of gradient vectors. The elements of grads_each_dim refer to the gradients of each dimension of the problem (e.g. dX, dY, dZ). The elements for a single dimension give the arrays of gradients of the associated residual for each parameter. This method may be overridden for the case where these vectors use sparse storage""" jacobian = flex.double(flex.grid(nelem, nparam)) # loop over parameters for i in range(nparam): col = grads_each_dim[0][i] for g in grads_each_dim[1:]: col.extend(g[i]) jacobian.matrix_paste_column_in_place(col, i) return jacobian @staticmethod def _concatenate_gradients(grads): """concatenate gradient vectors and return a flex.double. This method may be overridden for the case where these vectors use sparse storage""" result = grads[0] for g in grads[1:]: result.extend(g) return result @staticmethod def _extract_residuals_and_weights(matches): """extract vector of residuals and corresponding weights. The space the residuals are measured in (e.g. X, Y and Phi) and the order they are returned is determined by a concrete implementation of a staticmethod""" raise NotImplementedError() @staticmethod def _extract_squared_residuals(matches): """extract vector of squared residuals. The space the residuals are measured in (e.g. X, Y and Phi) and the order they are returned is determined by a concrete implementation of a staticmethod""" raise NotImplementedError()
[docs] def rmsds(self): """calculate unweighted RMSDs for the matches""" self.update_matches() # cache rmsd calculation for achieved test self._rmsds = self._rmsds_core(self._matches) return self._rmsds
[docs] def rmsds_for_reflection_table(self, reflections): """calculate unweighted RMSDs for the specified reflections. Caution: this assumes that the table reflections has the keys expected by _rmsds_core""" n = len(reflections) if n == 0: return None return self._rmsds_core(reflections)
[docs] def rmsds_for_experiment(self, iexp=0): """calculate unweighted RMSDs for the selected experiment.""" self.update_matches() sel = self._matches["id"] == iexp n = sel.count(True) if n == 0: return None rmsds = self._rmsds_core(self._matches.select(sel)) return rmsds
[docs] def rmsds_for_panel(self, ipanel=0): """calculate unweighted RMSDs for the selected panel.""" self.update_matches() sel = self._matches["panel"] == ipanel n = sel.count(True) if n == 0: return None rmsds = self._rmsds_core(self._matches.select(sel)) return rmsds
[docs] def achieved(self): """return True to terminate the refinement.""" raise NotImplementedError()
[docs]class LeastSquaresPositionalResidualWithRmsdCutoff(Target): """An implementation of the target class providing a least squares residual in terms of detector impact position X, Y and phi, terminating on achieved rmsd (or on intrisic convergence of the chosen minimiser)"""
[docs] def __init__( self, experiments, predictor, reflection_manager, prediction_parameterisation, restraints_parameterisation, frac_binsize_cutoff=0.33333, absolute_cutoffs=None, gradient_calculation_blocksize=None, ): Target.__init__( self, experiments, predictor, reflection_manager, prediction_parameterisation, restraints_parameterisation, gradient_calculation_blocksize, ) # Set up the RMSD achieved criterion. For simplicity, we take models from # the first Experiment only. If this is not appropriate for refinement over # all experiments then absolute cutoffs should be used instead. if experiments[0].scan: image_width_rad = abs(experiments[0].scan.get_oscillation(deg=False)[1]) else: image_width_rad = None detector = experiments[0].detector if not absolute_cutoffs: pixel_sizes = [p.get_pixel_size() for p in detector] min_px_size_x = min(e[0] for e in pixel_sizes) min_px_size_y = min(e[1] for e in pixel_sizes) self._binsize_cutoffs = [ min_px_size_x * frac_binsize_cutoff, min_px_size_y * frac_binsize_cutoff, image_width_rad * frac_binsize_cutoff, ] else: assert len(absolute_cutoffs) == 3 self._binsize_cutoffs = absolute_cutoffs
@staticmethod def _extract_residuals_and_weights(matches): # return residuals and weights as 1d flex.double vectors residuals = flex.double.concatenate(matches["x_resid"], matches["y_resid"]) residuals.extend(matches["phi_resid"]) weights, w_y, w_z = matches["xyzobs.mm.weights"].parts() weights.extend(w_y) weights.extend(w_z) return residuals, weights @staticmethod def _extract_squared_residuals(matches): residuals2 = flex.double.concatenate(matches["x_resid2"], matches["y_resid2"]) residuals2.extend(matches["phi_resid2"]) return residuals2 def _rmsds_core(self, reflections): """calculate unweighted RMSDs for the specified reflections""" resid_x = flex.sum(reflections["x_resid2"]) resid_y = flex.sum(reflections["y_resid2"]) resid_phi = flex.sum(reflections["phi_resid2"]) n = len(reflections) rmsds = ( math.sqrt(resid_x / n), math.sqrt(resid_y / n), math.sqrt(resid_phi / n), ) return rmsds
[docs] def achieved(self): """RMSD criterion for target achieved""" r = self._rmsds if self._rmsds else self.rmsds() # reset cached rmsds to avoid getting out of step self._rmsds = None if ( r[0] < self._binsize_cutoffs[0] and r[1] < self._binsize_cutoffs[1] and r[2] < self._binsize_cutoffs[2] ): return True return False
[docs]class SparseGradientsMixin: """Mixin class to build a sparse Jacobian from gradients of the prediction formula stored as sparse vectors, and allow concatenation of gradient vectors that employed sparse storage.""" @staticmethod def _build_jacobian(grads_each_dim, nelem=None, nparam=None): """construct Jacobian from lists of sparse gradient vectors.""" nref = int(nelem / len(grads_each_dim)) blocks = [sparse.matrix(nref, nparam) for _ in grads_each_dim] jacobian = sparse.matrix(nelem, nparam) # loop over parameters, building full width blocks of the full Jacobian for i in range(nparam): for block, grad in zip(blocks, grads_each_dim): block[:, i] = grad[i] # set the blocks in the Jacobian for i, block in enumerate(blocks): jacobian.assign_block(block, (i * nref), 0) return jacobian @staticmethod def _concatenate_gradients(grads): """concatenate sparse gradient vectors and return a flex.double.""" result = grads[0].as_dense_vector() for g in grads[1:]: result.extend(g.as_dense_vector()) return result
[docs]class LeastSquaresPositionalResidualWithRmsdCutoffSparse( SparseGradientsMixin, LeastSquaresPositionalResidualWithRmsdCutoff ): """A version of the LeastSquaresPositionalResidualWithRmsdCutoff Target that uses a sparse matrix data structure for memory efficiency when there are a large number of Experiments""" pass