dials.scale

Introduction

This program performs scaling on integrated datasets, which attempts to improve the internal consistency of the reflection intensities by correcting for various experimental effects. By default, a physical scaling model is used, with scale, decay and absorption components. If multiple input files have been specified, the datasets will be jointly scaled against a common target of unique reflection intensities.

The program outputs one scaled.pickle and scaled_experiments.json file, which contains reflection data and scale models, from one or more experiments. The output pickle file contains intensity.scale.value, the unscaled intensity values used to determine the scaling model, and an inverse scale factor per reflection. These values can then be used to merge the data for downstream structural solution. Alternatively, the scaled_experiments.json and scaled.pickle files can be passed back to dials.scale, and further scaling will be performed, starting from where the previous job finished.

The scaling models determined by this program can be plotted with:

dials.plot_scaling_models scaled.pickle scaled_experiments.json

Example use cases

Regular single-sweep scaling, with no absorption correction:

dials.scale integrated.pickle integrated_experiments.json absorption_term=False

Scaling multiple datasets, specifying scale parameter interval:

dials.scale 1_integrated.pickle 1_integrated_experiments.json 2_integrated.pickle 2_integrated_experiments.json scale_interval=10.0

Incremental scaling (with different options per dataset):

dials.scale integrated.pickle integrated_experiments.json scale_interval=10.0

dials.scale integrated_2.pickle integrated_experiments_2.json scaled.pickle scaled_experiments.json scale_interval=15.0

Basic parameters

debug = False
model = physical array KB
stats_only = False
output {
  log = dials.scale.log
  debug.log = dials.scale.debug.log
  experiments = "scaled_experiments.json"
  reflections = "scaled.pickle"
  unmerged_mtz = None
  merged_mtz = None
}
overwrite_existing_models = False
parameterisation {
}
reflection_selection {
  method = *auto quasi_random intensity_ranges use_all
  quasi_random {
    multi_dataset {
      min_per_dataset = 500
      Isigma_cutoff = 1.0
      min_multiplicity = 2
    }
  }
  best_unit_cell = None
}
weighting {
  optimise_errors = True
}
cut_data {
}
scaling_options {
}
cross_validation {
}
dataset_selection {
}

Full parameter definitions

debug = False
  .help = "Output additional debugging information"
  .type = bool
model = physical array KB
  .help = "Set scaling model to be applied to input datasets without an"
          "existing model."
  .type = choice
  .expert_level = 0
stats_only = False
  .help = "Only read input files and output merging stats."
  .type = bool
output {
  log = dials.scale.log
    .help = "The log filename"
    .type = str
  debug.log = dials.scale.debug.log
    .help = "The debug log filename"
    .type = str
  experiments = "scaled_experiments.json"
    .help = "Option to set filepath for output json."
    .type = str
  reflections = "scaled.pickle"
    .help = "Option to set filepath for output pickle file of scaled"
            "intensities."
    .type = str
  unmerged_mtz = None
    .help = "Filename to export an unmerged_mtz, calls dials.export"
            "internally."
    .type = path
  merged_mtz = None
    .help = "Filename to export a merged_mtz file."
    .type = path
  crystal_name = XTAL
    .help = "The crystal name to be exported in the mtz file metadata"
    .type = str
    .expert_level = 1
  use_internal_variance = False
    .help = "Option to use internal spread of the intensities when merging"
            "reflection groups and calculating sigI, rather than using the"
            "sigmas of the individual reflections."
    .type = bool
    .expert_level = 1
  merging.nbins = 20
    .help = "Number of bins to use for calculating and plotting merging stats."
    .type = int(allow_none=True)
    .expert_level = 1
  delete_integration_shoeboxes = True
    .help = "Discard integration shoebox data from scaling output, to help"
            "with memory management."
    .type = bool
    .expert_level = 2
}
overwrite_existing_models = False
  .help = "If True, create new scaling models for all datasets"
  .type = bool
  .expert_level = 0
parameterisation {
  scale_term = True
    .help = "Option to turn off scale correction (for physical/KB default"
            "models)."
    .type = bool
    .expert_level = 2
  scale_interval = 15.0
    .help = "Rotation (phi) interval between model parameters for the scale"
            "component (physical model)."
    .type = float(value_min=1, allow_none=True)
    .expert_level = 1
  decay_term = True
    .help = "Option to turn off decay correction (for physical/array/KB"
            "default models)."
    .type = bool
    .expert_level = 1
  decay_interval = 20.0
    .help = "Rotation (phi) interval between model parameters for the decay"
            "component (physical/array default models)."
    .type = float(value_min=1, allow_none=True)
    .expert_level = 1
  n_resolution_bins = 10
    .help = "Number of resolution bins to use for the decay term in the"
            "array-based model."
    .type = int(value_min=1, allow_none=True)
    .expert_level = 1
  decay_restraint = 1e-1
    .help = "Weight to weakly restrain B-values to 0 for physical model."
    .type = float(value_min=0, allow_none=True)
    .expert_level = 2
  absorption_term = True
    .help = "Option to turn off absorption correction (for physical/array"
            "default models)."
    .type = bool
    .expert_level = 1
  lmax = 4
    .help = "Number of spherical harmonics to include for absorption"
            "correction (for physical default model), recommended to be no"
            "more than 6."
    .type = int(value_min=2, allow_none=True)
    .expert_level = 1
  surface_weight = 1e6
    .help = "Restraint weight applied to spherical harmonic terms in the"
            "physical model absorption correction."
    .type = float(value_min=0, allow_none=True)
    .expert_level = 1
  modulation_term = False
    .help = "Option to turn on a detector correction for the array default"
            "model."
    .type = bool
    .expert_level = 2
  n_modulation_bins = 20
    .help = "Number of bins in each dimension (applied to both x and y) for"
            "binning the detector position for the modulation term of the"
            "array model."
    .type = int(value_min=1, allow_none=True)
    .expert_level = 2
  n_absorption_bins = 3
    .help = "Number of bins in each dimension (applied to both x and y) for"
            "binning the detector position for the absorption term of the"
            "array model."
    .type = int(value_min=1, allow_none=True)
    .expert_level = 1
}
reflection_selection {
  method = *auto quasi_random intensity_ranges use_all
    .help = "Method to use when choosing a reflection subset for scaling model"
            "minimisation. auto (default) will choose use_all for small"
            "datasets and quasi_random for large datasets, and will try to"
            "optimise the quasi_random algorithm parameters. Manually"
            "selecting quasi-random will use the"
            "reflection_selection.quasi_random parameters to attempt to choose"
            "reflection groups that have a good connectedness across"
            "reciprocal space, for all resolutions. intensity_ranges uses the"
            "E2_range, Isigma_range and d_range options to choose a subset of"
            "reflections. use_all uses all suitable reflections for model"
            "minimisation, which may be slow for large datasets."
    .type = choice
  quasi_random {
    min_per_area = 100
      .help = "Numbers of reflections for each of the 12 volumes in reciprocal"
              "space at a given resolution."
      .type = ints
      .expert_level = 2
    n_resolution_bins = 20
      .help = "Number of resolution bins for quasi random sampling."
      .type = ints
      .expert_level = 2
    multi_dataset {
      min_per_dataset = 500
        .help = "Minimum number of cross-dataset connected reflections in each"
                "dataset."
        .type = int(allow_none=True)
      Isigma_cutoff = 1.0
        .help = "Minimum average I/sigma of reflection groups to use when"
                "selecting cross-dataset connected reflections."
        .type = float(allow_none=True)
      min_multiplicity = 2
        .help = "Minimum multiplicity of cross-dataset connected reflections"
                "for reflections used during minimisation."
        .type = int(allow_none=True)
    }
  }
  best_unit_cell = None
    .help = "Best unit cell value, to use when performing resolution cutting"
            "and merging statistics. If None, the median unit cell will be"
            "used."
    .type = floats(size=6)
  E2_range = 0.8, 5.0
    .help = "Minimum and maximum normalised E^2 values used to select a"
            "subset of reflections for minimising the scaling model."
    .type = floats(size=2)
    .expert_level = 1
  Isigma_range = -5.0, 0.0
    .help = "Minimum and maximum I/sigma values used to select a subset of"
            "reflections to determine the scaling model. Here a value of 0.0"
            "for the max means no limit applied."
    .type = floats(size=2)
    .expert_level = 1
  d_range = None
    .help = "Minimum and maximum d-values used to select a subset of"
            "reflections to determine the scaling model."
    .type = floats(size=2)
    .expert_level = 1
  min_partiality = 0.6
    .help = "Minimum partiality to use when selecting a subset of reflections"
            "to determine the scaling model."
    .type = float(allow_none=True)
    .expert_level = 2
  intensity_choice = profile sum *combine
    .help = "Option to choose from profile fitted or summation intensities,"
            "or an optimised combination of profile/sum."
    .type = choice
    .expert_level = 1
  combine.Imid = None
    .help = "A list of values to try for the midpoint, for profile/sum"
            "combination calculation: the value with the lowest Rmeas will be"
            "chosen. 0 and 1 are special values that can be supplied to"
            "include profile and sum respectively in the comparison."
    .type = floats
    .expert_level = 2
  combine.joint_analysis = True
    .help = "Option of whether to do intensity combination optimisation"
            "separately (i.e. different Imid per dataset) or jointly for"
            "multiple datasets."
    .type = bool
    .expert_level = 2
}
weighting {
  weighting_scheme = *invvar
    .help = "Weighting scheme used during Ih calculation. Weighting schemes"
            "other than invvar and unity may trigger iterative reweighting"
            "during minimisation, which may be unstable for certain"
            "minimisation engines (LBFGS)."
    .type = choice
    .expert_level = 2
  optimise_errors = True
    .help = "Option to allow optimisation of weights for scaling. Performs an"
            "additional scale factor minimisation after adjusting weights."
    .type = bool
    .expert_level = 0
  error_model = *basic
    .help = "The name of the error model to use, if optimise_errors is True."
    .type = choice
    .expert_level = 1
  output_optimised_vars = True
    .help = "If True, the error model determined will be applied to the"
            "intensity variances in the output files. This may result in a"
            "significant increase or decrease in the variances. The default"
            "is True as, with the default inverse variance weighting scheme,"
            "the modified variances have been used as weights in scaling and"
            "therefore should be used as the variances when calculating"
            "merged intensities downstream. If this is distorting the data"
            "too much, then it is likely that the chosen error model is"
            "inappropriate."
    .type = bool
    .expert_level = 2
}
cut_data {
  d_min = None
    .help = "Option to apply a high resolution cutoff for the dataset (i.e."
            "the chosen reflections have d > d_min)."
    .type = float(allow_none=True)
    .expert_level = 1
  d_max = None
    .help = "Option to apply a low resolution cutoff for the dataset (i.e."
            "the chosen reflections have d < d_max)."
    .type = float(allow_none=True)
    .expert_level = 1
  partiality_cutoff = 0.2
    .help = "Value below which reflections are removed from the dataset due"
            "to low partiality."
    .type = float(allow_none=True)
    .expert_level = 1
}
scaling_options {
  target_cycle = True
    .help = "Option to turn off the initial round of targeted scaling if some"
            "datasets are already scaled."
    .type = bool
    .expert_level = 2
  only_target = False
    .help = "Option to only do targeted scaling if some datasets are already"
            "scaled."
    .type = bool
    .expert_level = 2
  only_save_targeted = True
    .help = "If only_target is true, this option changes whether the dataset"
            "that is being scaled will be saved on its own, or combined with"
            "the already scaled dataset."
    .type = bool
    .expert_level = 2
  target_model = None
    .help = "Path to cif file to use to calculate target intensities for"
            "scaling."
    .type = path
    .expert_level = 2
  target_mtz = None
    .help = "Path to merged mtz file to use as a target for scaling."
    .type = path
    .expert_level = 2
  nproc = 1
    .help = "Number of blocks to divide the data into for minimisation. This"
            "also sets the number of processes to use if the option is"
            "available."
    .type = int(value_min=1, allow_none=True)
    .expert_level = 2
  use_free_set = False
    .help = "Option to use a free set during scaling to check for"
            "overbiasing. This free set is used to calculate an RMSD, which"
            "is shown alongside the 'working' RMSD during refinement, but is"
            "not currently used to terminate refinement or make any choices"
            "on the model."
    .type = bool
    .expert_level = 2
  free_set_percentage = 10.0
    .help = "Percentage of symmetry equivalent groups to use for the free"
            "set, if use_free_set is True."
    .type = float(allow_none=True)
    .expert_level = 2
  free_set_offset = 0
    .help = "Offset for choosing unique groups for the free set from the"
            "whole set of unique groups."
    .type = int(allow_none=True)
    .expert_level = 2
  space_group = None
    .help = "Option to specify space group for scaling"
    .type = str
    .expert_level = 1
  concurrent = True
    .help = "Option to allow consecutive scaling if concurrent is set to"
            "False. The consecutive order is defined (and fixed) for each"
            "scaling model."
    .type = bool
    .expert_level = 2
  full_matrix = True
    .help = "Option to turn off the GN/LM refinement round used to determine"
            "error estimates on scale factors."
    .type = bool
    .expert_level = 2
  outlier_rejection = *standard simple
    .help = "Choice of outlier rejection routine. Standard may take a"
            "significant amount of time to run for large datasets or high"
            "multiplicities, whereas simple should be quick for these"
            "datasets."
    .type = choice
    .expert_level = 1
  outlier_zmax = 6.0
    .help = "Cutoff z-score value for identifying outliers based on their"
            "normalised deviation within the group of equivalent"
            "reflections."
    .type = float(value_min=3, allow_none=True)
    .expert_level = 1
  verbosity = 2
    .help = "The verbosity level"
    .type = int(value_min=0, allow_none=True)
    .expert_level = 2
}
cross_validation {
  cross_validation_mode = multi single
    .help = "Choose the cross validation running mode, for a full description"
            "see the module docstring. Choice is used for testing a parameter"
            "that can only have discrete values (a choice or bool phil"
            "parameter). Variable is used for testing a parameter that can"
            "have a float or int value (that is also not a 'choice' type)."
            "Single just performs cross validation on one parameter"
            "configuration."
    .type = choice
    .expert_level = 2
  parameter = None
    .help = "Optimise a command-line parameter. parameter_values must also be"
            "specified, unless the parameter is a True/False option."
    .type = str
    .expert_level = 2
  parameter_values = None
    .help = "Parameter values to compare, entered as a string of space"
            "separated values."
    .type = strings
    .expert_level = 2
  nfolds = 1
    .help = "Number of cross-validation folds to perform. If nfolds > 1, the"
            "minimisation for each option is repeated nfolds times, with an"
            "incremental offset for the free set. The max number of folds"
            "allowed is 1/free_set_percentage; if set greater than this then"
            "the repetition will finish after 1/free_set_percentage folds."
    .type = int(value_min=1, allow_none=True)
    .expert_level = 2
  log = dials.cross_validate.log
    .help = "The log filename"
    .type = str
    .expert_level = 2
  debug.log = dials.cross_validate.debug.log
    .help = "The debug log filename"
    .type = str
    .expert_level = 2
}
scaling_refinery
  .help = "Parameters to configure the refinery"
  .expert_level = 1
{
  engine = *SimpleLBFGS GaussNewton LevMar
    .help = "The minimisation engine to use for the main scaling algorithm"
    .type = choice
  max_iterations = None
    .help = "Maximum number of iterations in refinement before termination."
            "None implies the engine supplies its own default."
    .type = int(value_min=1, allow_none=True)
  rmsd_tolerance = 0.0001
    .help = "Tolerance at which to stop scaling refinement. This is a"
            "relative value, the convergence criterion is"
            "(rmsd[i] - rmsd[i-1])/rmsd[i] < rmsd_tolerance."
    .type = float(value_min=1e-06, allow_none=True)
  full_matrix_engine = GaussNewton *LevMar
    .help = "The minimisation engine to use for a full matrix round of"
            "minimisation after the main scaling, in order to determine error"
            "estimates."
    .type = choice
  full_matrix_max_iterations = None
    .help = "Maximum number of iterations before termination in the full"
            "matrix minimisation round. None implies the engine supplies its"
            "own default."
    .type = int(value_min=1, allow_none=True)
}
exclude_images = None
  .help = "Input in the format exp:start:end. Exclude a range of images"
          "(start, stop) from the dataset with experiment identifier exp"
          "(inclusive of frames start, stop)."
  .type = strings
  .multiple = True
  .expert_level = 1
dataset_selection {
  use_datasets = None
    .help = "Choose a subset of datasets, based on the dataset id (as defined"
            "in the reflection table), to use from a multi-dataset input."
    .type = strings
    .expert_level = 2
  exclude_datasets = None
    .help = "Choose a subset of datasets, based on the dataset id (as defined"
            "in the reflection table), to exclude from a multi-dataset input."
    .type = strings
    .expert_level = 2
}