"""
phenix_knowledge.py
Central repository for Phenix-specific rules,
hints, and strategies.
"""

# 1. The Allow-List (Prevent Hallucinations)
# Every command name in this list is treated as a valid Phenix program.
# The entries are kept one per line so that additions and removals show
# up as single-line diffs.
# NOTE(review): the original note here read "Use phenix_program_list" --
# presumably the list is meant to be regenerated with that tool; confirm.
VALID_PHENIX_PROGRAMS = [
  "phenix.AmberPrep",
  "phenix.HKLinfo",
  "phenix.HKLviewer",
  "phenix.TAAM_minus_IAM",
  "phenix.about",
  "phenix.acknowledgments",
  "phenix.afitt_run_tests",
  "phenix.alphafold_with_density_map",
  "phenix.amber_run_tests",
  "phenix.analyze_alt_conf",
  "phenix.analyze_log",
  "phenix.angle",
  "phenix.anomalous_signal",
  "phenix.apply_ncs",
  "phenix.aquaref",
  "phenix.assign_sequence",
  "phenix.auto_sharpen",
  "phenix.autobuild",
  "phenix.automr",
  "phenix.autosol",
  "phenix.average_map_coeffs",
  "phenix.average_maps",
  "phenix.b_factor_statistics",
  "phenix.barbed_wire_analysis",
  "phenix.bash",
  "phenix.build_index_html",
  "phenix.build_one_model",
  "phenix.build_rna_helices",
  "phenix.cablam",
  "phenix.cablam_idealization",
  "phenix.cbetadev",
  "phenix.cc_star",
  "phenix.chain_comparison",
  "phenix.chiral_validation",
  "phenix.cif_as_mtz",
  "phenix.cif_as_pdb",
  "phenix.clashscore",
  "phenix.clashscore2",
  "phenix.cns_as_mtz",
  "phenix.combine_focused_maps",
  "phenix.combine_models",
  "phenix.comparama",
  "phenix.compare_datasets",
  "phenix.composite_omit_map",
  "phenix.condensation",
  "phenix.create_alt_conf",
  "phenix.csv_as_mtz",
  "phenix.cut_out_density",
  "phenix.data_viewer",
  "phenix.default_gui_settings",
  "phenix.density_modification",
  "phenix.density_modify_cryo_em",
  "phenix.density_outside_model",
  "phenix.developer",
  "phenix.deviation",
  "phenix.diffuse",
  "phenix.doc",
  "phenix.dock_and_rebuild",
  "phenix.dock_in_map",
  "phenix.dock_predicted_model",
  "phenix.douse",
  "phenix.dynamics",
  "phenix.elbow",
  "phenix.emma",
  "phenix.emringer",
  "phenix.ensemble_refinement",
  "phenix.ensembler",
  "phenix.example_viewer",
  "phenix.explore_metric_symmetry",
  "phenix.extract_unique_chains",
  "phenix.f000",
  "phenix.fab_elbow_angle",
  "phenix.fake_f_obs",
  "phenix.famos",
  "phenix.feature_enhanced_map",
  "phenix.fem",
  "phenix.fetch_pdb",
  "phenix.fft",
  "phenix.find_all_ligands",
  "phenix.find_alt_orig_sym_mate",
  "phenix.find_coot_command",
  "phenix.find_helices_strands",
  "phenix.find_ncs",
  "phenix.find_ncs_from_density",
  "phenix.find_peaks_holes",
  "phenix.find_program",
  "phenix.find_reference",
  "phenix.find_reticular_twin_laws",
  "phenix.find_ss_from_ca",
  "phenix.find_tls_groups",
  "phenix.fit_h",
  "phenix.fit_loops",
  "phenix.fix_insertions_deletions",
  "phenix.fmodel",
  "phenix.fobs_minus_fobs_map",
  "phenix.form_factor_query",
  "phenix.fragment_search",
  "phenix.french_wilson",
  "phenix.geometry_minimization",
  "phenix.get_cc_ano",
  "phenix.get_cc_iso",
  "phenix.get_cc_map_map",
  "phenix.get_cc_mtz_mtz",
  "phenix.get_cc_mtz_pdb",
  "phenix.get_latest_version",
  "phenix.get_patterson_skew",
  "phenix.get_pdb_validation_report",
  "phenix.get_smiles",
  "phenix.get_solution",
  "phenix.get_struct_fact_from_md",
  "phenix.glviewer",
  "phenix.grow_density",
  "phenix.guess_molecular_centers",
  "phenix.guided_ligand_replacement",
  "phenix.gyre_and_gimble",
  "phenix.has_wizard_started",
  "phenix.hbond",
  "phenix.helix_sheet_recs_as_pdb_files",
  "phenix.help",
  "phenix.holton_geometry_validation",
  "phenix.homology",
  "phenix.hyss",
  "phenix.image_viewer",
  "phenix.import_project",
  "phenix.import_project_db",
  "phenix.install_ai_tools",
  "phenix.iterative_ss_refine",
  "phenix.kinemage",
  "phenix.king",
  "phenix.ligand_identification",
  "phenix.ligand_linking",
  "phenix.ligand_pipeline",
  "phenix.ligandfit",
  "phenix.link_edits",
  "phenix.list",
  "phenix.local_aniso_sharpen",
  "phenix.local_resolution",
  "phenix.magref",
  "phenix.map_box",
  "phenix.map_comparison",
  "phenix.map_correlations",
  "phenix.map_model_cc",
  "phenix.map_sharpening",
  "phenix.map_symmetry",
  "phenix.map_to_model",
  "phenix.map_to_model_histogram",
  "phenix.map_to_object",
  "phenix.map_to_structure_factors",
  "phenix.map_value_at_point",
  "phenix.map_values_along_line",
  "phenix.maps",
  "phenix.mask",
  "phenix.match_maps",
  "phenix.maximum_entropy_map",
  "phenix.merge_models_as_alt_conf",
  "phenix.merging_statistics",
  "phenix.metal_coordination",
  "phenix.mia",
  "phenix.model_idealization",
  "phenix.model_map",
  "phenix.model_map_cc",
  "phenix.model_model_distances",
  "phenix.model_statistics",
  "phenix.model_vs_data",
  "phenix.model_vs_map",
  "phenix.model_vs_xray_data",
  "phenix.models_as_chains",
  "phenix.molprobity",
  "phenix.mopac",
  "phenix.morph_model",
  "phenix.mosaic",
  "phenix.mp_validate_bonds",
  "phenix.mr_model_preparation",
  "phenix.mr_rescoring",
  "phenix.mr_rosetta",
  "phenix.mr_rosetta_rebuild",
  "phenix.mrage",
  "phenix.mtriage",
  "phenix.mtz.dump",
  "phenix.mtz2map",
  "phenix.mtz_as_cif",
  "phenix.multi_crystal_average",
  "phenix.multistart_sa",
  "phenix.muscle",
  "phenix.ncs_and_number_of_ha",
  "phenix.ncs_average",
  "phenix.oat",
  "phenix.omegalyze",
  "phenix.one_good_model",
  "phenix.parallel_autobuild",
  "phenix.pathwalker",
  "phenix.pdb.b_factor_stats",
  "phenix.pdb.biomt_reconstruction",
  "phenix.pdb.hierarchy",
  "phenix.pdb.mtrix_reconstruction",
  "phenix.pdb_as_cif",
  "phenix.pdb_atom_selection",
  "phenix.pdb_cif_conversion",
  "phenix.pdb_editor",
  "phenix.pdb_interpretation",
  "phenix.pdbtools",
  "phenix.perigee",
  "phenix.phase_and_build",
  "phenix.phaser",
  "phenix.phaser.voyager",
  "phenix.phaser_mp",
  "phenix.phasertng",
  "phenix.phassade",
  "phenix.plan_sad_experiment",
  "phenix.polder",
  "phenix.predict_and_build",
  "phenix.prime_and_switch_map",
  "phenix.print_sequence",
  "phenix.probe",
  "phenix.process_predicted_model",
  "phenix.protonation_from_map",
  "phenix.pulchra",
  "phenix.pymol",
  "phenix.python",
  "phenix.query_docs",
  "phenix.r_factor_statistics",
  "phenix.rama_z",
  "phenix.ramalyze",
  "phenix.rank_scale_map",
  "phenix.read_ncs_from_pdb",
  "phenix.ready_set",
  "phenix.real_space_correlation",
  "phenix.real_space_diff_map",
  "phenix.real_space_refine",
  "phenix.rebuild_ai_database",
  "phenix.rebuild_cctbx_api",
  "phenix.rebuild_model",
  "phenix.rebuild_phenix_api",
  "phenix.reciprocal_space_arrays",
  "phenix.reduce",
  "phenix.reduce_cryoem_resolution",
  "phenix.reel",
  "phenix.refine",
  "phenix.refine_ca_model",
  "phenix.reflection_file_converter",
  "phenix.reflection_statistics",
  "phenix.reindex",
  "phenix.remove_aniso",
  "phenix.remove_clashes",
  "phenix.remove_free_from_map",
  "phenix.remove_outliers",
  "phenix.remove_poor_fragments",
  "phenix.renumber_fab",
  "phenix.replace_side_chains",
  "phenix.replace_with_fragments_from_pdb",
  "phenix.resolution",
  "phenix.resolve",
  "phenix.resolve_cryo_em",
  "phenix.resolve_pattern",
  "phenix.rest_server",
  "phenix.reverse_fragment",
  "phenix.rna_pucker_assign",
  "phenix.rna_validate",
  "phenix.rna_validate_bonds",
  "phenix.rna_validate_puckers",
  "phenix.rna_validate_suites",
  "phenix.rocket",
  "phenix.rosetta.run_phenix_interface",
  "phenix.rosetta_refine",
  "phenix.rotalyze",
  "phenix.runWizard",
  "phenix.run_example",
  "phenix.run_refinement_example",
  "phenix.sad_data_from_pdb",
  "phenix.scale_and_merge",
  "phenix.sceds",
  "phenix.sculpt_ensemble",
  "phenix.sculptor",
  "phenix.search_and_morph",
  "phenix.secondary_structure_restraints",
  "phenix.secondary_structure_validation",
  "phenix.segment_and_split_map",
  "phenix.sequence_from_map",
  "phenix.setup_tutorial",
  "phenix.shift_model_to_match_map",
  "phenix.show_build_path",
  "phenix.show_dist_paths",
  "phenix.show_geometry_outliers",
  "phenix.show_map_info",
  "phenix.simple_ncs_from_pdb",
  "phenix.sisa",
  "phenix.solve",
  "phenix.sort_hetatms",
  "phenix.start_chimerax",
  "phenix.start_coot",
  "phenix.structural_domain_search",
  "phenix.structure_comparison",
  "phenix.structure_search",
  "phenix.subgroup_symmetry_datasets",
  "phenix.suitename",
  "phenix.suitename_old",
  "phenix.superpose_and_morph",
  "phenix.superpose_ligands",
  "phenix.superpose_maps",
  "phenix.superpose_models",
  "phenix.superpose_pdbs",
  "phenix.table_one",
  "phenix.tls",
  "phenix.tls_analysis",
  "phenix.tls_as_xyz",
  "phenix.trace_and_build",
  "phenix.trim_overlapping",
  "phenix.trim_pdb",
  "phenix.twin_map_utils",
  "phenix.undowser2_validation",
  "phenix.undowser_validation",
  "phenix.unique_with_biomt",
  "phenix.update_ai_database",
  "phenix.validate_H",
  "phenix.validation_cryoem",
  "phenix.varref",
  "phenix.version",
  "phenix.voyager.casp_rel_ellg",
  "phenix.voyager.em_placement",
  "phenix.voyager.emplace_local",
  "phenix.voyager.isostructure",
  "phenix.voyager.lddt_to_bfactor",
  "phenix.voyager.rmsd_to_bfactor",
  "phenix.where_mon_lib_list_cif",
  "phenix.wxprintenv",
  "phenix.wxpython",
  "phenix.xmanip",
  "phenix.xtriage",
  "phenix.zmq_queue",
  "phenix.zmq_worker",
]

# File names that must not be used.
# NOTE(review): these all look like generic placeholder names rather than
# real user files -- presumably that is why they are rejected; confirm.
INVALID_FILENAMES = {
  # coordinate / model files
  "input.pdb",
  "model.pdb",
  "my_model.pdb",
  "search_model.pdb",
  # sequence files
  "protein.seq",
  "seq.dat",
  "sequence.seq",
  # reflection data files
  "data.mtz",
  "input.mtz",
  "output.mtz",
  "reflections.mtz",
}

# 2. Syntax Hints (The "Tactician's" Cheat Sheet)
# Maps a Phenix program name to a block of usage guidance for that program.
# Each value is a single string assembled by implicit concatenation of the
# adjacent literals; individual rules inside a value are separated by "\n",
# so every rule must end with an explicit "\n" except the last one.
USAGE_HINTS = {
  "phenix.phaser": (
    "USAGE RULE: phenix.phaser <data.mtz>"
    " <model.pdb> <sequence.fa> [options]\n"
    "CRITICAL NOTES:\n"
    "1. Use bare filenames for all files."
    " Do NOT use 'hklin=xxx' or"
    " 'model_file=xxx'.\n"
    "2. Do NOT use complex 'ensemble.coords='"
    " syntax. Just provide the PDB file"
    " directly.\n"
    "3. USAGE RULE: To search for more than"
    " one copy, specify `ncopies=X`.\n"
    "4. GEOMETRY WARNING: If you get"
    " 'Incompatible unit cell', STOP."
    " Run `phenix.xtriage`."
  ),
  "phenix.xtriage": (
    "USAGE RULE: phenix.xtriage"
    " <reflections.mtz> [options]\n"
    "NOTE: The reflections file MUST be a"
    " positional argument.\n"
    "CRITICAL: This tool is for DIAGNOSIS"
    " ONLY. It cannot output re-indexed MTZ"
    " files. Do NOT use `hklout` or try to"
    " change space groups with it."
  ),
  "phenix.reflection_file_converter": (
    "USAGE RULE:"
    " phenix.reflection_file_converter"
    " <reflections.mtz> [options]\n"
    "SYNTAX EXCEPTION: This program requires"
    " double-dashes. Do NOT use bare"
    " `key=value`.\n"
    "CORRECT EXAMPLE:"
    " `phenix.reflection_file_converter"
    " data.mtz --space_group=P61"
    " --mtz=out.mtz --label=I-obs`\n"
    "CRITICAL: You MUST specify `--label`."
    " Look at the error log for choices."
  ),
  "phenix.explore_metric_symmetry": (
    "USAGE RULE:"
    " phenix.explore_metric_symmetry"
    " <data.mtz> [options]\n"
    "NOTE: File must be positional."
  ),
  # NOTE(review): the first resolution rule below is unconditional, while
  # the last one only asks for resolution when `stop_after_predict=True` is
  # NOT used -- confirm which wording is intended before editing the text.
  "phenix.predict_and_build": (
    "USAGE RULE: phenix.predict_and_build"
    " [options] [param=value]\n"
    "USAGE RULE: be sure to add resolution=xxx"
    " as resolution is required.\n"
    "USAGE RULE: if you use the keyword"
    " `stop_after_predict=True` to run just"
    " prediction, supply a sequence file, and"
    " optionally supply a model to be used as"
    " a template.\n"
    "USAGE RULE: if you do not use the keyword"
    " `stop_after_predict=True` to run just"
    " prediction, be sure to add"
    " resolution=xxx as resolution is required"
    " in this case.\n"
    "CRITICAL SCOPING RULE: This program uses"
    " a deep PHIL hierarchy. Use full paths"
    " (e.g. `input_files.xray_data_file=...`)."
  ),
  "phenix.reindex": (
    "USAGE RULE: phenix.reindex <data.mtz>"
    " space_group=P61 [options]\n"
    "CRITICAL: If input has multiple arrays,"
    " use `labels='I-obs'`."
  ),
  # Each CRITICAL rule ends with "\n" so the rules stay on separate lines
  # (they previously ran together), and the resolution example uses a
  # single "=" like every other key=value example in this table.
  "phenix.refine": (
    "USAGE RULE: phenix.refine <model.pdb>"
    " <data.mtz> [options]\n"
    "CRITICAL: If you need to generate R-free"
    " flags, use the full path:"
    " `xray_data.r_free_flags.generate=True`.\n"
    "CRITICAL: Include twinning in refinement"
    " ONLY if twinning is definitely present"
    " and the twin fraction is at least 0.20.\n"
    "CRITICAL: To specify twinning: use the"
    " full path:"
    " `refinement.twinning.twin_law=...`\n"
    "CRITICAL: To specify resolution: use the"
    " full path:"
    " `xray_data.high_resolution=...`\n"
    "CRITICAL: To specify nproc: use the full"
    " path: `refinement.main.nproc=...`"
  ),
}

# 3. Strategic Heuristics (The "Strategist's" Playbook)
# Maps an error-message fragment to the recommended follow-up action.
# Keys must be unique: in a dict literal a repeated key silently keeps only
# the LAST value, so an earlier duplicate is dead text.
# NOTE(review): keys look like substrings to search for in program logs --
# confirm against the code that consumes this table.
KNOWN_ISSUES = {
  "No array of R-free flags found": (
    "ACTION: if you got this running"
    " phenix.refine, rerun with"
    " `xray_data.r_free_flags.generate=True`"
  ),
  "File not found": (
    "ACTION: if this is an input file, make"
    " sure that you have included the path to"
    " the file.  Look at your list of"
    " `Original Input Files`"
  ),
  "Incompatible unit cell": (
    "ACTION: If re-indexing failed, the space"
    " group is physically impossible. Revert"
    " to ORIGINAL space group and assume"
    " Twinning."
  ),
  "Symmetry error": (
    "ACTION: Run `phenix.xtriage` to check"
    " lattice."
  ),
  "No search procedure defined": (
    "ACTION: Retry `phenix.phaser`."
    " Add `ncopies=1`."
  ),
  "You must specify a reflections file": (
    "ACTION: Retry `phenix.phaser` with bare"
    " filename first."
  ),
  "Unknown space group": (
    "ACTION: Run `phenix.xtriage` to get"
    " space group possibilities."
  ),
  # This key used to appear twice; the shorter first entry
  # ("ACTION: Use full parameter scope.") was always overwritten by this
  # one, so only the effective entry is kept.
  "Ambiguous parameter definition": (
    "ACTION: The log lists 'Best matches'. You"
    " MUST choose one of the full paths listed"
    " (e.g. `xray_data...` for X-ray). Do not"
    " use the short name."
  ),
  "Please use --label": (
    "ACTION: Add `--label=I-obs`."
  ),
  "Couldn't find array": (
    "ACTION: The data label is wrong. Do NOT"
    " run xtriage. Check the history (previous"
    " Xtriage logs) for valid labels"
    " (e.g. I-obs) and retry."
  ),
  "MTZ file read error": (
    "ACTION: The file path is incorrect or"
    " missing. Check 'Original Input Files'"
    " and use the FULL PATH"
    " (e.g. `dir/data.mtz`) instead of the"
    " bare filename."
  ),
}
