From f47ecd8a8af8571d06f30eff75195da051596078 Mon Sep 17 00:00:00 2001 From: Ethan Coon Date: Tue, 24 Mar 2026 17:04:29 -0600 Subject: [PATCH 1/7] a bit more flexibility in the CLI for combine/subset/query --- tools/utils/ats_xdmf.py | 80 +++++++++++++++++++++ tools/utils/query_vis.py | 60 +++++++++++----- tools/utils/subset_vis.py | 144 +++++++++++++++++++++++++++----------- 3 files changed, 225 insertions(+), 59 deletions(-) diff --git a/tools/utils/ats_xdmf.py b/tools/utils/ats_xdmf.py index d43979022..e2482b52b 100644 --- a/tools/utils/ats_xdmf.py +++ b/tools/utils/ats_xdmf.py @@ -68,6 +68,86 @@ def find_domains(directory='.'): f"No ats_vis_*data.h5 files found in '{directory}'.") return sorted(found, key=natural_sort_key) +def resolve_vis_input(domain_or_path, directory='.', prefix='ats_vis'): + """Resolve (directory, domain, filename) from a domain name or a filepath. + + Accepts three calling conventions: + + 1. Domain name — ``resolve_vis_input('surface')`` + Returns ``(directory, 'surface', 'ats_vis_surface_data.h5')``. + + 2. Domain name with explicit directory / prefix overrides + — ``resolve_vis_input('surface', directory='/run', prefix='myprefix')`` + Returns ``('/run', 'surface', 'myprefix_surface_data.h5')``. + + 3. File path — ``resolve_vis_input('/run/ats_vis_surface_data.h5')`` + The argument is treated as a filepath when it contains a path separator + or ends in ``.h5`` or ``.xmf``. ``directory`` and ``prefix`` args are + ignored. ``domain`` is returned as ``None`` because the filename stem + is an opaque combination of prefix and domain (they cannot be separated + without additional information). For ``.xmf`` files the companion + ``_data.h5`` file is located in the same directory. + + Parameters + ---------- + domain_or_path : str + ATS domain name OR path to any vis-related file. + directory : str, optional + Input directory, used only in convention 1 / 2. Default ``'.'``. + prefix : str, optional + Filename prefix, used only in convention 1 / 2. Default ``'ats_vis'``. + + Returns + ------- + directory : str + domain : str or None + filename : str + Basename of the data ``.h5`` file (not a full path). + """ + arg = domain_or_path + + # Detect filepath: contains a separator, or ends with a known extension. + is_path = (os.sep in arg or '/' in arg or + arg.endswith('.h5') or arg.endswith('.xmf')) + + if is_path: + directory = os.path.dirname(os.path.abspath(arg)) if os.path.dirname(arg) else '.' + basename = os.path.basename(arg) + + # Normalise .xmf files to their companion _data.h5 + if basename.endswith('.xmf'): + # Per-cycle: foo_data.h5.42.xmf -> foo_data.h5 + # VisIt master: foo_data.VisIt.xmf -> foo_data.h5 + # General rule: strip everything from the first '.xmf'-adjacent + # suffix by finding the _data.h5 sibling. + import glob as _glob + candidates = _glob.glob(os.path.join(directory, '*_data.h5')) + # Keep only candidates whose basename is a prefix of this xmf name + matches = [os.path.basename(c) for c in candidates + if basename.startswith(os.path.basename(c))] + if len(matches) == 1: + basename = matches[0] + elif not matches: + raise RuntimeError( + f"Cannot find companion _data.h5 for {arg!r} " + f"in {directory!r}.") + else: + raise RuntimeError( + f"Ambiguous companion _data.h5 for {arg!r}: {matches}") + + # domain is None — stem is prefix+domain combined, indistinguishable + return directory, None, basename + + else: + # Convention 1 / 2: construct filename from prefix and domain + domain = arg + if domain == 'domain': + fname = f'{prefix}_data.h5' + else: + fname = f'{prefix}_{domain}_data.h5' + return directory, domain, fname + + def time_unit_conversion(value, input_unit, output_unit): time_in_seconds = { 'yr': 365.25 * 24 * 3600, diff --git a/tools/utils/query_vis.py b/tools/utils/query_vis.py index 1bc5a8bc8..3ced6d965 100644 --- a/tools/utils/query_vis.py +++ b/tools/utils/query_vis.py @@ -24,11 +24,21 @@ def _display_name(varname, domain): return name -def queryVisFiles(directory, domain, time_unit=None): - """Print a summary of ATS visualization output for one domain.""" - h5_name = ats_xdmf.valid_data_filename(domain) - - vf = ats_xdmf.VisFile(directory, domain=domain, output_time_unit=time_unit) +def queryVisFiles(directory, domain, filename, time_unit=None): + """Print a summary of ATS visualization output for one file. + + Parameters + ---------- + directory : str + domain : str or None + ATS domain name, or None when opening by raw filename. + filename : str + Basename of the data HDF5 file. + time_unit : str or None + Display unit. None means use native unit from the file. + """ + vf = ats_xdmf.VisFile(directory, domain=domain, filename=filename, + output_time_unit=time_unit) n_cycles = len(vf.cycles) first_cycle = int(vf.cycles[0]) if n_cycles > 0 else None @@ -48,7 +58,8 @@ def queryVisFiles(directory, domain, time_unit=None): except (KeyError, IndexError): pass - print(f"Domain: {domain} ({h5_name})") + label = domain if domain is not None else filename + print(f"Domain: {label} ({filename})") if n_cycles > 0: print(f" Cycles: {n_cycles} ({first_cycle} \u2026 {last_cycle})") print(f" Time: {t_first:.3f} \u2026 {t_last:.3f} {vf.output_time_unit}") @@ -81,27 +92,42 @@ def main(): description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('domain', metavar='DOMAIN', nargs='?', default='*', - help='ATS domain name (e.g. "surface", "domain"), or ' - '"*" to summarize all domains found (default: *)') + parser.add_argument('domain', metavar='DOMAIN_OR_FILE', nargs='?', default='*', + help='ATS domain name, path to a vis file (.h5 or .xmf), ' + 'or "*" to summarize all domains found (default: *). ' + 'A filepath implies its own directory; -d and -p are ignored.') parser.add_argument('-d', '--directory', dest='directory', default='.', help='Directory containing visualization files ' - '(default: current directory)') + '(default: current directory). Ignored when DOMAIN_OR_FILE ' + 'is a filepath.') + parser.add_argument('-p', '--prefix', dest='prefix', default='ats_vis', + help='Filename prefix (default: ats_vis). Ignored when ' + 'DOMAIN_OR_FILE is a filepath.') parser.add_argument('--time-unit', dest='time_unit', default=None, choices=['s', 'hr', 'd', 'yr', 'noleap'], help='Time unit for display (default: native unit from file)') args = parser.parse_args() - if args.domain == '*': + arg = args.domain + is_path = (os.sep in arg or '/' in arg or + arg.endswith('.h5') or arg.endswith('.xmf')) + + if is_path: + directory, domain, filename = ats_xdmf.resolve_vis_input(arg) + queryVisFiles(directory, domain, filename, time_unit=args.time_unit) + elif arg == '*': domains = ats_xdmf.find_domains(args.directory) + for i, domain in enumerate(domains): + if i > 0: + print() + _, domain, filename = ats_xdmf.resolve_vis_input( + domain, directory=args.directory, prefix=args.prefix) + queryVisFiles(args.directory, domain, filename, time_unit=args.time_unit) else: - domains = [args.domain] - - for i, domain in enumerate(domains): - if i > 0: - print() - queryVisFiles(args.directory, domain, time_unit=args.time_unit) + _, domain, filename = ats_xdmf.resolve_vis_input( + arg, directory=args.directory, prefix=args.prefix) + queryVisFiles(args.directory, domain, filename, time_unit=args.time_unit) if __name__ == '__main__': diff --git a/tools/utils/subset_vis.py b/tools/utils/subset_vis.py index d1eeeebdc..d7232ea67 100644 --- a/tools/utils/subset_vis.py +++ b/tools/utils/subset_vis.py @@ -54,7 +54,7 @@ def _parse_slice_or_list(s, cast): # Core subset function # --------------------------------------------------------------------------- -def subsetVisFiles(in_directory, domain, out_directory, +def subsetVisFiles(in_directory, domain, in_filename, out_directory, out_stem, times=None, time_unit='s', time_tolerance=1.0, cycles=None, indices=None, include_vars=None, exclude_vars=None, @@ -65,12 +65,16 @@ def subsetVisFiles(in_directory, domain, out_directory, ---------- in_directory : str Directory containing input visualization files. - domain : str - ATS domain name (e.g. 'surface', 'domain'). Used to construct - filenames via ats_xdmf.valid_data_filename(). + domain : str or None + ATS domain name, or None when input was specified as a filepath. + in_filename : str + Basename of the input data HDF5 file. out_directory : str - Directory for output files (created if needed). The same domain - name is used for output filenames. + Directory for output files (created if needed). + out_stem : str + Output filename stem (everything before ``_data.h5``). When domain is + known this is ``{out_prefix}_{domain}``; when input was a filepath it + is the input stem (prefix+domain combined, indistinguishable). times : slice or list or None Result of _parse_slice_or_list() for --times, or None. time_unit : str @@ -88,14 +92,17 @@ def subsetVisFiles(in_directory, domain, out_directory, dry_run : bool If True, print what would be done and return without writing files. """ - h5_name = ats_xdmf.valid_data_filename(domain) - mesh_name = ats_xdmf.valid_mesh_filename(domain) + out_h5_name = f'{out_stem}_data.h5' - in_h5 = os.path.join(in_directory, h5_name) + # Mesh always uses the standard ats_vis name so multiple subsets of the + # same domain can share a single mesh file. + mesh_name = ats_xdmf.valid_mesh_filename(domain if domain is not None else 'domain') + + in_h5 = os.path.join(in_directory, in_filename) if not os.path.isfile(in_h5): raise RuntimeError(f"No HDF5 data file found at {in_h5!r}.") - vf = ats_xdmf.VisFile(in_directory, domain=domain, + vf = ats_xdmf.VisFile(in_directory, domain=domain, filename=in_filename, output_time_unit=time_unit) # Apply cycle/time/index filter @@ -127,11 +134,11 @@ def subsetVisFiles(in_directory, domain, out_directory, # Create output directory os.makedirs(out_directory, exist_ok=True) - # Write per-step XMFs for selected cycles + # Write per-step XMFs for selected cycles (referencing out_h5_name) for cycle in vf.cycles: N = int(cycle) - in_xmf_path = os.path.join(in_directory, f'{h5_name}.{N}.xmf') - out_xmf_path = os.path.join(out_directory, f'{h5_name}.{N}.xmf') + in_xmf_path = os.path.join(in_directory, f'{in_filename}.{N}.xmf') + out_xmf_path = os.path.join(out_directory, f'{out_h5_name}.{N}.xmf') ET.register_namespace('', 'http://www.w3.org/2001/XInclude') tree = ET.parse(in_xmf_path) @@ -146,16 +153,22 @@ def subsetVisFiles(in_directory, domain, out_directory, for e in to_remove: grid.remove(e) + # Update DataItem paths to reference the new h5 filename + for item in root.iter('DataItem'): + text = item.text or '' + if in_filename in text: + item.text = text.replace(in_filename, out_h5_name) + tree.write(out_xmf_path, xml_declaration=True, encoding='ASCII') # Write master VisIt.xmf - _write_visit_xmf(out_directory, h5_name, vf.cycles) + _write_visit_xmf(out_directory, out_h5_name, vf.cycles) # Write subset H5 data file - out_h5 = os.path.join(out_directory, h5_name) + out_h5 = os.path.join(out_directory, out_h5_name) _write_subset_h5(in_h5, out_h5, vf.cycles, selected_vars) - # Copy mesh H5 + # Copy mesh H5 (always standard name so subsets can share it) in_mesh_h5 = os.path.join(in_directory, mesh_name) out_mesh_h5 = os.path.join(out_directory, mesh_name) if os.path.isfile(in_mesh_h5): @@ -164,9 +177,9 @@ def subsetVisFiles(in_directory, domain, out_directory, else: warnings.warn(f"Mesh HDF5 not found: {in_mesh_h5}") - # Copy mesh XMFs - stem = mesh_name[:-3] # strip '.h5' - for suffix in [f'{mesh_name}.0.xmf', f'{stem}.VisIt.xmf']: + # Copy mesh XMFs (standard names) + mesh_stem = mesh_name[:-3] # strip '.h5' + for suffix in [f'{mesh_name}.0.xmf', f'{mesh_stem}.VisIt.xmf']: in_mesh_xmf = os.path.join(in_directory, suffix) if os.path.isfile(in_mesh_xmf): out_mesh_xmf = os.path.join(out_directory, suffix) @@ -176,7 +189,7 @@ def subsetVisFiles(in_directory, domain, out_directory, vf.close() print(f"Done. Output in: {out_directory}") print(f" {len(vf.cycles)} cycles, {len(selected_vars)} variables") - print(f" Open in VisIt: {os.path.join(out_directory, h5_name[:-3] + '.VisIt.xmf')}") + print(f" Open in VisIt: {os.path.join(out_directory, out_stem + '.VisIt.xmf')}") def _write_visit_xmf(out_directory, h5_name, cycles): @@ -237,16 +250,26 @@ def main(): description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('domain', metavar='DOMAIN', - help='ATS domain name (e.g. "surface", "domain"), or ' - '"*" to process all domains found in the input ' - 'directory.') + parser.add_argument('domain', metavar='DOMAIN_OR_FILE', + help='ATS domain name (e.g. "surface", "domain"), path to a ' + 'vis file (.h5 or .xmf), or "*" to process all domains ' + 'found in the input directory. A filepath implies its own ' + 'directory; -d and -p are ignored.') parser.add_argument('-d', '--directory', dest='directory', default='.', help='Directory containing input visualization files ' - '(default: current directory)') + '(default: current directory). Ignored when DOMAIN_OR_FILE ' + 'is a filepath.') + parser.add_argument('-p', '--prefix', dest='prefix', default='ats_vis', + help='Input filename prefix (default: ats_vis). Ignored when ' + 'DOMAIN_OR_FILE is a filepath.') parser.add_argument('--output', '-o', dest='output', default=None, - help='Output directory ' - '(default: DIRECTORY/subset)') + help='Output directory (default: DIRECTORY/subset)') + parser.add_argument('--out-prefix', dest='out_prefix', default=None, + help='Output filename prefix (default: same as input prefix). ' + 'Combined with domain to form the output stem: ' + '{out_prefix}_{domain}_data.h5. When input was a filepath ' + 'the default output stem is the input stem (prefix+domain ' + 'combined).') # Time/cycle/index selection (mutually exclusive) filter_group = parser.add_mutually_exclusive_group() @@ -295,17 +318,14 @@ def main(): args = parser.parse_args() - in_directory = args.directory + arg = args.domain + is_path = (os.sep in arg or '/' in arg or + arg.endswith('.h5') or arg.endswith('.xmf')) out_directory = args.output - if out_directory is None: - out_directory = os.path.join(in_directory, 'subset') - - # Parse filter arguments into slice objects or lists - times_spec = None - cycles_spec = None - indices_spec = None + # Parse filter arguments + times_spec = cycles_spec = indices_spec = None if args.times is not None: times_spec = _parse_slice_or_list(args.times, float) elif args.cycles is not None: @@ -313,17 +333,23 @@ def main(): elif args.indices is not None: indices_spec = _parse_slice_or_list(args.indices, int) - if args.domain == '*': - domains = ats_xdmf.find_domains(in_directory) - print(f"Found domains: {domains}") - else: - domains = [args.domain] - - for domain in domains: + if is_path: + in_directory, domain, in_filename = ats_xdmf.resolve_vis_input(arg) + if out_directory is None: + out_directory = os.path.join(in_directory, 'subset') + # Default out_stem: input stem (prefix+domain combined) + if args.out_prefix is not None: + # User supplied an explicit prefix; domain is unknown so treat + # out_prefix as the full stem. + out_stem = args.out_prefix + else: + out_stem = in_filename[:-len('_data.h5')] subsetVisFiles( in_directory=in_directory, domain=domain, + in_filename=in_filename, out_directory=out_directory, + out_stem=out_stem, times=times_spec, time_unit=args.time_unit, time_tolerance=args.time_tolerance, @@ -333,6 +359,40 @@ def main(): exclude_vars=args.exclude_vars, dry_run=args.dry_run, ) + else: + in_directory = args.directory + if out_directory is None: + out_directory = os.path.join(in_directory, 'subset') + + if arg == '*': + domains = ats_xdmf.find_domains(in_directory) + print(f"Found domains: {domains}") + else: + domains = [arg] + + for domain in domains: + _, domain, in_filename = ats_xdmf.resolve_vis_input( + domain, directory=in_directory, prefix=args.prefix) + out_prefix = args.out_prefix if args.out_prefix is not None else args.prefix + if domain == 'domain': + out_stem = out_prefix + else: + out_stem = f'{out_prefix}_{domain}' + subsetVisFiles( + in_directory=in_directory, + domain=domain, + in_filename=in_filename, + out_directory=out_directory, + out_stem=out_stem, + times=times_spec, + time_unit=args.time_unit, + time_tolerance=args.time_tolerance, + cycles=cycles_spec, + indices=indices_spec, + include_vars=args.include_vars, + exclude_vars=args.exclude_vars, + dry_run=args.dry_run, + ) if __name__ == '__main__': From 654c62eeca8138fc8dea6ad567135a4f2234b025 Mon Sep 17 00:00:00 2001 From: Ethan Coon Date: Tue, 24 Mar 2026 17:09:10 -0600 Subject: [PATCH 2/7] fixes default time unit in subset --- tools/utils/subset_vis.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/utils/subset_vis.py b/tools/utils/subset_vis.py index d7232ea67..97babc3e8 100644 --- a/tools/utils/subset_vis.py +++ b/tools/utils/subset_vis.py @@ -55,7 +55,7 @@ def _parse_slice_or_list(s, cast): # --------------------------------------------------------------------------- def subsetVisFiles(in_directory, domain, in_filename, out_directory, out_stem, - times=None, time_unit='s', time_tolerance=1.0, + times=None, time_unit=None, time_tolerance=1.0, cycles=None, indices=None, include_vars=None, exclude_vars=None, dry_run=False): @@ -77,8 +77,9 @@ def subsetVisFiles(in_directory, domain, in_filename, out_directory, out_stem, is the input stem (prefix+domain combined, indistinguishable). times : slice or list or None Result of _parse_slice_or_list() for --times, or None. - time_unit : str - One of 's', 'hr', 'd', 'yr'. Units for time filtering values. + time_unit : str or None + Units for --times filter values. Default None uses the native unit + stored in the file. time_tolerance : float Tolerance for time matching, in time_unit. cycles : slice or list or None @@ -103,11 +104,14 @@ def subsetVisFiles(in_directory, domain, in_filename, out_directory, out_stem, raise RuntimeError(f"No HDF5 data file found at {in_h5!r}.") vf = ats_xdmf.VisFile(in_directory, domain=domain, filename=in_filename, - output_time_unit=time_unit) + output_time_unit=None) + + # Resolve time unit: explicit override, or native unit from the file + effective_time_unit = time_unit if time_unit is not None else vf.input_time_unit # Apply cycle/time/index filter if times is not None: - vf.filterTimes(times, time_unit=time_unit, tolerance=time_tolerance) + vf.filterTimes(times, time_unit=effective_time_unit, tolerance=time_tolerance) elif cycles is not None: vf.filterCycles(cycles) elif indices is not None: @@ -124,7 +128,7 @@ def subsetVisFiles(in_directory, domain, in_filename, out_directory, out_stem, if dry_run: print(f"Selected {len(vf.cycles)} cycles:") for cycle, t in zip(vf.cycles, vf.times): - print(f" cycle {int(cycle):8d} t = {t:.6g} {time_unit}") + print(f" cycle {int(cycle):8d} t = {t:.6g} {effective_time_unit}") print(f"\nSelected {len(selected_vars)} variables:") for v in selected_vars: print(f" {v}") @@ -294,9 +298,9 @@ def main(): 'Formats: "START:STOP:STEP" (numpy-style, exclusive end), ' '"i1,i2,i3". Supports negative indices.') - parser.add_argument('--time-unit', dest='time_unit', default='s', + parser.add_argument('--time-unit', dest='time_unit', default=None, choices=['s', 'hr', 'd', 'yr', 'noleap'], - help='Time unit for --times values (default: s)') + help='Time unit for --times values (default: native unit from file)') parser.add_argument('--time-tolerance', dest='time_tolerance', type=float, default=1.0, help='Tolerance for nearest-time matching, in ' From c494b8261326812a3f563222fd84835c25fb663d Mon Sep 17 00:00:00 2001 From: Ethan Coon Date: Tue, 24 Mar 2026 18:03:29 -0600 Subject: [PATCH 3/7] removes file option from subset, makes clearer how to combine with prefixes on subset and combine --- tools/utils/combine_vis.py | 175 +++++++++++++------------------------ tools/utils/query_vis.py | 13 ++- tools/utils/subset_vis.py | 112 ++++++++---------------- 3 files changed, 101 insertions(+), 199 deletions(-) diff --git a/tools/utils/combine_vis.py b/tools/utils/combine_vis.py index ace6c5010..34e7816d2 100644 --- a/tools/utils/combine_vis.py +++ b/tools/utils/combine_vis.py @@ -46,10 +46,7 @@ # --------------------------------------------------------------------------- def _validate_time_ordering(vis_files, directories, time_unit): - """Warn loudly if runs are not in chronological order. - - The only condition we can guarantee is that start[i+1] > start[i]. - """ + """Warn loudly if runs are not in chronological order.""" for i in range(len(vis_files) - 1): t0 = vis_files[i].times[0] t1 = vis_files[i + 1].times[0] @@ -103,22 +100,10 @@ def _select_cycles(vis_files, directories, eps): def _compute_selected_vars(vis_files, include_vars, exclude_vars): - """Return the intersection of variable sets across runs, then apply filters. - - Parameters - ---------- - vis_files : list of ats_xdmf.VisFile - include_vars : list of str or None - exclude_vars : list of str or None - - Returns - ------- - list of str - """ + """Return the intersection of variable sets across runs, then apply filters.""" var_sets = [set(vf.variables()) for vf in vis_files] common_vars = set.intersection(*var_sets) - # Warn about any per-run differences all_vars = set.union(*var_sets) if common_vars != all_vars: for i, (vf, vs) in enumerate(zip(vis_files, var_sets)): @@ -128,8 +113,6 @@ def _compute_selected_vars(vis_files, include_vars, exclude_vars): f"Run {i}: missing variables present in other runs: {sorted(missing)}. " "These will be excluded from the combined output.") - # Apply user filters using the first VisFile's matching logic, then restrict - # to common_vars. filtered = vis_files[0].variables(names=include_vars, exclude=exclude_vars) selected = [v for v in filtered if v in common_vars] @@ -143,27 +126,18 @@ def _compute_selected_vars(vis_files, include_vars, exclude_vars): def _write_combined_h5(run_data, out_h5_path, selected_vars): """Write combined HDF5 with sequentially renumbered cycle keys. - Parameters - ---------- - run_data : list of (vf, directory, sel_cycles, sel_times) - out_h5_path : str - selected_vars : list of str - Returns ------- steps : list of (run_idx, old_key_str, new_key_int, h5_native_time) - One entry per written cycle step. """ steps = [] new_key = 0 with h5py.File(out_h5_path, 'w') as dst: - # Propagate time unit from first run first_vf = run_data[0][0] if 'time unit' in first_vf.d.attrs: dst.attrs['time unit'] = first_vf.d.attrs['time unit'] - # Create variable groups up front for var in selected_vars: dst.create_group(var) @@ -198,29 +172,7 @@ def _write_combined_h5(run_data, out_h5_path, selected_vars): def _write_step_xmf(in_xmf_path, out_xmf_path, old_key_str, new_key_int, h5_name, h5_native_time, selected_vars): - """Write a per-step XMF with updated key references. - - Rewrites DataItem text from ``h5name:VARNAME/{old_key}`` to - ``h5name:VARNAME/{new_key}``. Removes Attribute elements for variables - not in selected_vars. Updates the ``