diff --git a/tools/utils/ats_xdmf.py b/tools/utils/ats_xdmf.py index d43979022..4560f4062 100644 --- a/tools/utils/ats_xdmf.py +++ b/tools/utils/ats_xdmf.py @@ -68,18 +68,99 @@ def find_domains(directory='.'): f"No ats_vis_*data.h5 files found in '{directory}'.") return sorted(found, key=natural_sort_key) +def resolve_vis_input(domain_or_path, directory='.', prefix='ats_vis'): + """Resolve (directory, domain, filename) from a domain name or a filepath. + + Accepts three calling conventions: + + 1. Domain name — ``resolve_vis_input('surface')`` + Returns ``(directory, 'surface', 'ats_vis_surface_data.h5')``. + + 2. Domain name with explicit directory / prefix overrides + — ``resolve_vis_input('surface', directory='/run', prefix='myprefix')`` + Returns ``('/run', 'surface', 'myprefix_surface_data.h5')``. + + 3. File path — ``resolve_vis_input('/run/ats_vis_surface_data.h5')`` + The argument is treated as a filepath when it contains a path separator + or ends in ``.h5`` or ``.xmf``. ``directory`` and ``prefix`` args are + ignored. ``domain`` is returned as ``None`` because the filename stem + is an opaque combination of prefix and domain (they cannot be separated + without additional information). For ``.xmf`` files the companion + ``_data.h5`` file is located in the same directory. + + Parameters + ---------- + domain_or_path : str + ATS domain name OR path to any vis-related file. + directory : str, optional + Input directory, used only in convention 1 / 2. Default ``'.'``. + prefix : str, optional + Filename prefix, used only in convention 1 / 2. Default ``'ats_vis'``. + + Returns + ------- + directory : str + domain : str or None + filename : str + Basename of the data ``.h5`` file (not a full path). + """ + arg = domain_or_path + + # Detect filepath: contains a separator, or ends with a known extension. + is_path = (os.sep in arg or '/' in arg or + arg.endswith('.h5') or arg.endswith('.xmf')) + + if is_path: + directory = os.path.dirname(os.path.abspath(arg)) if os.path.dirname(arg) else '.' + basename = os.path.basename(arg) + + # Normalise .xmf files to their companion _data.h5 + if basename.endswith('.xmf'): + # Per-cycle: foo_data.h5.42.xmf -> foo_data.h5 + # VisIt master: foo_data.VisIt.xmf -> foo_data.h5 + # General rule: strip everything from the first '.xmf'-adjacent + # suffix by finding the _data.h5 sibling. + import glob as _glob + candidates = _glob.glob(os.path.join(directory, '*_data.h5')) + # Keep only candidates whose basename is a prefix of this xmf name + matches = [os.path.basename(c) for c in candidates + if basename.startswith(os.path.basename(c))] + if len(matches) == 1: + basename = matches[0] + elif not matches: + raise RuntimeError( + f"Cannot find companion _data.h5 for {arg!r} " + f"in {directory!r}.") + else: + raise RuntimeError( + f"Ambiguous companion _data.h5 for {arg!r}: {matches}") + + # domain is None — stem is prefix+domain combined, indistinguishable + return directory, None, basename + + else: + # Convention 1 / 2: construct filename from prefix and domain + domain = arg + if domain == 'domain': + fname = f'{prefix}_data.h5' + else: + fname = f'{prefix}_{domain}_data.h5' + return directory, domain, fname + + def time_unit_conversion(value, input_unit, output_unit): time_in_seconds = { - 'yr': 365.25 * 24 * 3600, + 'y': 365.25 * 24 * 3600, 'noleap': 365 * 24 *3600, 'd': 24 * 3600, - 'hr': 3600, + 'h': 3600, + 'min': 60, 's': 1 } if input_unit not in time_in_seconds: - raise ValueError("Invalid input time unit : must be one of 'yr', 'noleap', 'd', 'hr', or 's'") + raise ValueError("Invalid input time unit : must be one of 'y', 'noleap', 'd', 'h', 'min', or 's'") if output_unit not in time_in_seconds: - raise ValueError("Invalid output time unit : must be one of 'yr', 'noleap', 'd', 'hr', or 's'") + raise ValueError("Invalid output time unit : must be one of 'y', 'noleap', 'd', 'h', 'min', or 's'") value2sec = value * time_in_seconds[input_unit] output_value = value2sec / time_in_seconds[output_unit] @@ -143,8 +224,8 @@ def __init__(self, directory='.', domain=None, filename=None, mesh_filename=None else: warnings.warn( f"HDF5 file {self.fname!r} has no 'time unit' attribute; " - "assuming 'yr'. This file may be from an old version of ATS.") - self.input_time_unit = 'yr' + "assuming 'y'. This file may be from an old version of ATS.") + self.input_time_unit = 'y' self.output_time_unit = output_time_unit if output_time_unit is not None else self.input_time_unit diff --git a/tools/utils/combine_vis.py b/tools/utils/combine_vis.py deleted file mode 100644 index ace6c5010..000000000 --- a/tools/utils/combine_vis.py +++ /dev/null @@ -1,566 +0,0 @@ -#!/usr/bin/env python -"""Combine ATS visualization output from restarted/continuation runs into a single -self-contained XDMF dataset. - -Takes N input directories (in chronological order) and produces one output -directory containing: - - A combined HDF5 data file with renumbered timestep keys (0, 1, 2, ...) - - One per-step XMF per selected cycle - - A master VisIt.xmf - - The mesh H5 and XMF files (copied from the first run that has them) - -Overlapping cycles at restart boundaries are deduplicated: for each run except -the last, any cycle whose time >= the start time of the next run is dropped. - -Examples --------- -Combine three restart directories for the "domain" domain:: - - combine_vis.py domain run0/ run1/ run2/ --output combined/ - -Same for the surface domain, with a 1-hour overlap tolerance:: - - combine_vis.py surface run0/ run1/ run2/ --output combined/ --eps 3600 --time-unit s -""" - -import sys -import os -import shutil -import argparse -import warnings -import xml.etree.ElementTree as ET - -import numpy as np -import h5py - -try: - sys.path.insert(0, os.path.join(os.environ['ATS_SRC_DIR'], 'tools', 'utils')) -except KeyError: - pass - -import ats_xdmf - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _validate_time_ordering(vis_files, directories, time_unit): - """Warn loudly if runs are not in chronological order. - - The only condition we can guarantee is that start[i+1] > start[i]. - """ - for i in range(len(vis_files) - 1): - t0 = vis_files[i].times[0] - t1 = vis_files[i + 1].times[0] - if t1 <= t0: - warnings.warn( - f"\n *** TIME ORDERING WARNING ***\n" - f" Run '{directories[i+1]}' starts at t={t1:.6g} {time_unit}\n" - f" but run '{directories[i]}' starts at t={t0:.6g} {time_unit}.\n" - f" Directories may be out of order — combined output may be wrong.", - stacklevel=2) - - -def _select_cycles(vis_files, directories, eps): - """For each run, select cycles to include after deduplication. - - Parameters - ---------- - vis_files : list of ats_xdmf.VisFile - directories : list of str - eps : float - Overlap tolerance in the VisFile's output_time_unit. Cycles from run i - with time >= (start_time_of_run_i+1 - eps) are dropped. - - Returns - ------- - list of (vf, directory, selected_cycle_strs, selected_times) - """ - run_data = [] - for i, (vf, directory) in enumerate(zip(vis_files, directories)): - cycles = list(vf.cycles) - times = np.array(vf.times) - - if i < len(vis_files) - 1: - next_start = vis_files[i + 1].times[0] - cutoff = next_start - eps - mask = times < cutoff - sel_cycles = [c for c, m in zip(cycles, mask) if m] - sel_times = times[mask] - else: - sel_cycles = cycles - sel_times = times - - if len(sel_cycles) == 0: - warnings.warn( - f"Run '{directory}': all cycles filtered out after deduplication. " - "Check that directories are in chronological order and that --eps is appropriate.") - else: - run_data.append((vf, directory, sel_cycles, sel_times)) - - return run_data - - -def _compute_selected_vars(vis_files, include_vars, exclude_vars): - """Return the intersection of variable sets across runs, then apply filters. - - Parameters - ---------- - vis_files : list of ats_xdmf.VisFile - include_vars : list of str or None - exclude_vars : list of str or None - - Returns - ------- - list of str - """ - var_sets = [set(vf.variables()) for vf in vis_files] - common_vars = set.intersection(*var_sets) - - # Warn about any per-run differences - all_vars = set.union(*var_sets) - if common_vars != all_vars: - for i, (vf, vs) in enumerate(zip(vis_files, var_sets)): - missing = common_vars - vs - if missing: - warnings.warn( - f"Run {i}: missing variables present in other runs: {sorted(missing)}. " - "These will be excluded from the combined output.") - - # Apply user filters using the first VisFile's matching logic, then restrict - # to common_vars. - filtered = vis_files[0].variables(names=include_vars, exclude=exclude_vars) - selected = [v for v in filtered if v in common_vars] - - if not selected: - raise RuntimeError( - "No variables selected after filtering. Check --include / --exclude.") - - return selected - - -def _write_combined_h5(run_data, out_h5_path, selected_vars): - """Write combined HDF5 with sequentially renumbered cycle keys. - - Parameters - ---------- - run_data : list of (vf, directory, sel_cycles, sel_times) - out_h5_path : str - selected_vars : list of str - - Returns - ------- - steps : list of (run_idx, old_key_str, new_key_int, h5_native_time) - One entry per written cycle step. - """ - steps = [] - new_key = 0 - - with h5py.File(out_h5_path, 'w') as dst: - # Propagate time unit from first run - first_vf = run_data[0][0] - if 'time unit' in first_vf.d.attrs: - dst.attrs['time unit'] = first_vf.d.attrs['time unit'] - - # Create variable groups up front - for var in selected_vars: - dst.create_group(var) - - for run_idx, (vf, directory, sel_cycles, sel_times) in enumerate(run_data): - src = vf.d - for old_key_str in sel_cycles: - new_key_str = str(new_key) - h5_native_time = None - - for var in selected_vars: - if var not in src: - warnings.warn(f"Variable '{var}' missing in '{directory}'; skipping.") - continue - if old_key_str not in src[var]: - warnings.warn( - f"Cycle key '{old_key_str}' missing for variable '{var}' " - f"in '{directory}'; skipping.") - continue - - ds = dst[var].create_dataset(new_key_str, data=src[var][old_key_str][:]) - if 'Time' in src[var][old_key_str].attrs: - t_val = src[var][old_key_str].attrs['Time'] - ds.attrs['Time'] = t_val - if h5_native_time is None: - h5_native_time = float(t_val) - - steps.append((run_idx, old_key_str, new_key, h5_native_time)) - new_key += 1 - - return steps - - -def _write_step_xmf(in_xmf_path, out_xmf_path, old_key_str, new_key_int, - h5_name, h5_native_time, selected_vars): - """Write a per-step XMF with updated key references. - - Rewrites DataItem text from ``h5name:VARNAME/{old_key}`` to - ``h5name:VARNAME/{new_key}``. Removes Attribute elements for variables - not in selected_vars. Updates the ``