An xarray backend for reading AMReX plotfiles with native support for time series concatenation and multi-level AMR data.
xamrex registers the amrex backend with xarray for working with AMReX simulation data in Python. The package supports both single plotfiles and multi-time series datasets, with intelligent handling of adaptive mesh refinement (AMR) levels. Efficient memory usage is achieved with dask-backed arrays for lazy loading and large datasets.
# Install from source
git clone https://github.com/your-repo/xamrex.git
cd xamrex
pip install -e .

import xarray as xr
# Load single AMReX plotfile
ds = xr.open_dataset('plt_00000', engine='amrex', level=0)
# Access data variables (lazy loaded)
temperature = ds['temp']
print(f"Shape: {temperature.shape}")
print(f"Time: {ds.attrs['current_time']}")

import xarray as xr
# Method 1: Explicit file list
plotfiles = ['plt_00000', 'plt_01000', 'plt_02000']
ds = xr.open_dataset(plotfiles, engine='amrex', level=0)
# Method 2: Directory auto-discovery
ds = xr.open_dataset('simulation_output/', engine='amrex', level=0, pattern='plt_*')

# Load different refinement levels
ds_level0 = xr.open_dataset(plotfiles, engine='amrex', level=0) # Base level
ds_level1 = xr.open_dataset(plotfiles, engine='amrex', level=1) # Refined level
print(f"Level 0: {dict(ds_level0.sizes)}")
print(f"Level 1: {dict(ds_level1.sizes)}") # Higher resolution in x,y

Missing levels are automatically filled with NaN. Time steps that don't have the requested level are filled entirely with NaN. TODO: Flag levels that never exist within the dataset.
Some AMReX applications write additional variables outside Cell (for example rho2d, u2d, v2d).
There are two ways to include these when opening datasets:
- Load selected auxiliary groups by name.
- Load all auxiliary groups discovered in the AMReX Header.
import os
import xarray as xr
# Option 1: load base level plus selected 2D auxiliary groups
plt_root = "simulation_output/"
aux_groups = ['rho2d', 'u2d', 'v2d']
ds0 = xr.open_dataset(
os.path.join(plt_root),
engine='amrex',
level=0,
pattern='plt*',
auxiliary_multifabs=aux_groups,
)
# Option 2: include all auxiliary groups discovered in Header
ds_all = xr.open_dataset(
"simulation_output/",
engine="amrex",
level=0,
pattern="plt_*",
include_auxiliary_multifabs=True,
)

The amrex engine automatically detects the input type and handles it appropriately:
# Single plotfile directory
ds = xr.open_dataset("plt_00000", engine='amrex', level=0)
# List of plotfile directories (time series)
ds = xr.open_dataset(["plt_00000", "plt_01000"], engine='amrex', level=0)
# Directory containing plotfiles (auto-discovery)
ds = xr.open_dataset("simulation_data/", engine='amrex', level=0, pattern="plt_*")
# Custom patterns
ds = xr.open_dataset("data/", engine='amrex', level=1, pattern="sim_run_*")

import xamrex
# Primary time series loading function
ds = xamrex.open_amrex_time_series(plotfiles, level=0)
# Find plotfiles in directory
files = xamrex.find_amrex_time_series("data/", pattern="plt_*")
# Create time series from directory
ds = xamrex.create_time_series_from_directory("data/", pattern="plt_*", level=0)
# Validate file compatibility before loading
validation = xamrex.validate_time_series_compatibility(plotfiles)
print(f"Compatible: {validation['compatible']}")

# Extract time slices
early = xamrex.extract_time_slice(ds, time_range=(0, 1000))
middle = xamrex.extract_time_slice(ds, time_indices=slice(5, 10))
# Compute time statistics
stats = xamrex.compute_time_statistics(
ds,
variables=['temp', 'salt'],
statistics=['mean', 'std', 'min', 'max']
)
print(f"Statistics: {list(stats.data_vars)}")

# Multi-level access utilities
levels = xamrex.open_amrex_levels("plt_00000", levels=[0, 1, 2])
summary = xamrex.create_level_summary("plt_00000")
# Level information
max_level = xamrex.get_max_level("plt_00000")
available = xamrex.get_available_levels_from_file("plt_00000")
# Load specific levels
ds_level0 = xamrex.load_base_level("plt_00000")
ds_level1 = xamrex.load_level("plt_00000", level=1)

# Use custom time dimension name
ds = xr.open_dataset(
plotfiles,
engine='amrex',
level=0,
time_dimension_name='time' # Instead of default 'ocean_time'
)

# Rename spatial coordinates
ds = xr.open_dataset(
plotfiles,
engine='amrex',
level=0,
dimension_names={'x': 'longitude', 'y': 'latitude', 'z': 'depth'}
)

# Drop variables to save memory
ds = xr.open_dataset(
plotfiles,
engine='amrex',
level=0,
drop_variables=['salt', 'other_field']
)
# Work with large time series efficiently
large_ds = xamrex.open_amrex_time_series("large_simulation/plt_*", level=0)
subset = large_ds.isel(ocean_time=slice(0, 10)) # Lazy slicing
computed = subset.compute() # Load only subset into memory

The backend automatically:
- Finds the first file with the requested level as a spatial template
- Uses that template for coordinate structure
- Fills missing levels with NaN values for time steps that don't have that level
# Example: Mixed-level time series
# plt_00000: max_level = 0 (base only)
# plt_01000: max_level = 1 (has refinement)
# plt_02000: max_level = 1 (has refinement)
ds_level1 = xr.open_dataset(['plt_00000', 'plt_01000', 'plt_02000'],
engine='amrex', level=1)
# Result: Level 1 dataset with:
# - Time step 0: All NaN (plt_00000 doesn't have level 1)
# - Time step 1: Valid data where level 1 exists, NaN elsewhere
# - Time step 2: Valid data where level 1 exists, NaN elsewhere

# Level 0: Base resolution
ds_l0 = xr.open_dataset(files, engine='amrex', level=0)
print(f"Level 0: {dict(ds_l0.sizes)}") # {'ocean_time': 3, 'z': 16, 'y': 15, 'x': 42}
# Level 1: Refined in x,y but not z
ds_l1 = xr.open_dataset(files, engine='amrex', level=1)
print(f"Level 1: {dict(ds_l1.sizes)}") # {'ocean_time': 3, 'z': 16, 'y': 45, 'x': 126}

- Lazy Loading: Dask arrays mean large datasets don't overwhelm memory
- Chunked Access: Only load data when and where you need it
- Efficient Concatenation: Time series concatenation preserves lazy evaluation
# Handle hundreds of time steps efficiently
all_files = xamrex.find_amrex_time_series("massive_simulation/", "plt_*")
print(f"Found {len(all_files)} files") # Could be 1000+ files
# Still loads quickly (metadata only)
ds = xamrex.open_amrex_time_series(all_files, level=0)
# Extract just what you need
recent = ds.isel(ocean_time=slice(-10, None)) # Last 10 time steps
subset = recent.sel(x=slice(0.25, 0.75)) # Spatial subset
computed = subset.compute() # Only then load data

# Validate compatibility before loading
validation = xamrex.validate_time_series_compatibility(plotfiles, level=1)
if validation['compatible']:
    ds = xamrex.open_amrex_time_series(plotfiles, level=1)
else:
    print(f"Issues: {validation['issues']}")
    print(f"Available fields: {validation['fields']}")

- Python >= 3.8
- xarray >= 2023.1.0
- numpy >= 1.20
- dask[array] >= 2021.1
- pandas >= 1.5.0
- Multi-Time User Guide - Detailed multi-time functionality guide
- Examples - Example scripts and Jupyter notebooks
- API Reference - Complete API documentation
# Run test suite
python -m pytest tests/
# Test multi-time functionality specifically
python tests/test_multi_time.py

Contributions are welcome! Please feel free to submit a Pull Request.