From 39341960bf4c66d100d4d4c9d7ba7c5f5747ea60 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Fri, 28 Feb 2025 18:55:02 +0000 Subject: [PATCH 01/18] Error on bad rec sizes in files --- src/formats/dmap.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/formats/dmap.rs b/src/formats/dmap.rs index a484f75..b6cf20d 100644 --- a/src/formats/dmap.rs +++ b/src/formats/dmap.rs @@ -39,6 +39,11 @@ pub trait Record<'a>: rec_size = i32::from_le_bytes(buffer[rec_start + 4..rec_start + 8].try_into().unwrap()) as usize; // advance 4 bytes, skipping the "code" field rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() + if rec_end > buffer.len() { + return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} has size greater than remaining length of buffer ({} > {})", slices.len(), rec_start, rec_size, buffer.len() - rec_start))) + } else if rec_size <= 0 { + return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} has non-positive size {} <= 0", slices.len(), rec_start, rec_size))) + } slices.push(Cursor::new(buffer[rec_start..rec_end].to_vec())); rec_start = rec_end; } From 642bd7966c9a55e1b2eb6b69fd4a0d41e3262f92 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Thu, 19 Jun 2025 20:31:59 +0000 Subject: [PATCH 02/18] Created reading functions that are lax w.r.t. corrupt records. They return `([recs], int|None)`, where the second element is the byte where the corrupted records start, or None if no records are corrupted. These functions have the same python signature, but are named `read_dmap_lax`, `read_iqdat_lax`, etc. 
--- benches/io_benchmarking.rs | 7 ++- src/error.rs | 2 +- src/formats/dmap.rs | 82 ++++++++++++++++++++++++-- src/lib.rs | 116 ++++++++++++++++++++++++++++++++++--- tests/tests.rs | 2 +- 5 files changed, 193 insertions(+), 16 deletions(-) diff --git a/benches/io_benchmarking.rs b/benches/io_benchmarking.rs index 14ea311..08129a1 100644 --- a/benches/io_benchmarking.rs +++ b/benches/io_benchmarking.rs @@ -77,5 +77,10 @@ fn read_snd() -> Vec { let file = File::open("tests/test_files/test.snd").expect("Test file not found"); SndRecord::read_records(file).unwrap() } -criterion_group!(benches, criterion_benchmark); + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = criterion_benchmark +} criterion_main!(benches); diff --git a/src/error.rs b/src/error.rs index 1154375..d82e61f 100644 --- a/src/error.rs +++ b/src/error.rs @@ -33,7 +33,7 @@ pub enum DmapError { /// Errors when reading in multiple records #[error("First error: {1}\nRecords with errors: {0:?}")] - BadRecords(Vec, String) + BadRecords(Vec, String), } impl From for PyErr { diff --git a/src/formats/dmap.rs b/src/formats/dmap.rs index b6cf20d..c538bc7 100644 --- a/src/formats/dmap.rs +++ b/src/formats/dmap.rs @@ -40,9 +40,14 @@ pub trait Record<'a>: as usize; // advance 4 bytes, skipping the "code" field rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() if rec_end > buffer.len() { - return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} has size greater than remaining length of buffer ({} > {})", slices.len(), rec_start, rec_size, buffer.len() - rec_start))) + return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} has size greater than remaining length of buffer ({} > {})", slices.len(), rec_start, rec_size, buffer.len() - rec_start))); } else if rec_size <= 0 { - return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} has non-positive size {} <= 0", slices.len(), rec_start, 
rec_size))) + return Err(DmapError::InvalidRecord(format!( + "Record {} starting at byte {} has non-positive size {} <= 0", + slices.len(), + rec_start, + rec_size + ))); } slices.push(Cursor::new(buffer[rec_start..rec_end].to_vec())); rec_start = rec_end; @@ -63,15 +68,68 @@ pub trait Record<'a>: Err(e) => { dmap_errors.push(e); bad_recs.push(i); - }, + } } } if dmap_errors.len() > 0 { - return Err(DmapError::BadRecords(bad_recs, dmap_errors[0].to_string())) + return Err(DmapError::BadRecords(bad_recs, dmap_errors[0].to_string())); } Ok(dmap_records) } + /// Reads from dmap_data and parses into a collection of Records. + /// + /// Returns a tuple of `(good records, Option)`. + fn read_records_partial( + mut dmap_data: impl Read, + ) -> Result<(Vec, Option), DmapError> + where + Self: Sized, + Self: Send, + { + let mut buffer: Vec = vec![]; + dmap_data.read_to_end(&mut buffer)?; + + let mut dmap_records: Vec = vec![]; + let mut bad_byte: Option = None; + + let mut slices: Vec<_> = vec![]; + let mut rec_start: usize = 0; + let mut rec_size: usize; + let mut rec_end: usize; + + let mut rec_starts = vec![]; + while ((rec_start + 2 * i32::size()) as u64) < buffer.len() as u64 { + rec_size = i32::from_le_bytes(buffer[rec_start + 4..rec_start + 8].try_into().unwrap()) + as usize; // advance 4 bytes, skipping the "code" field + rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() + if rec_end > buffer.len() || rec_size <= 0 { + bad_byte = Some(rec_start); + rec_start = buffer.len(); // break from loop + } else { + rec_starts.push(rec_start); + slices.push(Cursor::new(buffer[rec_start..rec_end].to_vec())); + rec_start = rec_end; + } + } + let mut dmap_results: Vec> = vec![]; + dmap_results.par_extend( + slices + .par_iter_mut() + .map(|cursor| Self::parse_record(cursor)), + ); + + for (i, rec) in dmap_results.into_iter().enumerate() { + if let Ok(x) = rec { + dmap_records.push(x); + } else { + bad_byte = Some(rec_starts[i]); + 
break; + } + } + Ok((dmap_records, bad_byte)) + } + /// Read a DMAP file of type `Self` fn read_file(infile: &PathBuf) -> Result, DmapError> where @@ -88,6 +146,22 @@ pub trait Record<'a>: } } + /// Read a DMAP file of type `Self`, + fn read_file_partial(infile: &PathBuf) -> Result<(Vec, Option), DmapError> + where + Self: Sized, + Self: Send, + { + let file = File::open(infile)?; + match infile.extension() { + Some(ext) if ext == OsStr::new("bz2") => { + let compressor = BzDecoder::new(file); + Self::read_records_partial(compressor) + } + _ => Self::read_records_partial(file), + } + } + /// Reads a record starting from cursor position fn parse_record(cursor: &mut Cursor>) -> Result where diff --git a/src/lib.rs b/src/lib.rs index fdf8c64..57642dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -126,7 +126,8 @@ where }); if !errors.is_empty() { Err(DmapError::BadRecords( - errors.iter().map(|(i, _)| *i).collect(), errors[0].1.to_string() + errors.iter().map(|(i, _)| *i).collect(), + errors[0].1.to_string(), ))? } bytes.par_extend(rec_bytes.into_par_iter().flatten()); @@ -225,17 +226,16 @@ pub fn read_snd(infile: PathBuf) -> Result, DmapError> { SndRecord::read_file(&infile) } -/// Reads the data from infile into a collection of `IndexMap`s +/// Reads the data from infile into `Vec`. +/// +/// Returns `Err` if any records are corrupted. fn read_generic Record<'a> + Send>( infile: PathBuf, ) -> Result>, DmapError> { - match T::read_file(&infile) { - Ok(recs) => { - let new_recs = recs.into_iter().map(|rec| rec.inner()).collect(); - Ok(new_recs) - } - Err(e) => Err(e), - } + Ok(T::read_file(&infile)? + .into_iter() + .map(|rec| rec.inner()) + .collect()) } /// Reads a generic DMAP file, returning a list of dictionaries containing the fields. 
@@ -294,6 +294,92 @@ fn read_snd_py(infile: PathBuf) -> PyResult>> { read_generic::(infile).map_err(PyErr::from) } +/// Reads the data from infile into a tuple of `([IndexMap], int|None)`, where +/// all valid records are returned, plus optionally the byte of the first record +/// with a corruption within the file. Compatible with RST behaviour. +fn read_generic_lax Record<'a> + Send>( + infile: PathBuf, +) -> Result<(Vec>, Option), DmapError> { + let result = T::read_file_partial(&infile)?; + Ok(( + result.0.into_iter().map(|rec| rec.inner()).collect(), + result.1, + )) +} + +/// Reads a generic DMAP file, returning a tuple of +/// (list of dictionaries containing the fields, byte where first corrupted record starts). +#[pyfunction] +#[pyo3(name = "read_dmap_lax")] +#[pyo3(text_signature = "(infile: str, /)")] +fn read_dmap_lax_py( + infile: PathBuf, +) -> PyResult<(Vec>, Option)> { + read_generic_lax::(infile).map_err(PyErr::from) +} + +/// Reads an IQDAT file, returning a tuple of +/// (list of dictionaries containing the fields, byte where first corrupted record starts). +#[pyfunction] +#[pyo3(name = "read_iqdat_lax")] +#[pyo3(text_signature = "(infile: str, /)")] +fn read_iqdat_lax_py( + infile: PathBuf, +) -> PyResult<(Vec>, Option)> { + read_generic_lax::(infile).map_err(PyErr::from) +} + +/// Reads a RAWACF file, returning a tuple of +/// (list of dictionaries containing the fields, byte where first corrupted record starts). +#[pyfunction] +#[pyo3(name = "read_rawacf_lax")] +#[pyo3(text_signature = "(infile: str, /)")] +fn read_rawacf_lax_py( + infile: PathBuf, +) -> PyResult<(Vec>, Option)> { + read_generic_lax::(infile).map_err(PyErr::from) +} + +/// Reads a FITACF file, returning a tuple of +/// (list of dictionaries containing the fields, byte where first corrupted record starts). 
+#[pyfunction] +#[pyo3(name = "read_fitacf_lax")] +#[pyo3(text_signature = "(infile: str, /)")] +fn read_fitacf_lax_py( + infile: PathBuf, +) -> PyResult<(Vec>, Option)> { + read_generic_lax::(infile).map_err(PyErr::from) +} + +/// Reads a GRID file, returning a tuple of +/// (list of dictionaries containing the fields, byte where first corrupted record starts). +#[pyfunction] +#[pyo3(name = "read_grid_lax")] +#[pyo3(text_signature = "(infile: str, /)")] +fn read_grid_lax_py( + infile: PathBuf, +) -> PyResult<(Vec>, Option)> { + read_generic_lax::(infile).map_err(PyErr::from) +} + +/// Reads a MAP file, returning a tuple of +/// (list of dictionaries containing the fields, byte where first corrupted record starts). +#[pyfunction] +#[pyo3(name = "read_map_lax")] +#[pyo3(text_signature = "(infile: str, /)")] +fn read_map_lax_py(infile: PathBuf) -> PyResult<(Vec>, Option)> { + read_generic_lax::(infile).map_err(PyErr::from) +} + +/// Reads an SND file, returning a tuple of +/// (list of dictionaries containing the fields, byte where first corrupted record starts). +#[pyfunction] +#[pyo3(name = "read_snd_lax")] +#[pyo3(text_signature = "(infile: str, /)")] +fn read_snd_lax_py(infile: PathBuf) -> PyResult<(Vec>, Option)> { + read_generic_lax::(infile).map_err(PyErr::from) +} + /// Checks that a list of dictionaries contains DMAP records, then appends to outfile. /// /// **NOTE:** No type checking is done, so the fields may not be written as the expected @@ -357,6 +443,7 @@ fn write_snd_py(recs: Vec>, outfile: PathBuf) -> PyR /// Functions for SuperDARN DMAP file format I/O. 
#[pymodule] fn dmap(m: &Bound<'_, PyModule>) -> PyResult<()> { + // Strict read functions m.add_function(wrap_pyfunction!(read_dmap_py, m)?)?; m.add_function(wrap_pyfunction!(read_iqdat_py, m)?)?; m.add_function(wrap_pyfunction!(read_rawacf_py, m)?)?; @@ -364,6 +451,17 @@ fn dmap(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(read_snd_py, m)?)?; m.add_function(wrap_pyfunction!(read_grid_py, m)?)?; m.add_function(wrap_pyfunction!(read_map_py, m)?)?; + + // Lax read functions + m.add_function(wrap_pyfunction!(read_dmap_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_iqdat_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_rawacf_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_fitacf_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_snd_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_grid_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_map_lax_py, m)?)?; + + // Write functions m.add_function(wrap_pyfunction!(write_dmap_py, m)?)?; m.add_function(wrap_pyfunction!(write_iqdat_py, m)?)?; m.add_function(wrap_pyfunction!(write_rawacf_py, m)?)?; diff --git a/tests/tests.rs b/tests/tests.rs index d54f4aa..741ba00 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -5,10 +5,10 @@ use dmap::formats::iqdat::IqdatRecord; use dmap::formats::map::MapRecord; use dmap::formats::rawacf::RawacfRecord; use dmap::formats::snd::SndRecord; +use dmap::{write_dmap, write_fitacf, write_grid, write_iqdat, write_map, write_rawacf, write_snd}; use itertools::izip; use std::fs::remove_file; use std::path::PathBuf; -use dmap::{write_iqdat, write_rawacf, write_fitacf, write_grid, write_map, write_snd, write_dmap}; #[test] fn read_write_generic() { From dadcbd03c61521af172dd125b713c056008c556a Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Tue, 24 Jun 2025 17:10:58 +0000 Subject: [PATCH 03/18] Used macro_rules to reduce similar code blocks. 
--- Cargo.toml | 1 + src/formats/dmap.rs | 124 ++-- src/formats/fitacf.rs | 56 +- src/formats/generic.rs | 69 +++ src/formats/grid.rs | 58 +- src/formats/iqdat.rs | 57 +- src/formats/map.rs | 56 +- src/formats/mod.rs | 6 +- src/formats/rawacf.rs | 59 +- src/formats/snd.rs | 56 +- src/lib.rs | 26 +- src/types.rs | 1249 ++++++---------------------------------- 12 files changed, 362 insertions(+), 1455 deletions(-) create mode 100644 src/formats/generic.rs diff --git a/Cargo.toml b/Cargo.toml index 4d94615..98f61b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ thiserror = "1.0.63" zerocopy = { version = "0.7.35", features = ["byteorder"] } lazy_static = "1.5.0" bzip2 = "0.4.4" +paste = "1.0.15" [dev-dependencies] criterion = { version = "0.4", features = ["html_reports"] } diff --git a/src/formats/dmap.rs b/src/formats/dmap.rs index c538bc7..7ae9322 100644 --- a/src/formats/dmap.rs +++ b/src/formats/dmap.rs @@ -52,6 +52,9 @@ pub trait Record<'a>: slices.push(Cursor::new(buffer[rec_start..rec_end].to_vec())); rec_start = rec_end; } + if rec_start != buffer.len() { + return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} incomplete; has size of {} bytes", slices.len() + 1, rec_start, buffer.len() - rec_start))) + } let mut dmap_results: Vec> = vec![]; dmap_results.par_extend( slices @@ -80,7 +83,7 @@ pub trait Record<'a>: /// Reads from dmap_data and parses into a collection of Records. /// /// Returns a tuple of `(good records, Option)`. 
- fn read_records_partial( + fn read_records_lax( mut dmap_data: impl Read, ) -> Result<(Vec, Option), DmapError> where @@ -105,7 +108,8 @@ pub trait Record<'a>: rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() if rec_end > buffer.len() || rec_size <= 0 { bad_byte = Some(rec_start); - rec_start = buffer.len(); // break from loop + break + // rec_start = buffer.len(); // break from loop } else { rec_starts.push(rec_start); slices.push(Cursor::new(buffer[rec_start..rec_end].to_vec())); @@ -147,7 +151,7 @@ pub trait Record<'a>: } /// Read a DMAP file of type `Self`, - fn read_file_partial(infile: &PathBuf) -> Result<(Vec, Option), DmapError> + fn read_file_lax(infile: &PathBuf) -> Result<(Vec, Option), DmapError> where Self: Sized, Self: Send, @@ -156,9 +160,9 @@ pub trait Record<'a>: match infile.extension() { Some(ext) if ext == OsStr::new("bz2") => { let compressor = BzDecoder::new(file); - Self::read_records_partial(compressor) + Self::read_records_lax(compressor) } - _ => Self::read_records_partial(file), + _ => Self::read_records_lax(file), } } @@ -532,67 +536,71 @@ pub trait Record<'a>: } } -#[derive(Debug, PartialEq, Clone)] -pub struct GenericRecord { - pub data: IndexMap, -} -impl GenericRecord { - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } -} +macro_rules! create_record_type { + ($format:ident, $fields:ident) => { + paste::paste! { + use crate::types::{DmapType, DmapField}; + use crate::error::DmapError; + use indexmap::IndexMap; + use crate::formats::dmap::Record; -impl Record<'_> for GenericRecord { - fn inner(self) -> IndexMap { - self.data - } + /// Struct containing the checked fields of a single RAWACF record. 
+ #[derive(Debug, PartialEq, Clone)] + pub struct [< $format:camel Record >] { + pub data: IndexMap, + } - fn new(fields: &mut IndexMap) -> Result { - Ok(GenericRecord { - data: fields.to_owned(), - }) - } - fn to_bytes(&self) -> Result, DmapError> { - let mut data_bytes: Vec = vec![]; - let mut num_scalars: i32 = 0; - let mut num_vectors: i32 = 0; + impl [< $format:camel Record >] { + /// Returns the field with name `key`, if it exists in the record. + pub fn get(&self, key: &String) -> Option<&DmapField> { + self.data.get(key) + } + + /// Returns the names of all fields stored in the record. + pub fn keys(&self) -> Vec<&String> { + self.data.keys().collect() + } + } - // Do a first pass, to get all the scalar fields - for (name, val) in self.data.iter() { - if let x @ DmapField::Scalar(_) = val { - data_bytes.extend(name.as_bytes()); - data_bytes.extend([0]); // null-terminate string - data_bytes.append(&mut x.as_bytes()); - num_scalars += 1; + impl Record<'_> for [< $format:camel Record>] { + fn inner(self) -> IndexMap { + self.data + } + fn new(fields: &mut IndexMap) -> Result<[< $format:camel Record>], DmapError> { + match Self::check_fields(fields, &$fields) { + Ok(_) => {} + Err(e) => Err(e)?, + } + + Ok([< $format:camel Record >] { + data: fields.to_owned(), + }) + } + fn to_bytes(&self) -> Result, DmapError> { + let (num_scalars, num_vectors, mut data_bytes) = + Self::data_to_bytes(&self.data, &$fields)?; + + let mut bytes: Vec = vec![]; + bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter + bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors + bytes.extend(num_scalars.as_bytes()); + bytes.extend(num_vectors.as_bytes()); + bytes.append(&mut data_bytes); // consumes data_bytes + Ok(bytes) + } } - } - // Do a second pass to convert all the vector fields - for (name, val) in self.data.iter() { - if let x @ DmapField::Vector(_) = val { - 
data_bytes.extend(name.as_bytes()); - data_bytes.extend([0]); // null-terminate string - data_bytes.append(&mut x.as_bytes()); - num_vectors += 1; + + impl TryFrom<&mut IndexMap> for [< $format:camel Record >] { + type Error = DmapError; + + fn try_from(value: &mut IndexMap) -> Result { + Self::coerce::<[< $format:camel Record>]>(value, &$fields) + } } } - let mut bytes: Vec = vec![]; - bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter - bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors - bytes.extend(num_scalars.as_bytes()); - bytes.extend(num_vectors.as_bytes()); - bytes.append(&mut data_bytes); // consumes data_bytes - Ok(bytes) } } -impl TryFrom<&mut IndexMap> for GenericRecord { - type Error = DmapError; +pub(crate) use create_record_type; - fn try_from(value: &mut IndexMap) -> Result { - GenericRecord::new(value) - } -} diff --git a/src/formats/fitacf.rs b/src/formats/fitacf.rs index 70555b8..a44dfc8 100644 --- a/src/formats/fitacf.rs +++ b/src/formats/fitacf.rs @@ -1,9 +1,6 @@ -use crate::error::DmapError; -use crate::formats::dmap::Record; -use crate::types::{DmapField, DmapType, Fields, Type}; -use indexmap::IndexMap; +use crate::formats::dmap::create_record_type; +use crate::types::{Fields, Type}; use lazy_static::lazy_static; -use std::convert::TryFrom; static SCALAR_FIELDS: [(&str, Type); 49] = [ ("radar.revision.major", Type::Char), @@ -179,52 +176,5 @@ lazy_static! 
{ }; } -#[derive(Debug, PartialEq, Clone)] -pub struct FitacfRecord { - pub data: IndexMap, -} - -impl FitacfRecord { - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } -} -impl Record<'_> for FitacfRecord { - fn inner(self) -> IndexMap { - self.data - } - - fn new(fields: &mut IndexMap) -> Result { - match Self::check_fields(fields, &FITACF_FIELDS) { - Ok(_) => {} - Err(e) => Err(e)?, - } +create_record_type!(fitacf, FITACF_FIELDS); - Ok(FitacfRecord { - data: fields.to_owned(), - }) - } - fn to_bytes(&self) -> Result, DmapError> { - let (num_scalars, num_vectors, mut data_bytes) = - Self::data_to_bytes(&self.data, &FITACF_FIELDS)?; - - let mut bytes: Vec = vec![]; - bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter - bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors - bytes.extend(num_scalars.as_bytes()); - bytes.extend(num_vectors.as_bytes()); - bytes.append(&mut data_bytes); // consumes data_bytes - Ok(bytes) - } -} - -impl TryFrom<&mut IndexMap> for FitacfRecord { - type Error = DmapError; - - fn try_from(value: &mut IndexMap) -> Result { - Self::coerce::(value, &FITACF_FIELDS) - } -} diff --git a/src/formats/generic.rs b/src/formats/generic.rs new file mode 100644 index 0000000..6da4a14 --- /dev/null +++ b/src/formats/generic.rs @@ -0,0 +1,69 @@ +use indexmap::IndexMap; +use crate::error::DmapError; +use crate::formats::dmap::Record; +use crate::types::{DmapField, DmapType}; + +#[derive(Debug, PartialEq, Clone)] +pub struct GenericRecord { + pub data: IndexMap, +} + +impl GenericRecord { + pub fn get(&self, key: &String) -> Option<&DmapField> { + self.data.get(key) + } + pub fn keys(&self) -> Vec<&String> { + self.data.keys().collect() + } +} + +impl Record<'_> for GenericRecord { + fn inner(self) -> IndexMap { + self.data + } + + fn new(fields: &mut 
IndexMap) -> Result { + Ok(GenericRecord { + data: fields.to_owned(), + }) + } + fn to_bytes(&self) -> Result, DmapError> { + let mut data_bytes: Vec = vec![]; + let mut num_scalars: i32 = 0; + let mut num_vectors: i32 = 0; + + // Do a first pass, to get all the scalar fields + for (name, val) in self.data.iter() { + if let x @ DmapField::Scalar(_) = val { + data_bytes.extend(name.as_bytes()); + data_bytes.extend([0]); // null-terminate string + data_bytes.append(&mut x.as_bytes()); + num_scalars += 1; + } + } + // Do a second pass to convert all the vector fields + for (name, val) in self.data.iter() { + if let x @ DmapField::Vector(_) = val { + data_bytes.extend(name.as_bytes()); + data_bytes.extend([0]); // null-terminate string + data_bytes.append(&mut x.as_bytes()); + num_vectors += 1; + } + } + let mut bytes: Vec = vec![]; + bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter + bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors + bytes.extend(num_scalars.as_bytes()); + bytes.extend(num_vectors.as_bytes()); + bytes.append(&mut data_bytes); // consumes data_bytes + Ok(bytes) + } +} + +impl TryFrom<&mut IndexMap> for GenericRecord { + type Error = DmapError; + + fn try_from(value: &mut IndexMap) -> Result { + GenericRecord::new(value) + } +} \ No newline at end of file diff --git a/src/formats/grid.rs b/src/formats/grid.rs index c5282a6..50e2859 100644 --- a/src/formats/grid.rs +++ b/src/formats/grid.rs @@ -1,9 +1,6 @@ -use crate::error::DmapError; -use crate::formats::dmap::Record; -use crate::types::{DmapField, DmapType, Fields, Type}; -use indexmap::IndexMap; +use crate::formats::dmap::create_record_type; +use crate::types::{Fields, Type}; use lazy_static::lazy_static; -use std::convert::TryFrom; static SCALAR_FIELDS: [(&str, Type); 12] = [ ("start.year", Type::Short), @@ -113,54 +110,5 @@ lazy_static! 
{ }; } -/// Struct containing the checked fields of a single GRID record. -#[derive(Debug, PartialEq, Clone)] -pub struct GridRecord { - pub data: IndexMap, -} - -impl GridRecord { - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } -} - -impl Record<'_> for GridRecord { - fn inner(self) -> IndexMap { - self.data - } - - fn new(fields: &mut IndexMap) -> Result { - match Self::check_fields(fields, &GRID_FIELDS) { - Ok(_) => {} - Err(e) => Err(e)?, - } +create_record_type!(grid, GRID_FIELDS); - Ok(GridRecord { - data: fields.to_owned(), - }) - } - fn to_bytes(&self) -> Result, DmapError> { - let (num_scalars, num_vectors, mut data_bytes) = - Self::data_to_bytes(&self.data, &GRID_FIELDS)?; - - let mut bytes: Vec = vec![]; - bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter - bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors - bytes.extend(num_scalars.as_bytes()); - bytes.extend(num_vectors.as_bytes()); - bytes.append(&mut data_bytes); // consumes data_bytes - Ok(bytes) - } -} - -impl TryFrom<&mut IndexMap> for GridRecord { - type Error = DmapError; - - fn try_from(value: &mut IndexMap) -> Result { - Self::coerce::(value, &GRID_FIELDS) - } -} diff --git a/src/formats/iqdat.rs b/src/formats/iqdat.rs index 9e6aa5d..a5d180d 100644 --- a/src/formats/iqdat.rs +++ b/src/formats/iqdat.rs @@ -1,9 +1,6 @@ -use crate::error::DmapError; -use crate::formats::dmap::Record; -use crate::types::{DmapField, DmapType, Fields, Type}; -use indexmap::IndexMap; +use crate::formats::dmap::create_record_type; +use crate::types::{Fields, Type}; use lazy_static::lazy_static; -use std::convert::TryFrom; static SCALAR_FIELDS: [(&str, Type); 50] = [ ("radar.revision.major", Type::Char), @@ -95,53 +92,5 @@ lazy_static! 
{ }; } -#[derive(Debug, PartialEq, Clone)] -pub struct IqdatRecord { - pub data: IndexMap, -} - -impl IqdatRecord { - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } -} - -impl Record<'_> for IqdatRecord { - fn inner(self) -> IndexMap { - self.data - } - - fn new(fields: &mut IndexMap) -> Result { - match Self::check_fields(fields, &IQDAT_FIELDS) { - Ok(_) => {} - Err(e) => Err(e)?, - } +create_record_type!(iqdat, IQDAT_FIELDS); - Ok(IqdatRecord { - data: fields.to_owned(), - }) - } - fn to_bytes(&self) -> Result, DmapError> { - let (num_scalars, num_vectors, mut data_bytes) = - Self::data_to_bytes(&self.data, &IQDAT_FIELDS)?; - - let mut bytes: Vec = vec![]; - bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter - bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors - bytes.extend(num_scalars.as_bytes()); - bytes.extend(num_vectors.as_bytes()); - bytes.append(&mut data_bytes); // consumes data_bytes - Ok(bytes) - } -} - -impl TryFrom<&mut IndexMap> for IqdatRecord { - type Error = DmapError; - - fn try_from(value: &mut IndexMap) -> Result { - Self::coerce::(value, &IQDAT_FIELDS) - } -} diff --git a/src/formats/map.rs b/src/formats/map.rs index ccf9b24..38790e6 100644 --- a/src/formats/map.rs +++ b/src/formats/map.rs @@ -1,7 +1,5 @@ -use crate::error::DmapError; -use crate::formats::dmap::Record; -use crate::types::{DmapField, DmapType, Fields, Type}; -use indexmap::IndexMap; +use crate::formats::dmap::create_record_type; +use crate::types::{Fields, Type}; use lazy_static::lazy_static; static SCALAR_FIELDS: [(&str, Type); 35] = [ @@ -167,53 +165,5 @@ lazy_static! 
{ }; } -#[derive(Debug, PartialEq, Clone)] -pub struct MapRecord { - pub data: IndexMap, -} - -impl MapRecord { - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } -} - -impl Record<'_> for MapRecord { - fn inner(self) -> IndexMap { - self.data - } - - fn new(fields: &mut IndexMap) -> Result { - match Self::check_fields(fields, &MAP_FIELDS) { - Ok(_) => {} - Err(e) => Err(e)?, - } +create_record_type!(map, MAP_FIELDS); - Ok(MapRecord { - data: fields.to_owned(), - }) - } - fn to_bytes(&self) -> Result, DmapError> { - let (num_scalars, num_vectors, mut data_bytes) = - Self::data_to_bytes(&self.data, &MAP_FIELDS)?; - - let mut bytes: Vec = vec![]; - bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter - bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors - bytes.extend(num_scalars.as_bytes()); - bytes.extend(num_vectors.as_bytes()); - bytes.append(&mut data_bytes); // consumes data_bytes - Ok(bytes) - } -} - -impl TryFrom<&mut IndexMap> for MapRecord { - type Error = DmapError; - - fn try_from(value: &mut IndexMap) -> Result { - Self::coerce::(value, &MAP_FIELDS) - } -} diff --git a/src/formats/mod.rs b/src/formats/mod.rs index c9eed85..8dca27c 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -1,6 +1,6 @@ //! The supported DMAP file formats. -/// The generic [Dmap file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/dmap_data/) +/// The shared functionality (Trait) common to all DMAP record types. 
pub mod dmap; /// The [FitACF file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/fitacf/) @@ -20,3 +20,7 @@ pub mod rawacf; /// The [SND file format](https://github.com/SuperDARN/rst/pull/315) pub mod snd; + +/// The generic [Dmap file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/dmap_data/) + +pub mod generic; diff --git a/src/formats/rawacf.rs b/src/formats/rawacf.rs index d04db08..b0784f5 100644 --- a/src/formats/rawacf.rs +++ b/src/formats/rawacf.rs @@ -1,7 +1,5 @@ -use crate::error::DmapError; -use crate::formats::dmap::Record; -use crate::types::{DmapField, DmapType, Fields, Type}; -use indexmap::IndexMap; +use crate::formats::dmap::create_record_type; +use crate::types::{Fields, Type}; use lazy_static::lazy_static; static SCALAR_FIELDS: [(&str, Type); 47] = [ @@ -84,56 +82,5 @@ lazy_static! { }; } -/// Struct containing the checked fields of a single RAWACF record. -#[derive(Debug, PartialEq, Clone)] -pub struct RawacfRecord { - pub data: IndexMap, -} - -impl RawacfRecord { - /// Returns the field with name `key`, if it exists in the record. - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - - /// Returns the names of all fields stored in the record. 
- pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } -} - -impl Record<'_> for RawacfRecord { - fn inner(self) -> IndexMap { - self.data - } - fn new(fields: &mut IndexMap) -> Result { - match Self::check_fields(fields, &RAWACF_FIELDS) { - Ok(_) => {} - Err(e) => Err(e)?, - } +create_record_type!(rawacf, RAWACF_FIELDS); - Ok(RawacfRecord { - data: fields.to_owned(), - }) - } - fn to_bytes(&self) -> Result, DmapError> { - let (num_scalars, num_vectors, mut data_bytes) = - Self::data_to_bytes(&self.data, &RAWACF_FIELDS)?; - - let mut bytes: Vec = vec![]; - bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter - bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors - bytes.extend(num_scalars.as_bytes()); - bytes.extend(num_vectors.as_bytes()); - bytes.append(&mut data_bytes); // consumes data_bytes - Ok(bytes) - } -} - -impl TryFrom<&mut IndexMap> for RawacfRecord { - type Error = DmapError; - - fn try_from(value: &mut IndexMap) -> Result { - Self::coerce::(value, &RAWACF_FIELDS) - } -} diff --git a/src/formats/snd.rs b/src/formats/snd.rs index 19932cc..7b12abb 100644 --- a/src/formats/snd.rs +++ b/src/formats/snd.rs @@ -1,9 +1,6 @@ -use crate::error::DmapError; -use crate::formats::dmap::Record; -use crate::types::{DmapField, DmapType, Fields, Type}; -use indexmap::IndexMap; +use crate::formats::dmap::create_record_type; +use crate::types::{Fields, Type}; use lazy_static::lazy_static; -use std::convert::TryFrom; static SCALAR_FIELDS: [(&str, Type); 37] = [ ("radar.revision.major", Type::Char), @@ -84,52 +81,5 @@ lazy_static! 
{ }; } -#[derive(Debug, PartialEq, Clone)] -pub struct SndRecord { - pub data: IndexMap, -} - -impl SndRecord { - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } -} -impl Record<'_> for SndRecord { - fn inner(self) -> IndexMap { - self.data - } - - fn new(fields: &mut IndexMap) -> Result { - match Self::check_fields(fields, &SND_FIELDS) { - Ok(_) => {} - Err(e) => Err(e)?, - } - - Ok(SndRecord { - data: fields.to_owned(), - }) - } - fn to_bytes(&self) -> Result, DmapError> { - let (num_scalars, num_vectors, mut data_bytes) = - Self::data_to_bytes(&self.data, &SND_FIELDS)?; +create_record_type!(snd, SND_FIELDS); - let mut bytes: Vec = vec![]; - bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter - bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors - bytes.extend(num_scalars.as_bytes()); - bytes.extend(num_vectors.as_bytes()); - bytes.append(&mut data_bytes); // consumes data_bytes - Ok(bytes) - } -} - -impl TryFrom<&mut IndexMap> for SndRecord { - type Error = DmapError; - - fn try_from(value: &mut IndexMap) -> Result { - Self::coerce::(value, &SND_FIELDS) - } -} diff --git a/src/lib.rs b/src/lib.rs index 57642dd..8190b74 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,13 +10,14 @@ pub mod formats; pub mod types; use crate::error::DmapError; -use crate::formats::dmap::{GenericRecord, Record}; +use crate::formats::dmap::Record; use crate::formats::fitacf::FitacfRecord; use crate::formats::grid::GridRecord; use crate::formats::iqdat::IqdatRecord; use crate::formats::map::MapRecord; use crate::formats::rawacf::RawacfRecord; use crate::formats::snd::SndRecord; +use crate::formats::generic::GenericRecord; use crate::types::DmapField; use bzip2::read::BzEncoder; use bzip2::Compression; @@ -50,8 +51,11 @@ fn write_to_file(bytes: Vec, outfile: &PathBuf) -> Result<(), 
std::io::Error file.write_all(&out_bytes) } -/// Writes a collection of `impl Record`s to `outfile` -fn write_generic<'a>(mut recs: Vec>, outfile: &PathBuf) -> Result<(), DmapError> { +/// Writes a collection of `Record`s to `outfile`. +/// +/// Prefer using the specific functions, e.g. `write_dmap`, `write_rawacf`, etc. for their +/// specific field checks. +pub fn write_records<'a>(mut recs: Vec>, outfile: &PathBuf) -> Result<(), DmapError> { let mut bytes: Vec = vec![]; let (errors, rec_bytes): (Vec<_>, Vec<_>) = recs.par_iter_mut() @@ -72,37 +76,37 @@ fn write_generic<'a>(mut recs: Vec>, outfile: &PathBuf) -> Resul /// Write generic DMAP to `outfile` pub fn write_dmap(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_generic(recs, outfile) + write_records(recs, outfile) } /// Write IQDAT records to `outfile`. pub fn write_iqdat(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_generic(recs, outfile) + write_records(recs, outfile) } /// Write RAWACF records to `outfile`. pub fn write_rawacf(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_generic(recs, outfile) + write_records(recs, outfile) } /// Write FITACF records to `outfile`. pub fn write_fitacf(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_generic(recs, outfile) + write_records(recs, outfile) } /// Write GRID records to `outfile`. pub fn write_grid(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_generic(recs, outfile) + write_records(recs, outfile) } /// Write MAP records to `outfile`. pub fn write_map(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_generic(recs, outfile) + write_records(recs, outfile) } /// Write SND records to `outfile`. pub fn write_snd(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_generic(recs, outfile) + write_records(recs, outfile) } /// Attempts to convert `recs` to `T` then append to `outfile`. 
@@ -300,7 +304,7 @@ fn read_snd_py(infile: PathBuf) -> PyResult>> { fn read_generic_lax Record<'a> + Send>( infile: PathBuf, ) -> Result<(Vec>, Option), DmapError> { - let result = T::read_file_partial(&infile)?; + let result = T::read_file_lax(&infile)?; Ok(( result.0.into_iter().map(|rec| rec.inner()).collect(), result.1, diff --git a/src/types.rs b/src/types.rs index e07cfc1..3fd908c 100644 --- a/src/types.rs +++ b/src/types.rs @@ -4,6 +4,7 @@ use indexmap::IndexMap; use numpy::array::PyArray; use numpy::ndarray::ArrayD; use numpy::PyArrayMethods; +use paste::paste; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::{Bound, FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python}; @@ -257,97 +258,32 @@ impl DmapVec { /// Copies the data and metadata (dimensions, `Type` key) to raw bytes pub(crate) fn as_bytes(&self) -> Vec { let mut bytes: Vec = DmapType::as_bytes(&self.get_type().key()).to_vec(); - match self { - DmapVec::Char(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for &dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } - } - DmapVec::Short(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for &dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } - } - DmapVec::Int(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for &dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } - } - DmapVec::Long(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for &dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } - } - DmapVec::Uchar(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for 
&dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } - } - DmapVec::Ushort(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for &dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } - } - DmapVec::Uint(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for &dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } - } - DmapVec::Ulong(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for &dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } - } - DmapVec::Float(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for &dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } - } - DmapVec::Double(x) => { - bytes.extend((x.ndim() as i32).to_le_bytes()); - for &dim in x.shape().iter().rev() { - bytes.extend((dim as i32).to_le_bytes()); - } - for y in x.iter() { - bytes.append(&mut DmapType::as_bytes(y).to_vec()); + + macro_rules! 
vec_to_bytes { + ($bytes:ident, $x:ident) => { + { + $bytes.extend(($x.ndim() as i32).to_le_bytes()); + for &dim in $x.shape().iter().rev() { + $bytes.extend((dim as i32).to_le_bytes()); + } + for y in $x.iter() { + $bytes.append(&mut DmapType::as_bytes(y).to_vec()); + } } } + } + + match self { + DmapVec::Char(x) => vec_to_bytes!(bytes, x), + DmapVec::Short(x) => vec_to_bytes!(bytes, x), + DmapVec::Int(x) => vec_to_bytes!(bytes, x), + DmapVec::Long(x) => vec_to_bytes!(bytes, x), + DmapVec::Uchar(x) => vec_to_bytes!(bytes, x), + DmapVec::Ushort(x) => vec_to_bytes!(bytes, x), + DmapVec::Uint(x) => vec_to_bytes!(bytes, x), + DmapVec::Ulong(x) => vec_to_bytes!(bytes, x), + DmapVec::Float(x) => vec_to_bytes!(bytes, x), + DmapVec::Double(x) => vec_to_bytes!(bytes, x), }; bytes } @@ -410,186 +346,64 @@ impl<'py> FromPyObject<'py> for DmapVec { } } } -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Char(value) - } -} -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Short(value) - } -} -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Int(value) - } -} -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Long(value) - } -} -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Uchar(value) - } -} -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Ushort(value) - } -} -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Uint(value) - } -} -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Ulong(value) - } -} -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Float(value) - } -} -impl From> for DmapVec { - fn from(value: ArrayD) -> Self { - DmapVec::Double(value) - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Char(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to 
ArrayD".to_string(), - )) - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Short(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to ArrayD".to_string(), - )) - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Int(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to ArrayD".to_string(), - )) +/// Generates trait implementations for infallible conversion into DmapVec and fallible conversion +/// back. +/// Example: `vec_impls!(ArrayD, DmapVec::Char)` will generate `impl From> for +/// DmapVec` and `impl TryFrom for ArrayD` code blocks. +macro_rules! vec_impls { + ($type:ty, $enum_var:path) => { + impl From<$type> for DmapVec { + fn from(value: $type) -> Self { + $enum_var(value) + } } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Long(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to ArrayD".to_string(), - )) - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; + impl TryFrom for $type { + type Error = DmapError; - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Uchar(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to ArrayD".to_string(), - )) - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Ushort(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to ArrayD".to_string(), - )) + fn try_from(value: DmapVec) -> std::result::Result { + if let $enum_var(x) = value { + Ok(x) + } else { + Err(DmapError::InvalidVector( + format!("Cannot convert to {}", stringify!($type)) + )) + } + } } - } -} -impl TryFrom for ArrayD { - type 
Error = DmapError; - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Uint(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to ArrayD".to_string(), - )) + impl From<$type> for DmapField { + fn from(value: $type) -> Self { + DmapField::Vector($enum_var(value)) + } } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Ulong(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to ArrayD".to_string(), - )) - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; + impl TryFrom for $type { + type Error = DmapError; - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Float(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to ArrayD".to_string(), - )) + fn try_from(value: DmapField) -> std::result::Result { + match value { + DmapField::Vector(x) => x.try_into(), + _ => Err(Self::Error::InvalidVector( + format!("Cannot interpret as {}", stringify!($type)) + )), + } + } } } } -impl TryFrom for ArrayD { - type Error = DmapError; - fn try_from(value: DmapVec) -> std::result::Result { - if let DmapVec::Double(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidVector( - "Cannot convert to ArrayD".to_string(), - )) - } - } -} +vec_impls!(ArrayD, DmapVec::Char); +vec_impls!(ArrayD, DmapVec::Short); +vec_impls!(ArrayD, DmapVec::Int); +vec_impls!(ArrayD, DmapVec::Long); +vec_impls!(ArrayD, DmapVec::Uchar); +vec_impls!(ArrayD, DmapVec::Ushort); +vec_impls!(ArrayD, DmapVec::Uint); +vec_impls!(ArrayD, DmapVec::Ulong); +vec_impls!(ArrayD, DmapVec::Float); +vec_impls!(ArrayD, DmapVec::Double); /// A generic field of a DMAP record. 
/// @@ -618,363 +432,59 @@ impl IntoPy for DmapField { } } } -impl From for DmapField { - fn from(value: i8) -> Self { - DmapField::Scalar(DmapScalar::Char(value)) - } -} -impl From for DmapField { - fn from(value: i16) -> Self { - DmapField::Scalar(DmapScalar::Short(value)) - } -} -impl From for DmapField { - fn from(value: i32) -> Self { - DmapField::Scalar(DmapScalar::Int(value)) - } -} -impl From for DmapField { - fn from(value: i64) -> Self { - DmapField::Scalar(DmapScalar::Long(value)) - } -} -impl From for DmapField { - fn from(value: u8) -> Self { - DmapField::Scalar(DmapScalar::Uchar(value)) - } -} -impl From for DmapField { - fn from(value: u16) -> Self { - DmapField::Scalar(DmapScalar::Ushort(value)) - } -} -impl From for DmapField { - fn from(value: u32) -> Self { - DmapField::Scalar(DmapScalar::Uint(value)) - } -} -impl From for DmapField { - fn from(value: u64) -> Self { - DmapField::Scalar(DmapScalar::Ulong(value)) - } -} -impl From for DmapField { - fn from(value: f32) -> Self { - DmapField::Scalar(DmapScalar::Float(value)) - } -} -impl From for DmapField { - fn from(value: f64) -> Self { - DmapField::Scalar(DmapScalar::Double(value)) - } -} -impl From for DmapField { - fn from(value: String) -> Self { - DmapField::Scalar(DmapScalar::String(value)) - } -} -impl From> for DmapField { - fn from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Char(value)) - } -} -impl From> for DmapField { - fn from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Short(value)) - } -} -impl From> for DmapField { - fn from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Int(value)) - } -} -impl From> for DmapField { - fn from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Long(value)) - } -} -impl From> for DmapField { - fn from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Uchar(value)) - } -} -impl From> for DmapField { - fn from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Ushort(value)) - } -} -impl From> for DmapField { - fn 
from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Uint(value)) - } -} -impl From> for DmapField { - fn from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Ulong(value)) - } -} -impl From> for DmapField { - fn from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Float(value)) - } -} -impl From> for DmapField { - fn from(value: ArrayD) -> Self { - DmapField::Vector(DmapVec::Double(value)) - } -} -impl TryFrom for i8 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as i8".to_string(), - )), - } - } -} -impl TryFrom for i16 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as i16".to_string(), - )), - } - } -} -impl TryFrom for i32 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as i32".to_string(), - )), - } - } -} -impl TryFrom for i64 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as i64".to_string(), - )), - } - } -} -impl TryFrom for u8 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as u8".to_string(), - )), - } - } -} -impl TryFrom for u16 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as u16".to_string(), - )), - } - } -} -impl 
TryFrom for u32 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as u32".to_string(), - )), - } - } -} -impl TryFrom for u64 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as u64".to_string(), - )), - } - } -} -impl TryFrom for f32 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as f32".to_string(), - )), - } - } -} -impl TryFrom for f64 { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret as f64".to_string(), - )), - } - } -} -impl TryFrom for String { - type Error = DmapError; - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - "Cannot interpret vector as String".to_string(), - )), - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - 
DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), +/// Macro for implementing conversion traits between primitives and `DmapField`, `DmapScalar` +/// types. +/// +/// Example: `scalar_impls(i8, DmapScalar::Char)` will implement: +/// `From for DmapField` +/// `TryFrom for i8` +/// `TryFrom for i8` +macro_rules! 
scalar_impls { + ($type:ty, $enum_var:path) => { + impl From<$type> for DmapField { + fn from(value: $type) -> Self { + DmapField::Scalar($enum_var(value)) + } } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; + impl TryFrom for $type { + type Error = DmapError; - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), + fn try_from(value: DmapField) -> std::result::Result { + match value { + DmapField::Scalar(x) => x.try_into(), + _ => Err(Self::Error::InvalidScalar( + format!("Cannot interpret as {}", stringify!($type)), + )), + } + } } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), + impl TryFrom for $type { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + if let $enum_var(x) = value { + Ok(x) + } else { + Err(DmapError::InvalidScalar(format!( + "Unable to convert {value} to {}", stringify!($type) + ))) + } + } } } } -impl TryFrom for ArrayD { - type Error = DmapError; - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), - } - } -} -impl TryFrom for ArrayD { - type Error = DmapError; - - fn try_from(value: DmapField) -> std::result::Result { - match value { - DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - "Cannot interpret as ArrayD".to_string(), - )), - } - } -} +scalar_impls!(i8, DmapScalar::Char); +scalar_impls!(i16, DmapScalar::Short); +scalar_impls!(i32, DmapScalar::Int); +scalar_impls!(i64, DmapScalar::Long); +scalar_impls!(u8, DmapScalar::Uchar); +scalar_impls!(u16, 
DmapScalar::Ushort); +scalar_impls!(u32, DmapScalar::Uint); +scalar_impls!(u64, DmapScalar::Ulong); +scalar_impls!(f32, DmapScalar::Float); +scalar_impls!(f64, DmapScalar::Double); +scalar_impls!(String, DmapScalar::String); /// Trait for raw types that can be stored in DMAP files. pub trait DmapType: std::fmt::Debug { @@ -991,192 +501,60 @@ pub trait DmapType: std::fmt::Debug { /// Get the `Type` variant that represents `self` fn dmap_type(&self) -> Type; } -impl DmapType for i8 { - fn size() -> usize { - 1 - } - fn as_bytes(&self) -> Vec { - AsBytes::as_bytes(self).to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Char - } -} -impl DmapType for i16 { - fn size() -> usize { - 2 - } - fn as_bytes(&self) -> Vec { - let mut bytes = [0; 2]; - LittleEndian::write_i16(&mut bytes, *self); - bytes.to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Short - } -} -impl DmapType for i32 { - fn size() -> usize { - 4 - } - fn as_bytes(&self) -> Vec { - let mut bytes = [0; 4]; - LittleEndian::write_i32(&mut bytes, *self); - bytes.to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Int - } -} -impl DmapType for i64 { - fn size() -> usize { - 8 - } - fn as_bytes(&self) -> Vec { - let mut bytes = [0; 8]; - LittleEndian::write_i64(&mut bytes, *self); - bytes.to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Long - } -} -impl DmapType for u8 { - fn size() -> 
usize { - 1 - } - fn as_bytes(&self) -> Vec { - AsBytes::as_bytes(self).to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Uchar - } -} -impl DmapType for u16 { - fn size() -> usize { - 2 - } - fn as_bytes(&self) -> Vec { - let mut bytes = [0; 2]; - LittleEndian::write_u16(&mut bytes, *self); - bytes.to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Ushort - } -} -impl DmapType for u32 { - fn size() -> usize { - 4 - } - fn as_bytes(&self) -> Vec { - let mut bytes = [0; 4]; - LittleEndian::write_u32(&mut bytes, *self); - bytes.to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Uint - } -} -impl DmapType for u64 { - fn size() -> usize { - 8 - } - fn as_bytes(&self) -> Vec { - let mut bytes = [0; 8]; - LittleEndian::write_u64(&mut bytes, *self); - bytes.to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Ulong - } -} -impl DmapType for f32 { - fn size() -> usize { - 4 - } - fn as_bytes(&self) -> Vec { - let mut bytes = [0; 4]; - LittleEndian::write_f32(&mut bytes, *self); - bytes.to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Float - } -} -impl DmapType for f64 { - fn size() -> usize { - 8 - } - fn as_bytes(&self) -> Vec { - let mut bytes = [0; 8]; - LittleEndian::write_f64(&mut 
bytes, *self); - bytes.to_vec() - } - fn from_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) - } - fn dmap_type(&self) -> Type { - Type::Double + +/// Macro for implementing DmapType trait for primitive types. +/// Example: `type_impls!(i8, Type::Char, 1)` +macro_rules! type_impls { + // This variant captures single-byte types + ($type:ty, $enum_var:path, 1) => { + impl DmapType for $type { + fn size() -> usize { 1 } + fn as_bytes(&self) -> Vec { + AsBytes::as_bytes(self).to_vec() + } + fn from_bytes(bytes: &[u8]) -> Result + where + Self: Sized, + { + Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) + } + fn dmap_type(&self) -> Type { $enum_var } + } + }; + // This variant captures multi-byte primitive types + ($type:ty, $enum_var:path, $num_bytes:expr) => { + paste! { + impl DmapType for $type { + fn size() -> usize { $num_bytes } + fn as_bytes(&self) -> Vec { + let mut bytes = [0; $num_bytes]; + LittleEndian::[< write_ $type >](&mut bytes, *self); + bytes.to_vec() + } + fn from_bytes(bytes: &[u8]) -> Result + where + Self: Sized, + { + Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) + } + fn dmap_type(&self) -> Type { $enum_var } + } + } } } + +type_impls!(i8, Type::Char, 1); +type_impls!(i16, Type::Short, 2); +type_impls!(i32, Type::Int, 4); +type_impls!(i64, Type::Long, 8); +type_impls!(u8, Type::Uchar, 1); +type_impls!(u16, Type::Ushort, 2); +type_impls!(u32, Type::Uint, 4); +type_impls!(u64, Type::Ulong, 8); +type_impls!(f32, Type::Float, 4); +type_impls!(f64, Type::Double, 8); + +// This implementation differs significantly from the others, so it doesn't use the macro impl DmapType for String { fn size() -> usize { 0 @@ -1195,217 +573,6 @@ impl DmapType for String { Type::String } } -impl TryFrom for u8 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - 
match value { - DmapScalar::Char(x) => Ok(x as u8), - DmapScalar::Short(x) => Ok(x as u8), - DmapScalar::Int(x) => Ok(x as u8), - DmapScalar::Long(x) => Ok(x as u8), - DmapScalar::Uchar(x) => Ok(x), - DmapScalar::Ushort(x) => Ok(x as u8), - DmapScalar::Uint(x) => Ok(x as u8), - DmapScalar::Ulong(x) => Ok(x as u8), - DmapScalar::Float(x) => Ok(x as u8), - DmapScalar::Double(x) => Ok(x as u8), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to u8" - ))), - } - } -} -impl TryFrom for u16 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::Char(x) => Ok(x as u16), - DmapScalar::Short(x) => Ok(x as u16), - DmapScalar::Int(x) => Ok(x as u16), - DmapScalar::Long(x) => Ok(x as u16), - DmapScalar::Uchar(x) => Ok(x as u16), - DmapScalar::Ushort(x) => Ok(x), - DmapScalar::Uint(x) => Ok(x as u16), - DmapScalar::Ulong(x) => Ok(x as u16), - DmapScalar::Float(x) => Ok(x as u16), - DmapScalar::Double(x) => Ok(x as u16), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to u16" - ))), - } - } -} -impl TryFrom for u32 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::Char(x) => Ok(x as u32), - DmapScalar::Short(x) => Ok(x as u32), - DmapScalar::Int(x) => Ok(x as u32), - DmapScalar::Long(x) => Ok(x as u32), - DmapScalar::Uchar(x) => Ok(x as u32), - DmapScalar::Ushort(x) => Ok(x as u32), - DmapScalar::Uint(x) => Ok(x), - DmapScalar::Ulong(x) => Ok(x as u32), - DmapScalar::Float(x) => Ok(x as u32), - DmapScalar::Double(x) => Ok(x as u32), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to u32" - ))), - } - } -} -impl TryFrom for u64 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::Char(x) => Ok(x as u64), - DmapScalar::Short(x) => Ok(x as u64), - DmapScalar::Int(x) => 
Ok(x as u64), - DmapScalar::Long(x) => Ok(x as u64), - DmapScalar::Uchar(x) => Ok(x as u64), - DmapScalar::Ushort(x) => Ok(x as u64), - DmapScalar::Uint(x) => Ok(x as u64), - DmapScalar::Ulong(x) => Ok(x), - DmapScalar::Float(x) => Ok(x as u64), - DmapScalar::Double(x) => Ok(x as u64), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to u64" - ))), - } - } -} -impl TryFrom for i8 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::Char(x) => Ok(x), - DmapScalar::Short(x) => Ok(x as i8), - DmapScalar::Int(x) => Ok(x as i8), - DmapScalar::Long(x) => Ok(x as i8), - DmapScalar::Uchar(x) => Ok(x as i8), - DmapScalar::Ushort(x) => Ok(x as i8), - DmapScalar::Uint(x) => Ok(x as i8), - DmapScalar::Ulong(x) => Ok(x as i8), - DmapScalar::Float(x) => Ok(x as i8), - DmapScalar::Double(x) => Ok(x as i8), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to i8" - ))), - } - } -} -impl TryFrom for i16 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::Char(x) => Ok(x as i16), - DmapScalar::Short(x) => Ok(x), - DmapScalar::Int(x) => Ok(x as i16), - DmapScalar::Long(x) => Ok(x as i16), - DmapScalar::Uchar(x) => Ok(x as i16), - DmapScalar::Ushort(x) => Ok(x as i16), - DmapScalar::Uint(x) => Ok(x as i16), - DmapScalar::Ulong(x) => Ok(x as i16), - DmapScalar::Float(x) => Ok(x as i16), - DmapScalar::Double(x) => Ok(x as i16), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to i16" - ))), - } - } -} -impl TryFrom for i32 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::Char(x) => Ok(x as i32), - DmapScalar::Short(x) => Ok(x as i32), - DmapScalar::Int(x) => Ok(x), - DmapScalar::Long(x) => Ok(x as i32), - DmapScalar::Uchar(x) => Ok(x as i32), - DmapScalar::Ushort(x) => Ok(x as 
i32), - DmapScalar::Uint(x) => Ok(x as i32), - DmapScalar::Ulong(x) => Ok(x as i32), - DmapScalar::Float(x) => Ok(x as i32), - DmapScalar::Double(x) => Ok(x as i32), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to i32" - ))), - } - } -} -impl TryFrom for i64 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::Char(x) => Ok(x as i64), - DmapScalar::Short(x) => Ok(x as i64), - DmapScalar::Int(x) => Ok(x as i64), - DmapScalar::Long(x) => Ok(x), - DmapScalar::Uchar(x) => Ok(x as i64), - DmapScalar::Ushort(x) => Ok(x as i64), - DmapScalar::Uint(x) => Ok(x as i64), - DmapScalar::Ulong(x) => Ok(x as i64), - DmapScalar::Float(x) => Ok(x as i64), - DmapScalar::Double(x) => Ok(x as i64), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to i64" - ))), - } - } -} -impl TryFrom for f32 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::Char(x) => Ok(x as f32), - DmapScalar::Short(x) => Ok(x as f32), - DmapScalar::Int(x) => Ok(x as f32), - DmapScalar::Long(x) => Ok(x as f32), - DmapScalar::Uchar(x) => Ok(x as f32), - DmapScalar::Ushort(x) => Ok(x as f32), - DmapScalar::Uint(x) => Ok(x as f32), - DmapScalar::Ulong(x) => Ok(x as f32), - DmapScalar::Float(x) => Ok(x), - DmapScalar::Double(x) => Ok(x as f32), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to f32" - ))), - } - } -} -impl TryFrom for f64 { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::Char(x) => Ok(x as f64), - DmapScalar::Short(x) => Ok(x as f64), - DmapScalar::Int(x) => Ok(x as f64), - DmapScalar::Long(x) => Ok(x as f64), - DmapScalar::Uchar(x) => Ok(x as f64), - DmapScalar::Ushort(x) => Ok(x as f64), - DmapScalar::Uint(x) => Ok(x as f64), - DmapScalar::Ulong(x) => Ok(x as f64), - 
DmapScalar::Float(x) => Ok(x as f64), - DmapScalar::Double(x) => Ok(x), - DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to f64" - ))), - } - } -} -impl TryFrom for String { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - match value { - DmapScalar::String(x) => Ok(x), - x => Err(DmapError::InvalidScalar(format!( - "Unable to convert {x} to String" - ))), - } - } -} /// Verify that `name` exists in `fields` and is of the correct `Type`. pub fn check_scalar( @@ -1610,68 +777,28 @@ pub(crate) fn parse_vector( record_size ))); } - + + macro_rules! dmapvec_from_cursor { + ($type:ty, $enum_var:path, $dims:ident, $cursor:ident, $num_elements:ident, $name:ident) => { + $enum_var( + ArrayD::from_shape_vec($dims, read_vector::<$type>($cursor, $num_elements)?) + .map_err(|e| { + DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) + })? + ) + } + } let vector: DmapVec = match data_type { - Type::Char => DmapVec::Char( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) - .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), - Type::Short => DmapVec::Short( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) - .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), - Type::Int => DmapVec::Int( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) - .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), - Type::Long => DmapVec::Long( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) - .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), - Type::Uchar => DmapVec::Uchar( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) 
- .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), - Type::Ushort => DmapVec::Ushort( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) - .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), - Type::Uint => DmapVec::Uint( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) - .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), - Type::Ulong => DmapVec::Ulong( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) - .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), - Type::Float => DmapVec::Float( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) - .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), - Type::Double => DmapVec::Double( - ArrayD::from_shape_vec(dimensions, read_vector::(cursor, total_elements)?) 
- .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })?, - ), + Type::Char => dmapvec_from_cursor!(i8, DmapVec::Char, dimensions, cursor, total_elements, name), + Type::Short => dmapvec_from_cursor!(i16, DmapVec::Short, dimensions, cursor, total_elements, name), + Type::Int => dmapvec_from_cursor!(i32, DmapVec::Int, dimensions, cursor, total_elements, name), + Type::Long => dmapvec_from_cursor!(i64, DmapVec::Long, dimensions, cursor, total_elements, name), + Type::Uchar => dmapvec_from_cursor!(u8, DmapVec::Uchar, dimensions, cursor, total_elements, name), + Type::Ushort => dmapvec_from_cursor!(u16, DmapVec::Ushort, dimensions, cursor, total_elements, name), + Type::Uint => dmapvec_from_cursor!(u32, DmapVec::Uint, dimensions, cursor, total_elements, name), + Type::Ulong => dmapvec_from_cursor!(u64, DmapVec::Ulong, dimensions, cursor, total_elements, name), + Type::Float => dmapvec_from_cursor!(f32, DmapVec::Float, dimensions, cursor, total_elements, name), + Type::Double => dmapvec_from_cursor!(f64, DmapVec::Double, dimensions, cursor, total_elements, name), _ => { return Err(DmapError::InvalidVector(format!( "Invalid type {} for DMAP vector {}", From 9f0575d5e6b7fd9e4b4757e1bc80adbfcfd32ebd Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Tue, 24 Jun 2025 19:50:17 +0000 Subject: [PATCH 04/18] Updated tests.rs to generate with macro_rules! 
--- tests/tests.rs | 337 ++++++++++++++----------------------------------- 1 file changed, 93 insertions(+), 244 deletions(-) diff --git a/tests/tests.rs b/tests/tests.rs index 741ba00..68d71df 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,256 +1,105 @@ -use dmap::formats::dmap::{GenericRecord, Record}; +use dmap::formats::dmap::Record; use dmap::formats::fitacf::FitacfRecord; use dmap::formats::grid::GridRecord; use dmap::formats::iqdat::IqdatRecord; use dmap::formats::map::MapRecord; use dmap::formats::rawacf::RawacfRecord; use dmap::formats::snd::SndRecord; -use dmap::{write_dmap, write_fitacf, write_grid, write_iqdat, write_map, write_rawacf, write_snd}; +use dmap::formats::generic::GenericRecord; +use dmap::{write_dmap, write_iqdat, write_rawacf, write_fitacf, write_grid, write_map, write_snd}; use itertools::izip; -use std::fs::remove_file; +use paste::paste; +use std::fs::{File, remove_file}; +use std::io::Write; use std::path::PathBuf; -#[test] -fn read_write_generic() { - let path = PathBuf::from("tests/test_files/test.rawacf"); - let tempfile = PathBuf::from("tests/test_files/generic.rawacf"); - let mut path_bz2: PathBuf = path.clone(); - path_bz2.set_file_name("test.rawacf.bz2"); - let mut temp_bz2: PathBuf = tempfile.clone(); - temp_bz2.set_file_name("generic.rawacf.bz2"); - - // Read in test files and verify they have the same contents (both regular and zipped versions) - let data = GenericRecord::read_file(&path).expect("Unable to read test.rawacf"); - let zipped_recs = GenericRecord::read_file(&path_bz2).expect("Cannot read test.rawacf.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), zipped_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a regular file, and then read back in and compare contents - _ = write_dmap(data.clone(), &tempfile).expect("Unable to write tmp.rawacf"); - let new_recs = GenericRecord::read_file(&tempfile).expect("Cannot read tmp.rawacf"); - for (ref read_rec, ref written_rec) in 
izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a zipped file, and then read back in and compare contents - _ = write_dmap(data.clone(), &temp_bz2).expect("Unable to write tmp.rawacf.bz2"); - let new_recs = GenericRecord::read_file(&temp_bz2).expect("Cannot read tmp.rawacf.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Clean up the temp files - remove_file(&tempfile).expect("Unable to delete generic.rawacf"); - remove_file(&temp_bz2).expect("Unable to delete generic.rawacf.bz2"); -} - -#[test] -fn read_write_iqdat() { - let path = PathBuf::from("tests/test_files/test.iqdat"); - let tempfile = PathBuf::from("tests/test_files/tmp.iqdat"); - let mut path_bz2: PathBuf = path.clone(); - path_bz2.set_file_name("test.iqdat.bz2"); - let mut temp_bz2: PathBuf = tempfile.clone(); - temp_bz2.set_file_name("tmp.iqdat.bz2"); - - // Read in test files and verify they have the same contents (both regular and zipped versions) - let data = IqdatRecord::read_file(&path).expect("Unable to read test.iqdat"); - let zipped_recs = IqdatRecord::read_file(&path_bz2).expect("Cannot read test.iqdat.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), zipped_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a regular file, and then read back in and compare contents - _ = write_iqdat(data.clone(), &tempfile).expect("Unable to write tmp.iqdat"); - let new_recs = IqdatRecord::read_file(&tempfile).expect("Cannot read tmp.iqdat"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a zipped file, and then read back in and compare contents - _ = write_iqdat(data.clone(), &temp_bz2).expect("Unable to write tmp.iqdat.bz2"); - let new_recs = IqdatRecord::read_file(&temp_bz2).expect("Cannot read tmp.iqdat.bz2"); - for (ref read_rec, ref written_rec) in 
izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Clean up the temp files - remove_file(&tempfile).expect("Unable to delete tmp.iqdat"); - remove_file(&temp_bz2).expect("Unable to delete tmp.iqdat.bz2"); -} - -#[test] -fn read_write_rawacf() { - let path = PathBuf::from("tests/test_files/test.rawacf"); - let tempfile = PathBuf::from("tests/test_files/tmp.rawacf"); - let mut path_bz2: PathBuf = path.clone(); - path_bz2.set_file_name("test.rawacf.bz2"); - let mut temp_bz2: PathBuf = tempfile.clone(); - temp_bz2.set_file_name("tmp.rawacf.bz2"); - - // Read in test files and verify they have the same contents (both regular and zipped versions) - let data = RawacfRecord::read_file(&path).expect("Unable to read test.rawacf"); - let zipped_recs = RawacfRecord::read_file(&path_bz2).expect("Cannot read test.rawacf.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), zipped_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a regular file, and then read back in and compare contents - _ = write_rawacf(data.clone(), &tempfile).expect("Unable to write tmp.rawacf"); - let new_recs = RawacfRecord::read_file(&tempfile).expect("Cannot read tmp.rawacf"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a zipped file, and then read back in and compare contents - _ = write_rawacf(data.clone(), &temp_bz2).expect("Unable to write tmp.rawacf.bz2"); - let new_recs = RawacfRecord::read_file(&temp_bz2).expect("Cannot read tmp.rawacf.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Clean up the temp files - remove_file(&tempfile).expect("Unable to delete tmp.rawacf"); - remove_file(&temp_bz2).expect("Unable to delete tmp.rawacf.bz2"); -} - -#[test] -fn read_write_fitacf() { - let path = PathBuf::from("tests/test_files/test.fitacf"); - let tempfile = 
PathBuf::from("tests/test_files/tmp.fitacf"); - let mut path_bz2: PathBuf = path.clone(); - path_bz2.set_file_name("test.fitacf.bz2"); - let mut temp_bz2: PathBuf = tempfile.clone(); - temp_bz2.set_file_name("tmp.fitacf.bz2"); - - // Read in test files and verify they have the same contents (both regular and zipped versions) - let data = FitacfRecord::read_file(&path).expect("Unable to read test.fitacf"); - let zipped_recs = FitacfRecord::read_file(&path_bz2).expect("Cannot read test.fitacf.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), zipped_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a regular file, and then read back in and compare contents - _ = write_fitacf(data.clone(), &tempfile).expect("Unable to write tmp.fitacf"); - let new_recs = FitacfRecord::read_file(&tempfile).expect("Cannot read tmp.fitacf"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a zipped file, and then read back in and compare contents - _ = write_fitacf(data.clone(), &temp_bz2).expect("Unable to write tmp.fitacf.bz2"); - let new_recs = FitacfRecord::read_file(&temp_bz2).expect("Cannot read tmp.fitacf.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Clean up the temp files - remove_file(&tempfile).expect("Unable to delete tmp.fitacf"); - remove_file(&temp_bz2).expect("Unable to delete tmp.fitacf.bz2"); -} - -#[test] -fn read_write_grid() { - let path = PathBuf::from("tests/test_files/test.grid"); - let tempfile = PathBuf::from("tests/test_files/tmp.grid"); - let mut path_bz2: PathBuf = path.clone(); - path_bz2.set_file_name("test.grid.bz2"); - let mut temp_bz2: PathBuf = tempfile.clone(); - temp_bz2.set_file_name("tmp.grid.bz2"); - - // Read in test files and verify they have the same contents (both regular and zipped versions) - let data = 
GridRecord::read_file(&path).expect("Unable to read test.grid"); - let zipped_recs = GridRecord::read_file(&path_bz2).expect("Cannot read test.grid.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), zipped_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a regular file, and then read back in and compare contents - _ = write_grid(data.clone(), &tempfile).expect("Unable to write tmp.grid"); - let new_recs = GridRecord::read_file(&tempfile).expect("Cannot read tmp.grid"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a zipped file, and then read back in and compare contents - _ = write_grid(data.clone(), &temp_bz2).expect("Unable to write tmp.grid.bz2"); - let new_recs = GridRecord::read_file(&temp_bz2).expect("Cannot read tmp.grid.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Clean up the temp files - remove_file(&tempfile).expect("Unable to delete tmp.grid"); - remove_file(&temp_bz2).expect("Unable to delete tmp.grid.bz2"); +/// Create tests for I/O on a given DMAP type. +macro_rules! make_test { + ($record_type:ident) => { + paste! 
{ + #[test] + fn [< test_ $record_type _io >] () { + let filename: PathBuf = PathBuf::from(format!("tests/test_files/test.{}", stringify!($record_type))); + let mut tempfile: PathBuf = filename.clone(); + tempfile.set_file_name(format!("tmp.{}", stringify!($record_type))); + + let data = [< $record_type:camel Record >]::read_file(&filename).expect("Unable to read file"); + + _ = [< write_ $record_type >](data.clone(), &tempfile).expect("Unable to write to file"); + let new_recs = [< $record_type:camel Record >]::read_file(&tempfile).expect("Cannot read tempfile"); + for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { + assert_eq!(read_rec, written_rec) + } + + // Clean up tempfile + remove_file(&tempfile).expect("Unable to delete tempfile"); + } + + #[test] + fn [< test_ $record_type _bz2_io >] () { + let filename: PathBuf = PathBuf::from(format!("tests/test_files/test.{}.bz2", stringify!($record_type))); + let mut tempfile: PathBuf = filename.clone(); + tempfile.set_file_name(format!("tmp.{}.bz2", stringify!($record_type))); + + let data = [< $record_type:camel Record >]::read_file(&filename).expect("Unable to read file"); + + _ = [< write_ $record_type >](data.clone(), &tempfile).expect("Unable to write to file"); + let new_recs = [< $record_type:camel Record >]::read_file(&tempfile).expect("Cannot read tempfile"); + for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { + assert_eq!(read_rec, written_rec) + } + + // Clean up tempfile + remove_file(&tempfile).expect("Unable to delete tempfile"); + } + + + #[test] + fn [< test_ $record_type _lax_io >] () { + let filename: PathBuf = PathBuf::from(format!("tests/test_files/test.{}", stringify!($record_type))); + let mut tempfile: PathBuf = filename.clone(); + tempfile.set_file_name(format!("tmp.{}.corrupt", stringify!($record_type))); + + let _ = std::fs::copy(filename.clone(), tempfile.clone()).expect("Could not copy to tempfile"); + let mut file = 
File::options().append(true).open(tempfile.clone()).unwrap(); + writeln!(&mut file, "not a valid record").expect("Could not write to tempfile"); + + let data = [< $record_type:camel Record >]::read_file(&filename).expect("Unable to read file"); + let (lax_data, bad_byte) = [< $record_type:camel Record >]::read_file_lax(&tempfile).expect("Unable to read tempfile"); + assert!(bad_byte.is_some()); + assert_eq!(bad_byte.unwrap(), (file.metadata().expect("Couldn't read tempfile metadata").len() as usize - 19)); + for (ref read_rec, ref lax_rec) in izip!(data.iter(), lax_data.iter()) { + assert_eq!(read_rec, lax_rec) + } + + // Clean up tempfile + remove_file(&tempfile).expect("Unable to delete tempfile"); + } + + #[test] + fn [< test_ $record_type _generic_io >] () { + let filename: PathBuf = PathBuf::from(format!("tests/test_files/test.{}", stringify!($record_type))); + let mut tempfile: PathBuf = filename.clone(); + tempfile.set_file_name(format!("tmp.{}.generic", stringify!($record_type))); + + let gen_data = GenericRecord::read_file(&filename).expect("Unable to read file"); + _ = write_dmap(gen_data.clone(), &tempfile).expect("Unable to write to file"); + let new_recs = GenericRecord::read_file(&tempfile).expect("Cannot read tempfile"); + for (new_rec, ref_rec) in izip!(new_recs.iter(), gen_data.iter()) { + assert_eq!(new_rec, ref_rec) + } + + // Clean up tempfile + remove_file(&tempfile).expect("Unable to delete tempfile"); + } + } + }; } -#[test] -fn read_write_map() { - let path = PathBuf::from("tests/test_files/test.map"); - let tempfile = PathBuf::from("tests/test_files/tmp.map"); - let mut path_bz2: PathBuf = path.clone(); - path_bz2.set_file_name("test.map.bz2"); - let mut temp_bz2: PathBuf = tempfile.clone(); - temp_bz2.set_file_name("tmp.map.bz2"); - - // Read in test files and verify they have the same contents (both regular and zipped versions) - let data = MapRecord::read_file(&path).expect("Unable to read test.map"); - let zipped_recs = 
MapRecord::read_file(&path_bz2).expect("Cannot read test.map.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), zipped_recs.iter()) { - assert_eq!(read_rec, written_rec) - } +make_test!(iqdat); +make_test!(rawacf); +make_test!(fitacf); +make_test!(grid); +make_test!(map); +make_test!(snd); - // Write to a regular file, and then read back in and compare contents - _ = write_map(data.clone(), &tempfile).expect("Unable to write tmp.map"); - let new_recs = MapRecord::read_file(&tempfile).expect("Cannot read tmp.map"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a zipped file, and then read back in and compare contents - _ = write_map(data.clone(), &temp_bz2).expect("Unable to write tmp.map.bz2"); - let new_recs = MapRecord::read_file(&temp_bz2).expect("Cannot read tmp.map.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Clean up the temp files - remove_file(&tempfile).expect("Unable to delete tmp.map"); - remove_file(&temp_bz2).expect("Unable to delete tmp.map.bz2"); -} - -#[test] -fn read_write_snd() { - let path = PathBuf::from("tests/test_files/test.snd"); - let tempfile = PathBuf::from("tests/test_files/tmp.snd"); - let mut path_bz2: PathBuf = path.clone(); - path_bz2.set_file_name("test.snd.bz2"); - let mut temp_bz2: PathBuf = tempfile.clone(); - temp_bz2.set_file_name("tmp.snd.bz2"); - - // Read in test files and verify they have the same contents (both regular and zipped versions) - let data = SndRecord::read_file(&path).expect("Unable to read test.snd"); - let zipped_recs = SndRecord::read_file(&path_bz2).expect("Cannot read test.snd.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), zipped_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a regular file, and then read back in and compare contents - _ = write_snd(data.clone(), 
&tempfile).expect("Unable to write tmp.snd"); - let new_recs = SndRecord::read_file(&tempfile).expect("Cannot read tmp.snd"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Write to a zipped file, and then read back in and compare contents - _ = write_snd(data.clone(), &temp_bz2).expect("Unable to write tmp.snd.bz2"); - let new_recs = SndRecord::read_file(&temp_bz2).expect("Cannot read tmp.snd.bz2"); - for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { - assert_eq!(read_rec, written_rec) - } - - // Clean up the temp files - remove_file(&tempfile).expect("Unable to delete tmp.snd"); - remove_file(&temp_bz2).expect("Unable to delete tmp.snd.bz2"); -} From 3969b0cac6a5e9d51f3d8c792089ce68e35da3e9 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Tue, 24 Jun 2025 20:12:29 +0000 Subject: [PATCH 05/18] Adding macros to lib.rs --- src/lib.rs | 89 ++++++++++++++++-------------------------------------- 1 file changed, 26 insertions(+), 63 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8190b74..80cde2f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,6 +22,7 @@ use crate::types::DmapField; use bzip2::read::BzEncoder; use bzip2::Compression; use indexmap::IndexMap; +use paste::paste; use pyo3::prelude::*; use rayon::iter::Either; use rayon::prelude::*; @@ -301,7 +302,7 @@ fn read_snd_py(infile: PathBuf) -> PyResult>> { /// Reads the data from infile into a tuple of `([IndexMap], int|None)`, where /// all valid records are returned, plus optionally the byte of the first record /// with a corruption within the file. Compatible with RST behaviour. -fn read_generic_lax Record<'a> + Send>( +fn read_lax Record<'a> + Send>( infile: PathBuf, ) -> Result<(Vec>, Option), DmapError> { let result = T::read_file_lax(&infile)?; @@ -311,6 +312,23 @@ fn read_generic_lax Record<'a> + Send>( )) } +macro_rules! read_lax_py { + ($name:ident, $py_name:literal) => { + paste! 
{ + /// Reads a [< $name:upper >] file, returning a tuple of + /// (list of dictionaries containing the fields, byte where first corrupted record starts). + #[pyfunction] + #[pyo3(name = $py_name)] + #[pyo3(text_signature = "(infile: str, /)")] + fn [< read_ $name _lax_py >]( + infile: PathBuf, + ) -> PyResult<(Vec>, Option)> { + read_lax::<[< $name:camel Record >]>(infile).map_err(PyErr::from) + } + } + } +} + /// Reads a generic DMAP file, returning a tuple of /// (list of dictionaries containing the fields, byte where first corrupted record starts). #[pyfunction] @@ -319,70 +337,15 @@ fn read_generic_lax Record<'a> + Send>( fn read_dmap_lax_py( infile: PathBuf, ) -> PyResult<(Vec>, Option)> { - read_generic_lax::(infile).map_err(PyErr::from) -} - -/// Reads an IQDAT file, returning a tuple of -/// (list of dictionaries containing the fields, byte where first corrupted record starts). -#[pyfunction] -#[pyo3(name = "read_iqdat_lax")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_iqdat_lax_py( - infile: PathBuf, -) -> PyResult<(Vec>, Option)> { - read_generic_lax::(infile).map_err(PyErr::from) + read_lax::(infile).map_err(PyErr::from) } -/// Reads a RAWACF file, returning a tuple of -/// (list of dictionaries containing the fields, byte where first corrupted record starts). -#[pyfunction] -#[pyo3(name = "read_rawacf_lax")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_rawacf_lax_py( - infile: PathBuf, -) -> PyResult<(Vec>, Option)> { - read_generic_lax::(infile).map_err(PyErr::from) -} - -/// Reads a FITACF file, returning a tuple of -/// (list of dictionaries containing the fields, byte where first corrupted record starts). 
-#[pyfunction] -#[pyo3(name = "read_fitacf_lax")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_fitacf_lax_py( - infile: PathBuf, -) -> PyResult<(Vec>, Option)> { - read_generic_lax::(infile).map_err(PyErr::from) -} - -/// Reads a GRID file, returning a tuple of -/// (list of dictionaries containing the fields, byte where first corrupted record starts). -#[pyfunction] -#[pyo3(name = "read_grid_lax")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_grid_lax_py( - infile: PathBuf, -) -> PyResult<(Vec>, Option)> { - read_generic_lax::(infile).map_err(PyErr::from) -} - -/// Reads a MAP file, returning a tuple of -/// (list of dictionaries containing the fields, byte where first corrupted record starts). -#[pyfunction] -#[pyo3(name = "read_map_lax")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_map_lax_py(infile: PathBuf) -> PyResult<(Vec>, Option)> { - read_generic_lax::(infile).map_err(PyErr::from) -} - -/// Reads an SND file, returning a tuple of -/// (list of dictionaries containing the fields, byte where first corrupted record starts). -#[pyfunction] -#[pyo3(name = "read_snd_lax")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_snd_lax_py(infile: PathBuf) -> PyResult<(Vec>, Option)> { - read_generic_lax::(infile).map_err(PyErr::from) -} +read_lax_py!(iqdat, "read_iqdat_py"); +read_lax_py!(rawacf, "read_rawacf_py"); +read_lax_py!(fitacf, "read_fitacf_py"); +read_lax_py!(grid, "read_grid_py"); +read_lax_py!(map, "read_map_py"); +read_lax_py!(snd, "read_snd_py"); /// Checks that a list of dictionaries contains DMAP records, then appends to outfile. /// From 54eb4558acb08e26d4817de5afc61e4ab4609aaf Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Wed, 25 Jun 2025 16:11:59 +0000 Subject: [PATCH 06/18] Some larger refactoring with macro_rules. * Renamed `GenericRecord` to `DmapRecord`, and moved into dmap.rs file. 
* Moved dmap.rs file containing `Record` trait to src/record.rs * Finished using macro_rules to simplify lib.rs * Used macro_rules to simplify io_benchmarking.rs --- benches/io_benchmarking.rs | 64 ++-- src/formats/dmap.rs | 639 +++---------------------------------- src/formats/fitacf.rs | 2 +- src/formats/generic.rs | 69 ---- src/formats/grid.rs | 2 +- src/formats/iqdat.rs | 2 +- src/formats/map.rs | 2 +- src/formats/mod.rs | 5 +- src/formats/rawacf.rs | 2 +- src/formats/snd.rs | 2 +- src/lib.rs | 336 ++++++------------- src/record.rs | 604 +++++++++++++++++++++++++++++++++++ tests/tests.rs | 8 +- 13 files changed, 776 insertions(+), 961 deletions(-) delete mode 100644 src/formats/generic.rs create mode 100644 src/record.rs diff --git a/benches/io_benchmarking.rs b/benches/io_benchmarking.rs index 08129a1..3773840 100644 --- a/benches/io_benchmarking.rs +++ b/benches/io_benchmarking.rs @@ -1,11 +1,12 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use dmap::formats::dmap::Record; +use dmap::record::Record; use dmap::formats::fitacf::FitacfRecord; use dmap::formats::grid::GridRecord; use dmap::formats::iqdat::IqdatRecord; use dmap::formats::map::MapRecord; use dmap::formats::rawacf::RawacfRecord; use dmap::formats::snd::SndRecord; +use paste::paste; use std::fs::File; fn criterion_benchmark(c: &mut Criterion) { @@ -15,12 +16,6 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("Read GRID", |b| b.iter(|| read_grid())); c.bench_function("Read SND", |b| b.iter(|| read_snd())); c.bench_function("Read MAP", |b| b.iter(|| read_map())); - // c.bench_function("Read Full-size RAWACF", |b| { - // b.iter(|| read_fullsize_rawacf()) - // }); - // c.bench_function("Read Full-size FITACF", |b| { - // b.iter(|| read_fullsize_fitacf()) - // }); // let records = read_iqdat(); // c.bench_with_input( @@ -30,32 +25,24 @@ fn criterion_benchmark(c: &mut Criterion) { // ); } -fn read_fitacf() -> Vec { - let file = 
File::open("tests/test_files/test.fitacf").expect("Test file not found"); - FitacfRecord::read_records(file).unwrap() +/// Generates benchmark functions for a given DMAP record type. +macro_rules! read_type { + ($type:ident) => { + paste! { + fn [< read_ $type >]() -> Vec<[< $type:camel Record >]> { + let file = File::open(format!("tests/test_files/test.{}", stringify!($type))).expect("Test file not found"); + [< $type:camel Record >]::read_records(file).unwrap() + } + } + } } -fn read_rawacf() -> Vec { - let file = File::open("tests/test_files/test.rawacf").expect("Test file not found"); - RawacfRecord::read_records(file).unwrap() -} - -fn read_fullsize_rawacf() -> Vec { - let file = File::open("tests/test_files/20210607.1801.00.cly.a.rawacf.mean") - .expect("Test file not found"); - RawacfRecord::read_records(file).unwrap() -} - -fn read_fullsize_fitacf() -> Vec { - let file = - File::open("tests/test_files/20210607.1801.00.cly.a.fitacf").expect("Test file not found"); - FitacfRecord::read_records(file).unwrap() -} - -fn read_iqdat() -> Vec { - let file = File::open("tests/test_files/test.iqdat").expect("Test file not found"); - IqdatRecord::read_records(file).unwrap() -} +read_type!(iqdat); +read_type!(rawacf); +read_type!(fitacf); +read_type!(grid); +read_type!(map); +read_type!(snd); // fn write_iqdat(records: &Vec) { // let file = File::open("tests/test_files/test.iqdat").expect("Test file not found"); @@ -63,21 +50,6 @@ fn read_iqdat() -> Vec { // dmap::to_file("tests/test_files/temp.iqdat", records).unwrap(); // } -fn read_grid() -> Vec { - let file = File::open("tests/test_files/test.grid").expect("Test file not found"); - GridRecord::read_records(file).unwrap() -} - -fn read_map() -> Vec { - let file = File::open("tests/test_files/test.map").expect("Test file not found"); - MapRecord::read_records(file).unwrap() -} - -fn read_snd() -> Vec { - let file = File::open("tests/test_files/test.snd").expect("Test file not found"); - 
SndRecord::read_records(file).unwrap() -} - criterion_group! { name = benches; config = Criterion::default(); diff --git a/src/formats/dmap.rs b/src/formats/dmap.rs index 7ae9322..78dbfd5 100644 --- a/src/formats/dmap.rs +++ b/src/formats/dmap.rs @@ -1,606 +1,73 @@ -//! Defines the `Record` trait, which contains the shared behaviour that all -//! DMAP record types must have. Also defines the `GenericRecord` struct which -//! implements `Record`, which can be used for reading/writing DMAP files without -//! checking that certain fields are or are not present, or have a given type. +//! Defines the `DmapRecord` struct which implements `Record`, which can be used +//! for reading/writing DMAP files without checking that certain fields are or +//! are not present, or have a given type. -use crate::error::DmapError; -use crate::types::{parse_scalar, parse_vector, read_data, DmapField, DmapType, DmapVec, Fields}; -use bzip2::read::BzDecoder; use indexmap::IndexMap; -use rayon::prelude::*; -use std::ffi::OsStr; -use std::fmt::Debug; -use std::fs::File; -use std::io::{Cursor, Read}; -use std::path::PathBuf; - -pub trait Record<'a>: - Debug + Send + TryFrom<&'a mut IndexMap, Error = DmapError> -{ - /// Gets the underlying data of the Record. - fn inner(self) -> IndexMap; - - /// Reads from dmap_data and parses into a collection of Records. - /// - /// Returns `DmapError` if dmap_data cannot be read or contains invalid data. 
- fn read_records(mut dmap_data: impl Read) -> Result, DmapError> - where - Self: Sized, - Self: Send, - { - let mut buffer: Vec = vec![]; - dmap_data.read_to_end(&mut buffer)?; - - let mut slices: Vec<_> = vec![]; - let mut rec_start: usize = 0; - let mut rec_size: usize; - let mut rec_end: usize; - while ((rec_start + 2 * i32::size()) as u64) < buffer.len() as u64 { - rec_size = i32::from_le_bytes(buffer[rec_start + 4..rec_start + 8].try_into().unwrap()) - as usize; // advance 4 bytes, skipping the "code" field - rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() - if rec_end > buffer.len() { - return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} has size greater than remaining length of buffer ({} > {})", slices.len(), rec_start, rec_size, buffer.len() - rec_start))); - } else if rec_size <= 0 { - return Err(DmapError::InvalidRecord(format!( - "Record {} starting at byte {} has non-positive size {} <= 0", - slices.len(), - rec_start, - rec_size - ))); - } - slices.push(Cursor::new(buffer[rec_start..rec_end].to_vec())); - rec_start = rec_end; - } - if rec_start != buffer.len() { - return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} incomplete; has size of {} bytes", slices.len() + 1, rec_start, buffer.len() - rec_start))) - } - let mut dmap_results: Vec> = vec![]; - dmap_results.par_extend( - slices - .par_iter_mut() - .map(|cursor| Self::parse_record(cursor)), - ); - - let mut dmap_records: Vec = vec![]; - let mut bad_recs: Vec = vec![]; - let mut dmap_errors: Vec = vec![]; - for (i, rec) in dmap_results.into_iter().enumerate() { - match rec { - Ok(x) => dmap_records.push(x), - Err(e) => { - dmap_errors.push(e); - bad_recs.push(i); - } - } - } - if dmap_errors.len() > 0 { - return Err(DmapError::BadRecords(bad_recs, dmap_errors[0].to_string())); - } - Ok(dmap_records) - } - - /// Reads from dmap_data and parses into a collection of Records. 
- /// - /// Returns a tuple of `(good records, Option)`. - fn read_records_lax( - mut dmap_data: impl Read, - ) -> Result<(Vec, Option), DmapError> - where - Self: Sized, - Self: Send, - { - let mut buffer: Vec = vec![]; - dmap_data.read_to_end(&mut buffer)?; - - let mut dmap_records: Vec = vec![]; - let mut bad_byte: Option = None; - - let mut slices: Vec<_> = vec![]; - let mut rec_start: usize = 0; - let mut rec_size: usize; - let mut rec_end: usize; +use crate::error::DmapError; +use crate::record::Record; +use crate::types::{DmapField, DmapType}; - let mut rec_starts = vec![]; - while ((rec_start + 2 * i32::size()) as u64) < buffer.len() as u64 { - rec_size = i32::from_le_bytes(buffer[rec_start + 4..rec_start + 8].try_into().unwrap()) - as usize; // advance 4 bytes, skipping the "code" field - rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() - if rec_end > buffer.len() || rec_size <= 0 { - bad_byte = Some(rec_start); - break - // rec_start = buffer.len(); // break from loop - } else { - rec_starts.push(rec_start); - slices.push(Cursor::new(buffer[rec_start..rec_end].to_vec())); - rec_start = rec_end; - } - } - let mut dmap_results: Vec> = vec![]; - dmap_results.par_extend( - slices - .par_iter_mut() - .map(|cursor| Self::parse_record(cursor)), - ); +#[derive(Debug, PartialEq, Clone)] +pub struct DmapRecord { + pub data: IndexMap, +} - for (i, rec) in dmap_results.into_iter().enumerate() { - if let Ok(x) = rec { - dmap_records.push(x); - } else { - bad_byte = Some(rec_starts[i]); - break; - } - } - Ok((dmap_records, bad_byte)) +impl DmapRecord { + pub fn get(&self, key: &String) -> Option<&DmapField> { + self.data.get(key) } - - /// Read a DMAP file of type `Self` - fn read_file(infile: &PathBuf) -> Result, DmapError> - where - Self: Sized, - Self: Send, - { - let file = File::open(infile)?; - match infile.extension() { - Some(ext) if ext == OsStr::new("bz2") => { - let compressor = BzDecoder::new(file); - 
Self::read_records(compressor) - } - _ => Self::read_records(file), - } + pub fn keys(&self) -> Vec<&String> { + self.data.keys().collect() } +} - /// Read a DMAP file of type `Self`, - fn read_file_lax(infile: &PathBuf) -> Result<(Vec, Option), DmapError> - where - Self: Sized, - Self: Send, - { - let file = File::open(infile)?; - match infile.extension() { - Some(ext) if ext == OsStr::new("bz2") => { - let compressor = BzDecoder::new(file); - Self::read_records_lax(compressor) - } - _ => Self::read_records_lax(file), - } +impl Record<'_> for DmapRecord { + fn inner(self) -> IndexMap { + self.data } - /// Reads a record starting from cursor position - fn parse_record(cursor: &mut Cursor>) -> Result - where - Self: Sized, - { - let bytes_already_read = cursor.position(); - let _code = read_data::(cursor).map_err(|e| { - DmapError::InvalidRecord(format!( - "Cannot interpret code at byte {}: {e}", - bytes_already_read - )) - })?; - let size = read_data::(cursor).map_err(|e| { - DmapError::InvalidRecord(format!( - "Cannot interpret size at byte {}: {e}", - bytes_already_read + i32::size() as u64 - )) - })?; - - // adding 8 bytes because code and size are part of the record. 
- if size as u64 > cursor.get_ref().len() as u64 - cursor.position() + 2 * i32::size() as u64 - { - return Err(DmapError::InvalidRecord(format!( - "Record size {size} at byte {} bigger than remaining buffer {}", - cursor.position() - i32::size() as u64, - cursor.get_ref().len() as u64 - cursor.position() + 2 * i32::size() as u64 - ))); - } else if size <= 0 { - return Err(DmapError::InvalidRecord(format!("Record size {size} <= 0"))); - } - - let num_scalars = read_data::(cursor).map_err(|e| { - DmapError::InvalidRecord(format!( - "Cannot interpret number of scalars at byte {}: {e}", - cursor.position() - i32::size() as u64 - )) - })?; - let num_vectors = read_data::(cursor).map_err(|e| { - DmapError::InvalidRecord(format!( - "Cannot interpret number of vectors at byte {}: {e}", - cursor.position() - i32::size() as u64 - )) - })?; - if num_scalars <= 0 { - return Err(DmapError::InvalidRecord(format!( - "Number of scalars {num_scalars} at byte {} <= 0", - cursor.position() - 2 * i32::size() as u64 - ))); - } else if num_vectors <= 0 { - return Err(DmapError::InvalidRecord(format!( - "Number of vectors {num_vectors} at byte {} <= 0", - cursor.position() - i32::size() as u64 - ))); - } else if num_scalars + num_vectors > size { - return Err(DmapError::InvalidRecord(format!( - "Number of scalars {num_scalars} plus vectors {num_vectors} greater than size '{size}'"))); - } - - let mut fields: IndexMap = IndexMap::new(); - for _ in 0..num_scalars { - let (name, val) = parse_scalar(cursor)?; - fields.insert(name, val); - } - for _ in 0..num_vectors { - let (name, val) = parse_vector(cursor, size)?; - fields.insert(name, val); - } - - if cursor.position() - bytes_already_read != size as u64 { - return Err(DmapError::InvalidRecord(format!( - "Bytes read {} does not match the records size field {}", - cursor.position() - bytes_already_read, - size - ))); - } - - Self::new(&mut fields) + fn new(fields: &mut IndexMap) -> Result { + Ok(DmapRecord { + data: fields.to_owned(), + }) 
} - - /// Creates a new object from the parsed scalars and vectors - fn new(fields: &mut IndexMap) -> Result - where - Self: Sized; - - /// Checks the validity of an `IndexMap` as a representation of a DMAP record. - /// - /// Validity checks include ensuring that no unfamiliar entries exist, that all required - /// scalar and vector fields exist, that all scalar and vector fields are of the expected - /// type, and that vector fields which are expected to have the same dimensions do indeed - /// have the same dimensions. - fn check_fields( - field_dict: &mut IndexMap, - fields_for_type: &Fields, - ) -> Result<(), DmapError> { - let unsupported_keys: Vec<&String> = field_dict - .keys() - .filter(|&k| !fields_for_type.all_fields.contains(&&**k)) - .collect(); - if !unsupported_keys.is_empty() { - Err(DmapError::InvalidRecord(format!( - "Unsupported fields {:?}, fields supported are {:?}", - unsupported_keys, fields_for_type.all_fields - )))? - } - - for (field, expected_type) in fields_for_type.scalars_required.iter() { - match field_dict.get(&field.to_string()) { - Some(DmapField::Scalar(x)) if &x.get_type() == expected_type => {} - Some(DmapField::Scalar(x)) => Err(DmapError::InvalidRecord(format!( - "Field {} has incorrect type {}, expected {}", - field, - x.get_type(), - expected_type - )))?, - Some(_) => Err(DmapError::InvalidRecord(format!( - "Field {} is a vector, expected scalar", - field - )))?, - None => Err(DmapError::InvalidRecord(format!( - "Field {field:?} ({:?}) missing: fields {:?}", - &field.to_string(), - field_dict.keys() - )))?, - } - } - for (field, expected_type) in fields_for_type.scalars_optional.iter() { - match field_dict.get(&field.to_string()) { - Some(DmapField::Scalar(x)) if &x.get_type() == expected_type => {} - Some(DmapField::Scalar(x)) => Err(DmapError::InvalidRecord(format!( - "Field {} has incorrect type {}, expected {}", - field, - x.get_type(), - expected_type - )))?, - Some(_) => Err(DmapError::InvalidRecord(format!( - "Field 
{} is a vector, expected scalar", - field - )))?, - None => {} - } - } - for (field, expected_type) in fields_for_type.vectors_required.iter() { - match field_dict.get(&field.to_string()) { - Some(DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( - "Field {} is a scalar, expected vector", - field - )))?, - Some(DmapField::Vector(x)) if &x.get_type() != expected_type => { - Err(DmapError::InvalidRecord(format!( - "Field {field} has incorrect type {:?}, expected {expected_type:?}", - x.get_type() - )))? - } - Some(&DmapField::Vector(_)) => {} - None => Err(DmapError::InvalidRecord(format!("Field {field} missing")))?, - } - } - for (field, expected_type) in fields_for_type.vectors_optional.iter() { - match field_dict.get(&field.to_string()) { - Some(&DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( - "Field {} is a scalar, expected vector", - field - )))?, - Some(DmapField::Vector(x)) if &x.get_type() != expected_type => { - Err(DmapError::InvalidRecord(format!( - "Field {field} has incorrect type {}, expected {expected_type}", - x.get_type() - )))? - } - _ => {} - } - } - // This block checks that grouped vector fields have the same dimensionality - for vec_group in fields_for_type.vector_dim_groups.iter() { - let vecs: Vec<(&str, &DmapVec)> = vec_group - .iter() - .filter_map(|&name| match field_dict.get(&name.to_string()) { - Some(DmapField::Vector(ref x)) => Some((name, x)), - Some(_) => None, - None => None, - }) - .collect(); - if vecs.len() > 1 { - let mut vec_iter = vecs.iter(); - let first = vec_iter.next().expect("Iterator broken"); - if !vec_iter.all(|(_, v)| v.shape() == first.1.shape()) { - let error_vec: Vec<(&str, &[usize])> = - vecs.iter().map(|(k, v)| (*k, v.shape())).collect(); - Err(DmapError::InvalidRecord(format!( - "Vector fields have inconsistent dimensions: {:?}", - error_vec - )))? - } - } - } - Ok(()) - } - - /// Attempts to massage the entries of an `IndexMap` into the proper types for a DMAP record. 
- fn coerce>( - fields_dict: &mut IndexMap, - fields_for_type: &Fields, - ) -> Result { - let unsupported_keys: Vec<&String> = fields_dict - .keys() - .filter(|&k| !fields_for_type.all_fields.contains(&&**k)) - .collect(); - if !unsupported_keys.is_empty() { - Err(DmapError::InvalidRecord(format!( - "Unsupported fields {:?}, fields supported are {:?}", - unsupported_keys, fields_for_type.all_fields - )))? - } - - for (field, expected_type) in fields_for_type.scalars_required.iter() { - match fields_dict.get(&field.to_string()) { - Some(DmapField::Scalar(x)) if &x.get_type() != expected_type => { - fields_dict.insert( - field.to_string(), - DmapField::Scalar(x.cast_as(expected_type)?), - ); - } - Some(&DmapField::Scalar(_)) => {} - Some(_) => Err(DmapError::InvalidRecord(format!( - "Field {} is a vector, expected scalar", - field - )))?, - None => Err(DmapError::InvalidRecord(format!( - "Field {field:?} ({:?}) missing: fields {:?}", - &field.to_string(), - fields_dict.keys() - )))?, - } - } - for (field, expected_type) in fields_for_type.scalars_optional.iter() { - match fields_dict.get(&field.to_string()) { - Some(DmapField::Scalar(x)) if &x.get_type() == expected_type => {} - Some(DmapField::Scalar(x)) => { - fields_dict.insert( - field.to_string(), - DmapField::Scalar(x.cast_as(expected_type)?), - ); - } - Some(_) => Err(DmapError::InvalidRecord(format!( - "Field {} is a vector, expected scalar", - field - )))?, - None => {} - } - } - for (field, expected_type) in fields_for_type.vectors_required.iter() { - match fields_dict.get(&field.to_string()) { - Some(&DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( - "Field {} is a scalar, expected vector", - field - )))?, - Some(DmapField::Vector(x)) if &x.get_type() != expected_type => { - Err(DmapError::InvalidRecord(format!( - "Field {field} has incorrect type {:?}, expected {expected_type:?}", - x.get_type() - )))? 
- } - Some(&DmapField::Vector(_)) => {} - None => Err(DmapError::InvalidRecord(format!("Field {field} missing")))?, - } - } - for (field, expected_type) in fields_for_type.vectors_optional.iter() { - match fields_dict.get(&field.to_string()) { - Some(&DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( - "Field {} is a scalar, expected vector", - field - )))?, - Some(DmapField::Vector(x)) if &x.get_type() != expected_type => { - Err(DmapError::InvalidRecord(format!( - "Field {field} has incorrect type {}, expected {expected_type}", - x.get_type() - )))? - } - _ => {} - } - } - - T::new(fields_dict) - } - - /// Attempts to copy `self` to a raw byte representation. - fn to_bytes(&self) -> Result, DmapError>; - - /// Converts the entries of an `IndexMap` into a raw byte representation, including metadata - /// about the entries (DMAP key, name\[, dimensions\]) - /// - /// If all is good, returns a tuple containing: - /// * the number of scalar fields - /// * the number of vector fields - /// * the raw bytes - fn data_to_bytes( - data: &IndexMap, - fields_for_type: &Fields, - ) -> Result<(i32, i32, Vec), DmapError> { + fn to_bytes(&self) -> Result, DmapError> { let mut data_bytes: Vec = vec![]; let mut num_scalars: i32 = 0; let mut num_vectors: i32 = 0; - for (field, _) in fields_for_type.scalars_required.iter() { - match data.get(&field.to_string()) { - Some(x @ DmapField::Scalar(_)) => { - data_bytes.extend(field.as_bytes()); - data_bytes.extend([0]); // null-terminate string - data_bytes.append(&mut x.as_bytes()); - num_scalars += 1; - } - Some(_) => Err(DmapError::InvalidScalar(format!( - "Field {field} is a vector, expected scalar" - )))?, - None => Err(DmapError::InvalidRecord(format!( - "Field {field} missing from record" - )))?, - } - } - for (field, _) in fields_for_type.scalars_optional.iter() { - if let Some(x) = data.get(&field.to_string()) { - match x { - DmapField::Scalar(_) => { - data_bytes.extend(field.as_bytes()); - data_bytes.extend([0]); 
// null-terminate string - data_bytes.append(&mut x.as_bytes()); - num_scalars += 1; - } - DmapField::Vector(_) => Err(DmapError::InvalidScalar(format!( - "Field {field} is a vector, expected scalar" - )))?, - } - } - } - for (field, _) in fields_for_type.vectors_required.iter() { - match data.get(&field.to_string()) { - Some(x @ DmapField::Vector(_)) => { - data_bytes.extend(field.as_bytes()); - data_bytes.extend([0]); // null-terminate string - data_bytes.append(&mut x.as_bytes()); - num_vectors += 1; - } - Some(_) => Err(DmapError::InvalidVector(format!( - "Field {field} is a scalar, expected vector" - )))?, - None => Err(DmapError::InvalidRecord(format!( - "Field {field} missing from record" - )))?, - } - } - for (field, _) in fields_for_type.vectors_optional.iter() { - if let Some(x) = data.get(&field.to_string()) { - match x { - DmapField::Vector(_) => { - data_bytes.extend(field.as_bytes()); - data_bytes.extend([0]); // null-terminate string - data_bytes.append(&mut x.as_bytes()); - num_vectors += 1; - } - DmapField::Scalar(_) => Err(DmapError::InvalidVector(format!( - "Field {field} is a scalar, expected vector" - )))?, - } - } - } - - Ok((num_scalars, num_vectors, data_bytes)) + // Do a first pass, to get all the scalar fields + for (name, val) in self.data.iter() { + if let x @ DmapField::Scalar(_) = val { + data_bytes.extend(name.as_bytes()); + data_bytes.extend([0]); // null-terminate string + data_bytes.append(&mut x.as_bytes()); + num_scalars += 1; + } + } + // Do a second pass to convert all the vector fields + for (name, val) in self.data.iter() { + if let x @ DmapField::Vector(_) = val { + data_bytes.extend(name.as_bytes()); + data_bytes.extend([0]); // null-terminate string + data_bytes.append(&mut x.as_bytes()); + num_vectors += 1; + } + } + let mut bytes: Vec = vec![]; + bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter + bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, 
length, num_scalars, num_vectors + bytes.extend(num_scalars.as_bytes()); + bytes.extend(num_vectors.as_bytes()); + bytes.append(&mut data_bytes); // consumes data_bytes + Ok(bytes) } } +impl TryFrom<&mut IndexMap> for DmapRecord { + type Error = DmapError; -macro_rules! create_record_type { - ($format:ident, $fields:ident) => { - paste::paste! { - use crate::types::{DmapType, DmapField}; - use crate::error::DmapError; - use indexmap::IndexMap; - use crate::formats::dmap::Record; - - /// Struct containing the checked fields of a single RAWACF record. - #[derive(Debug, PartialEq, Clone)] - pub struct [< $format:camel Record >] { - pub data: IndexMap, - } - - impl [< $format:camel Record >] { - /// Returns the field with name `key`, if it exists in the record. - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - - /// Returns the names of all fields stored in the record. - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } - } - - impl Record<'_> for [< $format:camel Record>] { - fn inner(self) -> IndexMap { - self.data - } - fn new(fields: &mut IndexMap) -> Result<[< $format:camel Record>], DmapError> { - match Self::check_fields(fields, &$fields) { - Ok(_) => {} - Err(e) => Err(e)?, - } - - Ok([< $format:camel Record >] { - data: fields.to_owned(), - }) - } - fn to_bytes(&self) -> Result, DmapError> { - let (num_scalars, num_vectors, mut data_bytes) = - Self::data_to_bytes(&self.data, &$fields)?; - - let mut bytes: Vec = vec![]; - bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter - bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors - bytes.extend(num_scalars.as_bytes()); - bytes.extend(num_vectors.as_bytes()); - bytes.append(&mut data_bytes); // consumes data_bytes - Ok(bytes) - } - } - - impl TryFrom<&mut IndexMap> for [< $format:camel Record >] { - type Error = DmapError; - - fn try_from(value: &mut IndexMap) -> 
Result { - Self::coerce::<[< $format:camel Record>]>(value, &$fields) - } - } - } + fn try_from(value: &mut IndexMap) -> Result { + DmapRecord::new(value) } } - -pub(crate) use create_record_type; - diff --git a/src/formats/fitacf.rs b/src/formats/fitacf.rs index a44dfc8..3cd608a 100644 --- a/src/formats/fitacf.rs +++ b/src/formats/fitacf.rs @@ -1,4 +1,4 @@ -use crate::formats::dmap::create_record_type; +use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/formats/generic.rs b/src/formats/generic.rs deleted file mode 100644 index 6da4a14..0000000 --- a/src/formats/generic.rs +++ /dev/null @@ -1,69 +0,0 @@ -use indexmap::IndexMap; -use crate::error::DmapError; -use crate::formats::dmap::Record; -use crate::types::{DmapField, DmapType}; - -#[derive(Debug, PartialEq, Clone)] -pub struct GenericRecord { - pub data: IndexMap, -} - -impl GenericRecord { - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } -} - -impl Record<'_> for GenericRecord { - fn inner(self) -> IndexMap { - self.data - } - - fn new(fields: &mut IndexMap) -> Result { - Ok(GenericRecord { - data: fields.to_owned(), - }) - } - fn to_bytes(&self) -> Result, DmapError> { - let mut data_bytes: Vec = vec![]; - let mut num_scalars: i32 = 0; - let mut num_vectors: i32 = 0; - - // Do a first pass, to get all the scalar fields - for (name, val) in self.data.iter() { - if let x @ DmapField::Scalar(_) = val { - data_bytes.extend(name.as_bytes()); - data_bytes.extend([0]); // null-terminate string - data_bytes.append(&mut x.as_bytes()); - num_scalars += 1; - } - } - // Do a second pass to convert all the vector fields - for (name, val) in self.data.iter() { - if let x @ DmapField::Vector(_) = val { - data_bytes.extend(name.as_bytes()); - data_bytes.extend([0]); // null-terminate string - data_bytes.append(&mut x.as_bytes()); - num_vectors += 1; - 
} - } - let mut bytes: Vec = vec![]; - bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter - bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors - bytes.extend(num_scalars.as_bytes()); - bytes.extend(num_vectors.as_bytes()); - bytes.append(&mut data_bytes); // consumes data_bytes - Ok(bytes) - } -} - -impl TryFrom<&mut IndexMap> for GenericRecord { - type Error = DmapError; - - fn try_from(value: &mut IndexMap) -> Result { - GenericRecord::new(value) - } -} \ No newline at end of file diff --git a/src/formats/grid.rs b/src/formats/grid.rs index 50e2859..da4bc1c 100644 --- a/src/formats/grid.rs +++ b/src/formats/grid.rs @@ -1,4 +1,4 @@ -use crate::formats::dmap::create_record_type; +use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/formats/iqdat.rs b/src/formats/iqdat.rs index a5d180d..e579d2e 100644 --- a/src/formats/iqdat.rs +++ b/src/formats/iqdat.rs @@ -1,4 +1,4 @@ -use crate::formats::dmap::create_record_type; +use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/formats/map.rs b/src/formats/map.rs index 38790e6..bf59d1c 100644 --- a/src/formats/map.rs +++ b/src/formats/map.rs @@ -1,4 +1,4 @@ -use crate::formats::dmap::create_record_type; +use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/formats/mod.rs b/src/formats/mod.rs index 8dca27c..43b3672 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -1,8 +1,5 @@ //! The supported DMAP file formats. -/// The shared functionality (Trait) common to all DMAP record types. 
-pub mod dmap; - /// The [FitACF file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/fitacf/) pub mod fitacf; @@ -23,4 +20,4 @@ pub mod snd; /// The generic [Dmap file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/dmap_data/) -pub mod generic; +pub mod dmap; diff --git a/src/formats/rawacf.rs b/src/formats/rawacf.rs index b0784f5..4d48af0 100644 --- a/src/formats/rawacf.rs +++ b/src/formats/rawacf.rs @@ -1,4 +1,4 @@ -use crate::formats::dmap::create_record_type; +use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/formats/snd.rs b/src/formats/snd.rs index 7b12abb..7fb92c6 100644 --- a/src/formats/snd.rs +++ b/src/formats/snd.rs @@ -1,4 +1,4 @@ -use crate::formats::dmap::create_record_type; +use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/lib.rs b/src/lib.rs index 80cde2f..b068631 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,16 +8,17 @@ pub mod error; pub mod formats; pub mod types; +pub mod record; use crate::error::DmapError; -use crate::formats::dmap::Record; +use crate::formats::dmap::DmapRecord; use crate::formats::fitacf::FitacfRecord; use crate::formats::grid::GridRecord; use crate::formats::iqdat::IqdatRecord; use crate::formats::map::MapRecord; use crate::formats::rawacf::RawacfRecord; use crate::formats::snd::SndRecord; -use crate::formats::generic::GenericRecord; +use crate::record::Record; use crate::types::DmapField; use bzip2::read::BzEncoder; use bzip2::Compression; @@ -37,7 +38,7 @@ use std::path::PathBuf; /// Ordinarily, this function opens the file in `append` mode. If the extension of `outfile` is /// `.bz2`, the bytes will be compressed using bzip2 before being written, and the file is instead /// opened in `create_new` mode, meaning it will fail if a file already exists at the given path. 
-fn write_to_file(bytes: Vec, outfile: &PathBuf) -> Result<(), std::io::Error> { +fn bytes_to_file(bytes: Vec, outfile: &PathBuf) -> Result<(), std::io::Error> { let mut out_bytes: Vec = vec![]; let mut file: File = OpenOptions::new().append(true).create(true).open(outfile)?; match outfile.extension() { @@ -71,45 +72,10 @@ pub fn write_records<'a>(mut recs: Vec>, outfile: &PathBuf) -> R )))? } bytes.par_extend(rec_bytes.into_par_iter().flatten()); - write_to_file(bytes, outfile)?; + bytes_to_file(bytes, outfile)?; Ok(()) } -/// Write generic DMAP to `outfile` -pub fn write_dmap(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_records(recs, outfile) -} - -/// Write IQDAT records to `outfile`. -pub fn write_iqdat(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_records(recs, outfile) -} - -/// Write RAWACF records to `outfile`. -pub fn write_rawacf(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_records(recs, outfile) -} - -/// Write FITACF records to `outfile`. -pub fn write_fitacf(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_records(recs, outfile) -} - -/// Write GRID records to `outfile`. -pub fn write_grid(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_records(recs, outfile) -} - -/// Write MAP records to `outfile`. -pub fn write_map(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_records(recs, outfile) -} - -/// Write SND records to `outfile`. -pub fn write_snd(recs: Vec, outfile: &PathBuf) -> Result<(), DmapError> { - write_records(recs, outfile) -} - /// Attempts to convert `recs` to `T` then append to `outfile`. fn try_write_generic Record<'a>>( mut recs: Vec>, @@ -136,100 +102,58 @@ where ))? } bytes.par_extend(rec_bytes.into_par_iter().flatten()); - write_to_file(bytes, outfile)?; + bytes_to_file(bytes, outfile)?; Ok(()) } -/// Attempts to convert `recs` to `GenericRecord` then append to `outfile`. 
-pub fn try_write_dmap( - recs: Vec>, - outfile: &PathBuf, -) -> Result<(), DmapError> { - try_write_generic::(recs, outfile) -} - -/// Attempts to convert `recs` to `IqdatRecord` then append to `outfile`. -pub fn try_write_iqdat( - recs: Vec>, - outfile: &PathBuf, -) -> Result<(), DmapError> { - try_write_generic::(recs, outfile) -} - -/// Attempts to convert `recs` to `RawacfRecord` then append to `outfile`. -pub fn try_write_rawacf( - recs: Vec>, - outfile: &PathBuf, -) -> Result<(), DmapError> { - try_write_generic::(recs, outfile) -} - -/// Attempts to convert `recs` to `FitacfRecord` then append to `outfile`. -pub fn try_write_fitacf( - recs: Vec>, - outfile: &PathBuf, -) -> Result<(), DmapError> { - try_write_generic::(recs, outfile) -} - -/// Attempts to convert `recs` to `GridRecord` then append to `outfile`. -pub fn try_write_grid( - recs: Vec>, - outfile: &PathBuf, -) -> Result<(), DmapError> { - try_write_generic::(recs, outfile) -} - -/// Attempts to convert `recs` to `MapRecord` then append to `outfile`. -pub fn try_write_map( - recs: Vec>, - outfile: &PathBuf, -) -> Result<(), DmapError> { - try_write_generic::(recs, outfile) -} - -/// Attempts to convert `recs` to `SndRecord` then append to `outfile`. -pub fn try_write_snd( - recs: Vec>, - outfile: &PathBuf, -) -> Result<(), DmapError> { - try_write_generic::(recs, outfile) -} - -/// Read in a DMAP file -pub fn read_dmap(infile: PathBuf) -> Result, DmapError> { - GenericRecord::read_file(&infile) -} - -/// Read in an IQDAT file -pub fn read_iqdat(infile: PathBuf) -> Result, DmapError> { - IqdatRecord::read_file(&infile) -} - -/// Read in a RAWACF file -pub fn read_rawacf(infile: PathBuf) -> Result, DmapError> { - RawacfRecord::read_file(&infile) -} +/// This macro generates two functions for writing to file. The first, `write_[type]`, takes in +/// records of type `[Type]Record`, while the second, `try_write_[type]`, takes in `Vec` +/// and attempts to coerce into `[Type]Record` then write to file. 
+macro_rules! write_rust { + ($type:ident) => { + paste! { + /// Write $type:upper records to `outfile`. + pub fn [< write_ $type >](recs: Vec<[< $type:camel Record >]>, outfile: &PathBuf) -> Result<(), DmapError> { + write_records(recs, outfile) + } -/// Read in a FITACF file -pub fn read_fitacf(infile: PathBuf) -> Result, DmapError> { - FitacfRecord::read_file(&infile) + /// Attempts to convert `recs` to `[< $type:camel Record >]` then append to `outfile`. + pub fn [< try_write_ $type >]( + recs: Vec>, + outfile: &PathBuf, + ) -> Result<(), DmapError> { + try_write_generic::<[< $type:camel Record >]>(recs, outfile) + } + } + } } -/// Read in a GRID file -pub fn read_grid(infile: PathBuf) -> Result, DmapError> { - GridRecord::read_file(&infile) -} +write_rust!(iqdat); +write_rust!(rawacf); +write_rust!(fitacf); +write_rust!(grid); +write_rust!(map); +write_rust!(snd); +write_rust!(dmap); -/// Read in a MAP file -pub fn read_map(infile: PathBuf) -> Result, DmapError> { - MapRecord::read_file(&infile) +macro_rules! read_type { + ($type:ident) => { + paste! { + /// Read in a $type:upper file + pub fn [< read_ $type >](infile: PathBuf) -> Result]>, DmapError> { + [< $type:camel Record >]::read_file(&infile) + } + } + } } -/// Read in an SND file -pub fn read_snd(infile: PathBuf) -> Result, DmapError> { - SndRecord::read_file(&infile) -} +read_type!(iqdat); +read_type!(rawacf); +read_type!(fitacf); +read_type!(grid); +read_type!(map); +read_type!(snd); +read_type!(dmap); /// Reads the data from infile into `Vec`. /// @@ -243,62 +167,6 @@ fn read_generic Record<'a> + Send>( .collect()) } -/// Reads a generic DMAP file, returning a list of dictionaries containing the fields. -#[pyfunction] -#[pyo3(name = "read_dmap")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_dmap_py(infile: PathBuf) -> PyResult>> { - read_generic::(infile).map_err(PyErr::from) -} - -/// Reads an IQDAT file, returning a list of dictionaries containing the fields. 
-#[pyfunction] -#[pyo3(name = "read_iqdat")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_iqdat_py(infile: PathBuf) -> PyResult>> { - read_generic::(infile).map_err(PyErr::from) -} - -/// Reads a RAWACF file, returning a list of dictionaries containing the fields. -#[pyfunction] -#[pyo3(name = "read_rawacf")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_rawacf_py(infile: PathBuf) -> PyResult>> { - read_generic::(infile).map_err(PyErr::from) -} - -/// Reads a FITACF file, returning a list of dictionaries containing the fields. -#[pyfunction] -#[pyo3(name = "read_fitacf")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_fitacf_py(infile: PathBuf) -> PyResult>> { - read_generic::(infile).map_err(PyErr::from) -} - -/// Reads a GRID file, returning a list of dictionaries containing the fields. -#[pyfunction] -#[pyo3(name = "read_grid")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_grid_py(infile: PathBuf) -> PyResult>> { - read_generic::(infile).map_err(PyErr::from) -} - -/// Reads a MAP file, returning a list of dictionaries containing the fields. -#[pyfunction] -#[pyo3(name = "read_map")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_map_py(infile: PathBuf) -> PyResult>> { - read_generic::(infile).map_err(PyErr::from) -} - -/// Reads an SND file, returning a list of dictionaries containing the fields. -#[pyfunction] -#[pyo3(name = "read_snd")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_snd_py(infile: PathBuf) -> PyResult>> { - read_generic::(infile).map_err(PyErr::from) -} - /// Reads the data from infile into a tuple of `([IndexMap], int|None)`, where /// all valid records are returned, plus optionally the byte of the first record /// with a corruption within the file. Compatible with RST behaviour. @@ -312,40 +180,42 @@ fn read_lax Record<'a> + Send>( )) } -macro_rules! read_lax_py { - ($name:ident, $py_name:literal) => { +/// Creates functions for reading DMAP files for the Python API. 
+/// +/// Generates two functions: `read_[type]` and `read_[type]_lax`, for strict and lax +/// reading, respectively. +macro_rules! read_py { + ($name:ident, $py_name:literal, $lax_name:literal) => { paste! { - /// Reads a [< $name:upper >] file, returning a tuple of + /// Reads a $name:upper file, returning a tuple of /// (list of dictionaries containing the fields, byte where first corrupted record starts). #[pyfunction] - #[pyo3(name = $py_name)] + #[pyo3(name = $lax_name)] #[pyo3(text_signature = "(infile: str, /)")] fn [< read_ $name _lax_py >]( infile: PathBuf, ) -> PyResult<(Vec>, Option)> { read_lax::<[< $name:camel Record >]>(infile).map_err(PyErr::from) } + + /// Reads a $name:upper file, returning a list of dictionaries containing the fields. + #[pyfunction] + #[pyo3(name = $py_name)] + #[pyo3(text_signature = "(infile: str, /)")] + fn [< read_ $name _py >](infile: PathBuf) -> PyResult>> { + read_generic::<[< $name:camel Record >]>(infile).map_err(PyErr::from) + } } } } -/// Reads a generic DMAP file, returning a tuple of -/// (list of dictionaries containing the fields, byte where first corrupted record starts). 
-#[pyfunction] -#[pyo3(name = "read_dmap_lax")] -#[pyo3(text_signature = "(infile: str, /)")] -fn read_dmap_lax_py( - infile: PathBuf, -) -> PyResult<(Vec>, Option)> { - read_lax::(infile).map_err(PyErr::from) -} - -read_lax_py!(iqdat, "read_iqdat_py"); -read_lax_py!(rawacf, "read_rawacf_py"); -read_lax_py!(fitacf, "read_fitacf_py"); -read_lax_py!(grid, "read_grid_py"); -read_lax_py!(map, "read_map_py"); -read_lax_py!(snd, "read_snd_py"); +read_py!(iqdat, "read_iqdat", "read_iqdat_lax"); +read_py!(rawacf, "read_rawacf", "read_rawacf_lax"); +read_py!(fitacf, "read_fitacf", "read_fitacf_lax"); +read_py!(grid, "read_grid", "read_grid_lax"); +read_py!(map, "read_map", "read_map_lax"); +read_py!(snd, "read_snd", "read_snd_lax"); +read_py!(dmap, "read_dmap", "read_dmap_lax"); /// Checks that a list of dictionaries contains DMAP records, then appends to outfile. /// @@ -359,53 +229,27 @@ fn write_dmap_py(recs: Vec>, outfile: PathBuf) -> Py try_write_dmap(recs, &outfile).map_err(PyErr::from) } -/// Checks that a list of dictionaries contains valid IQDAT records, then appends to outfile. -#[pyfunction] -#[pyo3(name = "write_iqdat")] -#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] -fn write_iqdat_py(recs: Vec>, outfile: PathBuf) -> PyResult<()> { - try_write_iqdat(recs, &outfile).map_err(PyErr::from) -} - -/// Checks that a list of dictionaries contains valid RAWACF records, then appends to outfile. -#[pyfunction] -#[pyo3(name = "write_rawacf")] -#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] -fn write_rawacf_py(recs: Vec>, outfile: PathBuf) -> PyResult<()> { - try_write_rawacf(recs, &outfile).map_err(PyErr::from) -} - -/// Checks that a list of dictionaries contains valid FITACF records, then appends to outfile. 
-#[pyfunction] -#[pyo3(name = "write_fitacf")] -#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] -fn write_fitacf_py(recs: Vec>, outfile: PathBuf) -> PyResult<()> { - try_write_fitacf(recs, &outfile).map_err(PyErr::from) -} - -/// Checks that a list of dictionaries contains valid GRID records, then appends to outfile. -#[pyfunction] -#[pyo3(name = "write_grid")] -#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] -fn write_grid_py(recs: Vec>, outfile: PathBuf) -> PyResult<()> { - try_write_grid(recs, &outfile).map_err(PyErr::from) -} - -/// Checks that a list of dictionaries contains valid MAP records, then appends to outfile. -#[pyfunction] -#[pyo3(name = "write_map")] -#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] -fn write_map_py(recs: Vec>, outfile: PathBuf) -> PyResult<()> { - try_write_map(recs, &outfile).map_err(PyErr::from) +/// Generates functions exposed to the Python API for writing specific file types. +macro_rules! write_py { + ($name:ident, $fn_name:literal) => { + paste! { + /// Checks that a list of dictionaries contains valid $name:upper records, then appends to outfile. + #[pyfunction] + #[pyo3(name = $fn_name)] + #[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] + fn [< write_ $name _py >](recs: Vec>, outfile: PathBuf) -> PyResult<()> { + [< try_write_ $name >](recs, &outfile).map_err(PyErr::from) + } + } + } } -/// Checks that a list of dictionaries contains valid SND records, then appends to outfile. -#[pyfunction] -#[pyo3(name = "write_snd")] -#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] -fn write_snd_py(recs: Vec>, outfile: PathBuf) -> PyResult<()> { - try_write_snd(recs, &outfile).map_err(PyErr::from) -} +write_py!(iqdat, "write_iqdat"); +write_py!(rawacf, "write_rawacf"); +write_py!(fitacf, "write_fitacf"); +write_py!(grid, "write_grid"); +write_py!(map, "write_map"); +write_py!(snd, "write_snd"); /// Functions for SuperDARN DMAP file format I/O. 
#[pymodule] diff --git a/src/record.rs b/src/record.rs new file mode 100644 index 0000000..fba25d5 --- /dev/null +++ b/src/record.rs @@ -0,0 +1,604 @@ +//! Defines the `Record` trait, which contains the shared behaviour that all +//! DMAP records must have. + +use crate::error::DmapError; +use crate::types::{parse_scalar, parse_vector, read_data, DmapField, DmapType, DmapVec, Fields}; +use bzip2::read::BzDecoder; +use indexmap::IndexMap; +use rayon::prelude::*; +use std::ffi::OsStr; +use std::fmt::Debug; +use std::fs::File; +use std::io::{Cursor, Read}; +use std::path::PathBuf; + +pub trait Record<'a>: + Debug + Send + TryFrom<&'a mut IndexMap, Error = DmapError> +{ + /// Gets the underlying data of the Record. + fn inner(self) -> IndexMap; + + /// Reads from dmap_data and parses into a collection of Records. + /// + /// Returns `DmapError` if dmap_data cannot be read or contains invalid data. + fn read_records(mut dmap_data: impl Read) -> Result, DmapError> + where + Self: Sized, + Self: Send, + { + let mut buffer: Vec = vec![]; + dmap_data.read_to_end(&mut buffer)?; + + let mut slices: Vec<_> = vec![]; + let mut rec_start: usize = 0; + let mut rec_size: usize; + let mut rec_end: usize; + while ((rec_start + 2 * i32::size()) as u64) < buffer.len() as u64 { + rec_size = i32::from_le_bytes(buffer[rec_start + 4..rec_start + 8].try_into().unwrap()) + as usize; // advance 4 bytes, skipping the "code" field + rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() + if rec_end > buffer.len() { + return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} has size greater than remaining length of buffer ({} > {})", slices.len(), rec_start, rec_size, buffer.len() - rec_start))); + } else if rec_size <= 0 { + return Err(DmapError::InvalidRecord(format!( + "Record {} starting at byte {} has non-positive size {} <= 0", + slices.len(), + rec_start, + rec_size + ))); + } + 
slices.push(Cursor::new(buffer[rec_start..rec_end].to_vec())); + rec_start = rec_end; + } + if rec_start != buffer.len() { + return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} incomplete; has size of {} bytes", slices.len() + 1, rec_start, buffer.len() - rec_start))) + } + let mut dmap_results: Vec> = vec![]; + dmap_results.par_extend( + slices + .par_iter_mut() + .map(|cursor| Self::parse_record(cursor)), + ); + + let mut dmap_records: Vec = vec![]; + let mut bad_recs: Vec = vec![]; + let mut dmap_errors: Vec = vec![]; + for (i, rec) in dmap_results.into_iter().enumerate() { + match rec { + Ok(x) => dmap_records.push(x), + Err(e) => { + dmap_errors.push(e); + bad_recs.push(i); + } + } + } + if dmap_errors.len() > 0 { + return Err(DmapError::BadRecords(bad_recs, dmap_errors[0].to_string())); + } + Ok(dmap_records) + } + + /// Reads from dmap_data and parses into a collection of Records. + /// + /// Returns a tuple of `(good records, Option)`. + fn read_records_lax( + mut dmap_data: impl Read, + ) -> Result<(Vec, Option), DmapError> + where + Self: Sized, + Self: Send, + { + let mut buffer: Vec = vec![]; + dmap_data.read_to_end(&mut buffer)?; + + let mut dmap_records: Vec = vec![]; + let mut bad_byte: Option = None; + + let mut slices: Vec<_> = vec![]; + let mut rec_start: usize = 0; + let mut rec_size: usize; + let mut rec_end: usize; + + let mut rec_starts = vec![]; + while ((rec_start + 2 * i32::size()) as u64) < buffer.len() as u64 { + rec_size = i32::from_le_bytes(buffer[rec_start + 4..rec_start + 8].try_into().unwrap()) + as usize; // advance 4 bytes, skipping the "code" field + rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() + if rec_end > buffer.len() || rec_size <= 0 { + bad_byte = Some(rec_start); + break + // rec_start = buffer.len(); // break from loop + } else { + rec_starts.push(rec_start); + slices.push(Cursor::new(buffer[rec_start..rec_end].to_vec())); + rec_start = rec_end; + } 
+ } + let mut dmap_results: Vec> = vec![]; + dmap_results.par_extend( + slices + .par_iter_mut() + .map(|cursor| Self::parse_record(cursor)), + ); + + for (i, rec) in dmap_results.into_iter().enumerate() { + if let Ok(x) = rec { + dmap_records.push(x); + } else { + bad_byte = Some(rec_starts[i]); + break; + } + } + Ok((dmap_records, bad_byte)) + } + + /// Read a DMAP file of type `Self` + fn read_file(infile: &PathBuf) -> Result, DmapError> + where + Self: Sized, + Self: Send, + { + let file = File::open(infile)?; + match infile.extension() { + Some(ext) if ext == OsStr::new("bz2") => { + let compressor = BzDecoder::new(file); + Self::read_records(compressor) + } + _ => Self::read_records(file), + } + } + + /// Read a DMAP file of type `Self`, + fn read_file_lax(infile: &PathBuf) -> Result<(Vec, Option), DmapError> + where + Self: Sized, + Self: Send, + { + let file = File::open(infile)?; + match infile.extension() { + Some(ext) if ext == OsStr::new("bz2") => { + let compressor = BzDecoder::new(file); + Self::read_records_lax(compressor) + } + _ => Self::read_records_lax(file), + } + } + + /// Reads a record starting from cursor position + fn parse_record(cursor: &mut Cursor>) -> Result + where + Self: Sized, + { + let bytes_already_read = cursor.position(); + let _code = read_data::(cursor).map_err(|e| { + DmapError::InvalidRecord(format!( + "Cannot interpret code at byte {}: {e}", + bytes_already_read + )) + })?; + let size = read_data::(cursor).map_err(|e| { + DmapError::InvalidRecord(format!( + "Cannot interpret size at byte {}: {e}", + bytes_already_read + i32::size() as u64 + )) + })?; + + // adding 8 bytes because code and size are part of the record. 
+ if size as u64 > cursor.get_ref().len() as u64 - cursor.position() + 2 * i32::size() as u64 + { + return Err(DmapError::InvalidRecord(format!( + "Record size {size} at byte {} bigger than remaining buffer {}", + cursor.position() - i32::size() as u64, + cursor.get_ref().len() as u64 - cursor.position() + 2 * i32::size() as u64 + ))); + } else if size <= 0 { + return Err(DmapError::InvalidRecord(format!("Record size {size} <= 0"))); + } + + let num_scalars = read_data::(cursor).map_err(|e| { + DmapError::InvalidRecord(format!( + "Cannot interpret number of scalars at byte {}: {e}", + cursor.position() - i32::size() as u64 + )) + })?; + let num_vectors = read_data::(cursor).map_err(|e| { + DmapError::InvalidRecord(format!( + "Cannot interpret number of vectors at byte {}: {e}", + cursor.position() - i32::size() as u64 + )) + })?; + if num_scalars <= 0 { + return Err(DmapError::InvalidRecord(format!( + "Number of scalars {num_scalars} at byte {} <= 0", + cursor.position() - 2 * i32::size() as u64 + ))); + } else if num_vectors <= 0 { + return Err(DmapError::InvalidRecord(format!( + "Number of vectors {num_vectors} at byte {} <= 0", + cursor.position() - i32::size() as u64 + ))); + } else if num_scalars + num_vectors > size { + return Err(DmapError::InvalidRecord(format!( + "Number of scalars {num_scalars} plus vectors {num_vectors} greater than size '{size}'"))); + } + + let mut fields: IndexMap = IndexMap::new(); + for _ in 0..num_scalars { + let (name, val) = parse_scalar(cursor)?; + fields.insert(name, val); + } + for _ in 0..num_vectors { + let (name, val) = parse_vector(cursor, size)?; + fields.insert(name, val); + } + + if cursor.position() - bytes_already_read != size as u64 { + return Err(DmapError::InvalidRecord(format!( + "Bytes read {} does not match the records size field {}", + cursor.position() - bytes_already_read, + size + ))); + } + + Self::new(&mut fields) + } + + /// Creates a new object from the parsed scalars and vectors + fn new(fields: &mut 
IndexMap) -> Result + where + Self: Sized; + + /// Checks the validity of an `IndexMap` as a representation of a DMAP record. + /// + /// Validity checks include ensuring that no unfamiliar entries exist, that all required + /// scalar and vector fields exist, that all scalar and vector fields are of the expected + /// type, and that vector fields which are expected to have the same dimensions do indeed + /// have the same dimensions. + fn check_fields( + field_dict: &mut IndexMap, + fields_for_type: &Fields, + ) -> Result<(), DmapError> { + let unsupported_keys: Vec<&String> = field_dict + .keys() + .filter(|&k| !fields_for_type.all_fields.contains(&&**k)) + .collect(); + if !unsupported_keys.is_empty() { + Err(DmapError::InvalidRecord(format!( + "Unsupported fields {:?}, fields supported are {:?}", + unsupported_keys, fields_for_type.all_fields + )))? + } + + for (field, expected_type) in fields_for_type.scalars_required.iter() { + match field_dict.get(&field.to_string()) { + Some(DmapField::Scalar(x)) if &x.get_type() == expected_type => {} + Some(DmapField::Scalar(x)) => Err(DmapError::InvalidRecord(format!( + "Field {} has incorrect type {}, expected {}", + field, + x.get_type(), + expected_type + )))?, + Some(_) => Err(DmapError::InvalidRecord(format!( + "Field {} is a vector, expected scalar", + field + )))?, + None => Err(DmapError::InvalidRecord(format!( + "Field {field:?} ({:?}) missing: fields {:?}", + &field.to_string(), + field_dict.keys() + )))?, + } + } + for (field, expected_type) in fields_for_type.scalars_optional.iter() { + match field_dict.get(&field.to_string()) { + Some(DmapField::Scalar(x)) if &x.get_type() == expected_type => {} + Some(DmapField::Scalar(x)) => Err(DmapError::InvalidRecord(format!( + "Field {} has incorrect type {}, expected {}", + field, + x.get_type(), + expected_type + )))?, + Some(_) => Err(DmapError::InvalidRecord(format!( + "Field {} is a vector, expected scalar", + field + )))?, + None => {} + } + } + for (field, 
expected_type) in fields_for_type.vectors_required.iter() { + match field_dict.get(&field.to_string()) { + Some(DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( + "Field {} is a scalar, expected vector", + field + )))?, + Some(DmapField::Vector(x)) if &x.get_type() != expected_type => { + Err(DmapError::InvalidRecord(format!( + "Field {field} has incorrect type {:?}, expected {expected_type:?}", + x.get_type() + )))? + } + Some(&DmapField::Vector(_)) => {} + None => Err(DmapError::InvalidRecord(format!("Field {field} missing")))?, + } + } + for (field, expected_type) in fields_for_type.vectors_optional.iter() { + match field_dict.get(&field.to_string()) { + Some(&DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( + "Field {} is a scalar, expected vector", + field + )))?, + Some(DmapField::Vector(x)) if &x.get_type() != expected_type => { + Err(DmapError::InvalidRecord(format!( + "Field {field} has incorrect type {}, expected {expected_type}", + x.get_type() + )))? + } + _ => {} + } + } + // This block checks that grouped vector fields have the same dimensionality + for vec_group in fields_for_type.vector_dim_groups.iter() { + let vecs: Vec<(&str, &DmapVec)> = vec_group + .iter() + .filter_map(|&name| match field_dict.get(&name.to_string()) { + Some(DmapField::Vector(ref x)) => Some((name, x)), + Some(_) => None, + None => None, + }) + .collect(); + if vecs.len() > 1 { + let mut vec_iter = vecs.iter(); + let first = vec_iter.next().expect("Iterator broken"); + if !vec_iter.all(|(_, v)| v.shape() == first.1.shape()) { + let error_vec: Vec<(&str, &[usize])> = + vecs.iter().map(|(k, v)| (*k, v.shape())).collect(); + Err(DmapError::InvalidRecord(format!( + "Vector fields have inconsistent dimensions: {:?}", + error_vec + )))? + } + } + } + Ok(()) + } + + /// Attempts to massage the entries of an `IndexMap` into the proper types for a DMAP record. 
+ fn coerce>( + fields_dict: &mut IndexMap, + fields_for_type: &Fields, + ) -> Result { + let unsupported_keys: Vec<&String> = fields_dict + .keys() + .filter(|&k| !fields_for_type.all_fields.contains(&&**k)) + .collect(); + if !unsupported_keys.is_empty() { + Err(DmapError::InvalidRecord(format!( + "Unsupported fields {:?}, fields supported are {:?}", + unsupported_keys, fields_for_type.all_fields + )))? + } + + for (field, expected_type) in fields_for_type.scalars_required.iter() { + match fields_dict.get(&field.to_string()) { + Some(DmapField::Scalar(x)) if &x.get_type() != expected_type => { + fields_dict.insert( + field.to_string(), + DmapField::Scalar(x.cast_as(expected_type)?), + ); + } + Some(&DmapField::Scalar(_)) => {} + Some(_) => Err(DmapError::InvalidRecord(format!( + "Field {} is a vector, expected scalar", + field + )))?, + None => Err(DmapError::InvalidRecord(format!( + "Field {field:?} ({:?}) missing: fields {:?}", + &field.to_string(), + fields_dict.keys() + )))?, + } + } + for (field, expected_type) in fields_for_type.scalars_optional.iter() { + match fields_dict.get(&field.to_string()) { + Some(DmapField::Scalar(x)) if &x.get_type() == expected_type => {} + Some(DmapField::Scalar(x)) => { + fields_dict.insert( + field.to_string(), + DmapField::Scalar(x.cast_as(expected_type)?), + ); + } + Some(_) => Err(DmapError::InvalidRecord(format!( + "Field {} is a vector, expected scalar", + field + )))?, + None => {} + } + } + for (field, expected_type) in fields_for_type.vectors_required.iter() { + match fields_dict.get(&field.to_string()) { + Some(&DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( + "Field {} is a scalar, expected vector", + field + )))?, + Some(DmapField::Vector(x)) if &x.get_type() != expected_type => { + Err(DmapError::InvalidRecord(format!( + "Field {field} has incorrect type {:?}, expected {expected_type:?}", + x.get_type() + )))? 
+ } + Some(&DmapField::Vector(_)) => {} + None => Err(DmapError::InvalidRecord(format!("Field {field} missing")))?, + } + } + for (field, expected_type) in fields_for_type.vectors_optional.iter() { + match fields_dict.get(&field.to_string()) { + Some(&DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( + "Field {} is a scalar, expected vector", + field + )))?, + Some(DmapField::Vector(x)) if &x.get_type() != expected_type => { + Err(DmapError::InvalidRecord(format!( + "Field {field} has incorrect type {}, expected {expected_type}", + x.get_type() + )))? + } + _ => {} + } + } + + T::new(fields_dict) + } + + /// Attempts to copy `self` to a raw byte representation. + fn to_bytes(&self) -> Result, DmapError>; + + /// Converts the entries of an `IndexMap` into a raw byte representation, including metadata + /// about the entries (DMAP key, name\[, dimensions\]) + /// + /// If all is good, returns a tuple containing: + /// * the number of scalar fields + /// * the number of vector fields + /// * the raw bytes + fn data_to_bytes( + data: &IndexMap, + fields_for_type: &Fields, + ) -> Result<(i32, i32, Vec), DmapError> { + let mut data_bytes: Vec = vec![]; + let mut num_scalars: i32 = 0; + let mut num_vectors: i32 = 0; + + for (field, _) in fields_for_type.scalars_required.iter() { + match data.get(&field.to_string()) { + Some(x @ DmapField::Scalar(_)) => { + data_bytes.extend(field.as_bytes()); + data_bytes.extend([0]); // null-terminate string + data_bytes.append(&mut x.as_bytes()); + num_scalars += 1; + } + Some(_) => Err(DmapError::InvalidScalar(format!( + "Field {field} is a vector, expected scalar" + )))?, + None => Err(DmapError::InvalidRecord(format!( + "Field {field} missing from record" + )))?, + } + } + for (field, _) in fields_for_type.scalars_optional.iter() { + if let Some(x) = data.get(&field.to_string()) { + match x { + DmapField::Scalar(_) => { + data_bytes.extend(field.as_bytes()); + data_bytes.extend([0]); // null-terminate string + 
data_bytes.append(&mut x.as_bytes()); + num_scalars += 1; + } + DmapField::Vector(_) => Err(DmapError::InvalidScalar(format!( + "Field {field} is a vector, expected scalar" + )))?, + } + } + } + for (field, _) in fields_for_type.vectors_required.iter() { + match data.get(&field.to_string()) { + Some(x @ DmapField::Vector(_)) => { + data_bytes.extend(field.as_bytes()); + data_bytes.extend([0]); // null-terminate string + data_bytes.append(&mut x.as_bytes()); + num_vectors += 1; + } + Some(_) => Err(DmapError::InvalidVector(format!( + "Field {field} is a scalar, expected vector" + )))?, + None => Err(DmapError::InvalidRecord(format!( + "Field {field} missing from record" + )))?, + } + } + for (field, _) in fields_for_type.vectors_optional.iter() { + if let Some(x) = data.get(&field.to_string()) { + match x { + DmapField::Vector(_) => { + data_bytes.extend(field.as_bytes()); + data_bytes.extend([0]); // null-terminate string + data_bytes.append(&mut x.as_bytes()); + num_vectors += 1; + } + DmapField::Scalar(_) => Err(DmapError::InvalidVector(format!( + "Field {field} is a scalar, expected vector" + )))?, + } + } + } + + Ok((num_scalars, num_vectors, data_bytes)) + } +} + + +macro_rules! create_record_type { + ($format:ident, $fields:ident) => { + paste::paste! { + use crate::types::{DmapType, DmapField}; + use crate::error::DmapError; + use indexmap::IndexMap; + use crate::record::Record; + + /// Struct containing the checked fields of a single RAWACF record. + #[derive(Debug, PartialEq, Clone)] + pub struct [< $format:camel Record >] { + pub data: IndexMap, + } + + impl [< $format:camel Record >] { + /// Returns the field with name `key`, if it exists in the record. + pub fn get(&self, key: &String) -> Option<&DmapField> { + self.data.get(key) + } + + /// Returns the names of all fields stored in the record. 
+ pub fn keys(&self) -> Vec<&String> { + self.data.keys().collect() + } + } + + impl Record<'_> for [< $format:camel Record>] { + fn inner(self) -> IndexMap { + self.data + } + fn new(fields: &mut IndexMap) -> Result<[< $format:camel Record>], DmapError> { + match Self::check_fields(fields, &$fields) { + Ok(_) => {} + Err(e) => Err(e)?, + } + + Ok([< $format:camel Record >] { + data: fields.to_owned(), + }) + } + fn to_bytes(&self) -> Result, DmapError> { + let (num_scalars, num_vectors, mut data_bytes) = + Self::data_to_bytes(&self.data, &$fields)?; + + let mut bytes: Vec = vec![]; + bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter + bytes.extend((data_bytes.len() as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors + bytes.extend(num_scalars.as_bytes()); + bytes.extend(num_vectors.as_bytes()); + bytes.append(&mut data_bytes); // consumes data_bytes + Ok(bytes) + } + } + + impl TryFrom<&mut IndexMap> for [< $format:camel Record >] { + type Error = DmapError; + + fn try_from(value: &mut IndexMap) -> Result { + Self::coerce::<[< $format:camel Record>]>(value, &$fields) + } + } + } + } +} + +pub(crate) use create_record_type; + diff --git a/tests/tests.rs b/tests/tests.rs index 68d71df..fe8d95f 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,11 +1,11 @@ -use dmap::formats::dmap::Record; +use dmap::record::Record; use dmap::formats::fitacf::FitacfRecord; use dmap::formats::grid::GridRecord; use dmap::formats::iqdat::IqdatRecord; use dmap::formats::map::MapRecord; use dmap::formats::rawacf::RawacfRecord; use dmap::formats::snd::SndRecord; -use dmap::formats::generic::GenericRecord; +use dmap::formats::dmap::DmapRecord; use dmap::{write_dmap, write_iqdat, write_rawacf, write_fitacf, write_grid, write_map, write_snd}; use itertools::izip; use paste::paste; @@ -82,9 +82,9 @@ macro_rules! 
make_test { let mut tempfile: PathBuf = filename.clone(); tempfile.set_file_name(format!("tmp.{}.generic", stringify!($record_type))); - let gen_data = GenericRecord::read_file(&filename).expect("Unable to read file"); + let gen_data = DmapRecord::read_file(&filename).expect("Unable to read file"); _ = write_dmap(gen_data.clone(), &tempfile).expect("Unable to write to file"); - let new_recs = GenericRecord::read_file(&tempfile).expect("Cannot read tempfile"); + let new_recs = DmapRecord::read_file(&tempfile).expect("Cannot read tempfile"); for (new_rec, ref_rec) in izip!(new_recs.iter(), gen_data.iter()) { assert_eq!(new_rec, ref_rec) } From 2c37669cd71592f6ca25cc04a9001d10e4f200ae Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Wed, 25 Jun 2025 16:46:00 +0000 Subject: [PATCH 07/18] Added a few unit tests to types.rs --- src/types.rs | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/types.rs b/src/types.rs index 3fd908c..a82b05b 100644 --- a/src/types.rs +++ b/src/types.rs @@ -854,3 +854,78 @@ pub(crate) fn read_data(cursor: &mut Cursor>) -> Result Ok(parsed_data) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_read_vec() { + let bytes: Vec = vec![1, 0, 1, 0]; + let mut cursor = Cursor::new(bytes.clone()); + let data = read_vector::(&mut cursor, 4); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), vec![1, 0, 1, 0]); + + cursor.set_position(0); + let data = read_vector::(&mut cursor, 2); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), vec![1, 1]); + + cursor.set_position(0); + let data = read_vector::(&mut cursor, 4); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), vec![1, 0, 1, 0]); + + cursor.set_position(0); + let data = read_vector::(&mut cursor, 2); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), vec![1, 1]); + } + + #[test] + fn test_read_data() { + // bytes are little-endian, so this will come out to 1 no matter if you interpret the first + // number of 
bytes as u8, u16, u32, u64, i8, i16, i32, or i64. + let bytes: Vec = vec![1, 0, 0, 0, 0, 0, 0, 0]; + let mut cursor = Cursor::new(bytes); + let data = read_data::(&mut cursor); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), 1); + + cursor.set_position(0); + let data = read_data::(&mut cursor); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), 1); + + cursor.set_position(0); + let data = read_data::(&mut cursor); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), 1); + + cursor.set_position(0); + let data = read_data::(&mut cursor); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), 1); + + cursor.set_position(0); + let data = read_data::(&mut cursor); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), 1); + + cursor.set_position(0); + let data = read_data::(&mut cursor); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), 1); + + cursor.set_position(0); + let data = read_data::(&mut cursor); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), 1); + + cursor.set_position(0); + let data = read_data::(&mut cursor); + assert!(data.is_ok()); + assert_eq!(data.unwrap(), 1); + } +} From 26e76427b4db05bdc2e616d9f81b88c4807eb6f8 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Wed, 25 Jun 2025 19:03:50 +0000 Subject: [PATCH 08/18] Ran `cargo fmt` --- benches/io_benchmarking.rs | 2 +- src/formats/dmap.rs | 6 +- src/formats/fitacf.rs | 1 - src/formats/grid.rs | 1 - src/formats/iqdat.rs | 1 - src/formats/map.rs | 1 - src/formats/mod.rs | 1 - src/formats/rawacf.rs | 1 - src/formats/snd.rs | 1 - src/lib.rs | 17 +++-- src/record.rs | 15 +++-- src/types.rs | 129 +++++++++++++++++++++++++------------ tests/tests.rs | 11 ++-- 13 files changed, 116 insertions(+), 71 deletions(-) diff --git a/benches/io_benchmarking.rs b/benches/io_benchmarking.rs index 3773840..9216e4d 100644 --- a/benches/io_benchmarking.rs +++ b/benches/io_benchmarking.rs @@ -1,11 +1,11 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use dmap::record::Record; use 
dmap::formats::fitacf::FitacfRecord; use dmap::formats::grid::GridRecord; use dmap::formats::iqdat::IqdatRecord; use dmap::formats::map::MapRecord; use dmap::formats::rawacf::RawacfRecord; use dmap::formats::snd::SndRecord; +use dmap::record::Record; use paste::paste; use std::fs::File; diff --git a/src/formats/dmap.rs b/src/formats/dmap.rs index 78dbfd5..535f3ce 100644 --- a/src/formats/dmap.rs +++ b/src/formats/dmap.rs @@ -1,11 +1,11 @@ -//! Defines the `DmapRecord` struct which implements `Record`, which can be used -//! for reading/writing DMAP files without checking that certain fields are or +//! Defines the `DmapRecord` struct which implements `Record`, which can be used +//! for reading/writing DMAP files without checking that certain fields are or //! are not present, or have a given type. -use indexmap::IndexMap; use crate::error::DmapError; use crate::record::Record; use crate::types::{DmapField, DmapType}; +use indexmap::IndexMap; #[derive(Debug, PartialEq, Clone)] pub struct DmapRecord { diff --git a/src/formats/fitacf.rs b/src/formats/fitacf.rs index 3cd608a..628dca6 100644 --- a/src/formats/fitacf.rs +++ b/src/formats/fitacf.rs @@ -177,4 +177,3 @@ lazy_static! { } create_record_type!(fitacf, FITACF_FIELDS); - diff --git a/src/formats/grid.rs b/src/formats/grid.rs index da4bc1c..f7b3757 100644 --- a/src/formats/grid.rs +++ b/src/formats/grid.rs @@ -111,4 +111,3 @@ lazy_static! { } create_record_type!(grid, GRID_FIELDS); - diff --git a/src/formats/iqdat.rs b/src/formats/iqdat.rs index e579d2e..780aeef 100644 --- a/src/formats/iqdat.rs +++ b/src/formats/iqdat.rs @@ -93,4 +93,3 @@ lazy_static! { } create_record_type!(iqdat, IQDAT_FIELDS); - diff --git a/src/formats/map.rs b/src/formats/map.rs index bf59d1c..a2bd725 100644 --- a/src/formats/map.rs +++ b/src/formats/map.rs @@ -166,4 +166,3 @@ lazy_static! 
{ } create_record_type!(map, MAP_FIELDS); - diff --git a/src/formats/mod.rs b/src/formats/mod.rs index 43b3672..8bc4a01 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -19,5 +19,4 @@ pub mod rawacf; pub mod snd; /// The generic [Dmap file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/dmap_data/) - pub mod dmap; diff --git a/src/formats/rawacf.rs b/src/formats/rawacf.rs index 4d48af0..40827a4 100644 --- a/src/formats/rawacf.rs +++ b/src/formats/rawacf.rs @@ -83,4 +83,3 @@ lazy_static! { } create_record_type!(rawacf, RAWACF_FIELDS); - diff --git a/src/formats/snd.rs b/src/formats/snd.rs index 7fb92c6..2c59445 100644 --- a/src/formats/snd.rs +++ b/src/formats/snd.rs @@ -82,4 +82,3 @@ lazy_static! { } create_record_type!(snd, SND_FIELDS); - diff --git a/src/lib.rs b/src/lib.rs index b068631..48df26e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,8 +7,8 @@ pub mod error; pub mod formats; -pub mod types; pub mod record; +pub mod types; use crate::error::DmapError; use crate::formats::dmap::DmapRecord; @@ -57,7 +57,10 @@ fn bytes_to_file(bytes: Vec, outfile: &PathBuf) -> Result<(), std::io::Error /// /// Prefer using the specific functions, e.g. `write_dmap`, `write_rawacf`, etc. for their /// specific field checks. -pub fn write_records<'a>(mut recs: Vec>, outfile: &PathBuf) -> Result<(), DmapError> { +pub fn write_records<'a>( + mut recs: Vec>, + outfile: &PathBuf, +) -> Result<(), DmapError> { let mut bytes: Vec = vec![]; let (errors, rec_bytes): (Vec<_>, Vec<_>) = recs.par_iter_mut() @@ -111,7 +114,7 @@ where /// and attempts to coerce into `[Type]Record` then write to file. macro_rules! write_rust { ($type:ident) => { - paste! { + paste! { /// Write $type:upper records to `outfile`. 
pub fn [< write_ $type >](recs: Vec<[< $type:camel Record >]>, outfile: &PathBuf) -> Result<(), DmapError> { write_records(recs, outfile) @@ -180,12 +183,12 @@ fn read_lax Record<'a> + Send>( )) } -/// Creates functions for reading DMAP files for the Python API. -/// -/// Generates two functions: `read_[type]` and `read_[type]_lax`, for strict and lax +/// Creates functions for reading DMAP files for the Python API. +/// +/// Generates two functions: `read_[type]` and `read_[type]_lax`, for strict and lax /// reading, respectively. macro_rules! read_py { - ($name:ident, $py_name:literal, $lax_name:literal) => { + ($name:ident, $py_name:literal, $lax_name:literal) => { paste! { /// Reads a $name:upper file, returning a tuple of /// (list of dictionaries containing the fields, byte where first corrupted record starts). diff --git a/src/record.rs b/src/record.rs index fba25d5..2722889 100644 --- a/src/record.rs +++ b/src/record.rs @@ -51,7 +51,12 @@ pub trait Record<'a>: rec_start = rec_end; } if rec_start != buffer.len() { - return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} incomplete; has size of {} bytes", slices.len() + 1, rec_start, buffer.len() - rec_start))) + return Err(DmapError::InvalidRecord(format!( + "Record {} starting at byte {} incomplete; has size of {} bytes", + slices.len() + 1, + rec_start, + buffer.len() - rec_start + ))); } let mut dmap_results: Vec> = vec![]; dmap_results.par_extend( @@ -81,9 +86,7 @@ pub trait Record<'a>: /// Reads from dmap_data and parses into a collection of Records. /// /// Returns a tuple of `(good records, Option)`. 
- fn read_records_lax( - mut dmap_data: impl Read, - ) -> Result<(Vec, Option), DmapError> + fn read_records_lax(mut dmap_data: impl Read) -> Result<(Vec, Option), DmapError> where Self: Sized, Self: Send, @@ -106,7 +109,7 @@ pub trait Record<'a>: rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() if rec_end > buffer.len() || rec_size <= 0 { bad_byte = Some(rec_start); - break + break; // rec_start = buffer.len(); // break from loop } else { rec_starts.push(rec_start); @@ -534,7 +537,6 @@ pub trait Record<'a>: } } - macro_rules! create_record_type { ($format:ident, $fields:ident) => { paste::paste! { @@ -601,4 +603,3 @@ macro_rules! create_record_type { } pub(crate) use create_record_type; - diff --git a/src/types.rs b/src/types.rs index a82b05b..b7a8855 100644 --- a/src/types.rs +++ b/src/types.rs @@ -258,25 +258,23 @@ impl DmapVec { /// Copies the data and metadata (dimensions, `Type` key) to raw bytes pub(crate) fn as_bytes(&self) -> Vec { let mut bytes: Vec = DmapType::as_bytes(&self.get_type().key()).to_vec(); - + macro_rules! vec_to_bytes { - ($bytes:ident, $x:ident) => { - { - $bytes.extend(($x.ndim() as i32).to_le_bytes()); - for &dim in $x.shape().iter().rev() { - $bytes.extend((dim as i32).to_le_bytes()); - } - for y in $x.iter() { - $bytes.append(&mut DmapType::as_bytes(y).to_vec()); - } + ($bytes:ident, $x:ident) => {{ + $bytes.extend(($x.ndim() as i32).to_le_bytes()); + for &dim in $x.shape().iter().rev() { + $bytes.extend((dim as i32).to_le_bytes()); } - } + for y in $x.iter() { + $bytes.append(&mut DmapType::as_bytes(y).to_vec()); + } + }}; } match self { DmapVec::Char(x) => vec_to_bytes!(bytes, x), DmapVec::Short(x) => vec_to_bytes!(bytes, x), - DmapVec::Int(x) => vec_to_bytes!(bytes, x), + DmapVec::Int(x) => vec_to_bytes!(bytes, x), DmapVec::Long(x) => vec_to_bytes!(bytes, x), DmapVec::Uchar(x) => vec_to_bytes!(bytes, x), DmapVec::Ushort(x) => vec_to_bytes!(bytes, x), @@ -366,9 +364,10 @@ macro_rules! 
vec_impls { if let $enum_var(x) = value { Ok(x) } else { - Err(DmapError::InvalidVector( - format!("Cannot convert to {}", stringify!($type)) - )) + Err(DmapError::InvalidVector(format!( + "Cannot convert to {}", + stringify!($type) + ))) } } } @@ -385,13 +384,14 @@ macro_rules! vec_impls { fn try_from(value: DmapField) -> std::result::Result { match value { DmapField::Vector(x) => x.try_into(), - _ => Err(Self::Error::InvalidVector( - format!("Cannot interpret as {}", stringify!($type)) - )), + _ => Err(Self::Error::InvalidVector(format!( + "Cannot interpret as {}", + stringify!($type) + ))), } } } - } + }; } vec_impls!(ArrayD, DmapVec::Char); @@ -453,9 +453,10 @@ macro_rules! scalar_impls { fn try_from(value: DmapField) -> std::result::Result { match value { DmapField::Scalar(x) => x.try_into(), - _ => Err(Self::Error::InvalidScalar( - format!("Cannot interpret as {}", stringify!($type)), - )), + _ => Err(Self::Error::InvalidScalar(format!( + "Cannot interpret as {}", + stringify!($type) + ))), } } } @@ -466,12 +467,13 @@ macro_rules! scalar_impls { Ok(x) } else { Err(DmapError::InvalidScalar(format!( - "Unable to convert {value} to {}", stringify!($type) + "Unable to convert {value} to {}", + stringify!($type) ))) } } } - } + }; } scalar_impls!(i8, DmapScalar::Char); @@ -513,7 +515,7 @@ macro_rules! type_impls { AsBytes::as_bytes(self).to_vec() } fn from_bytes(bytes: &[u8]) -> Result - where + where Self: Sized, { Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) @@ -532,7 +534,7 @@ macro_rules! type_impls { bytes.to_vec() } fn from_bytes(bytes: &[u8]) -> Result - where + where Self: Sized, { Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) @@ -777,28 +779,75 @@ pub(crate) fn parse_vector( record_size ))); } - + macro_rules! 
dmapvec_from_cursor { ($type:ty, $enum_var:path, $dims:ident, $cursor:ident, $num_elements:ident, $name:ident) => { $enum_var( ArrayD::from_shape_vec($dims, read_vector::<$type>($cursor, $num_elements)?) .map_err(|e| { - DmapError::InvalidVector(format!("Could not read in vector field {name}: {e}")) - })? + DmapError::InvalidVector(format!( + "Could not read in vector field {name}: {e}" + )) + })?, ) - } + }; } let vector: DmapVec = match data_type { - Type::Char => dmapvec_from_cursor!(i8, DmapVec::Char, dimensions, cursor, total_elements, name), - Type::Short => dmapvec_from_cursor!(i16, DmapVec::Short, dimensions, cursor, total_elements, name), - Type::Int => dmapvec_from_cursor!(i32, DmapVec::Int, dimensions, cursor, total_elements, name), - Type::Long => dmapvec_from_cursor!(i64, DmapVec::Long, dimensions, cursor, total_elements, name), - Type::Uchar => dmapvec_from_cursor!(u8, DmapVec::Uchar, dimensions, cursor, total_elements, name), - Type::Ushort => dmapvec_from_cursor!(u16, DmapVec::Ushort, dimensions, cursor, total_elements, name), - Type::Uint => dmapvec_from_cursor!(u32, DmapVec::Uint, dimensions, cursor, total_elements, name), - Type::Ulong => dmapvec_from_cursor!(u64, DmapVec::Ulong, dimensions, cursor, total_elements, name), - Type::Float => dmapvec_from_cursor!(f32, DmapVec::Float, dimensions, cursor, total_elements, name), - Type::Double => dmapvec_from_cursor!(f64, DmapVec::Double, dimensions, cursor, total_elements, name), + Type::Char => { + dmapvec_from_cursor!(i8, DmapVec::Char, dimensions, cursor, total_elements, name) + } + Type::Short => dmapvec_from_cursor!( + i16, + DmapVec::Short, + dimensions, + cursor, + total_elements, + name + ), + Type::Int => { + dmapvec_from_cursor!(i32, DmapVec::Int, dimensions, cursor, total_elements, name) + } + Type::Long => { + dmapvec_from_cursor!(i64, DmapVec::Long, dimensions, cursor, total_elements, name) + } + Type::Uchar => { + dmapvec_from_cursor!(u8, DmapVec::Uchar, dimensions, cursor, 
total_elements, name) + } + Type::Ushort => dmapvec_from_cursor!( + u16, + DmapVec::Ushort, + dimensions, + cursor, + total_elements, + name + ), + Type::Uint => { + dmapvec_from_cursor!(u32, DmapVec::Uint, dimensions, cursor, total_elements, name) + } + Type::Ulong => dmapvec_from_cursor!( + u64, + DmapVec::Ulong, + dimensions, + cursor, + total_elements, + name + ), + Type::Float => dmapvec_from_cursor!( + f32, + DmapVec::Float, + dimensions, + cursor, + total_elements, + name + ), + Type::Double => dmapvec_from_cursor!( + f64, + DmapVec::Double, + dimensions, + cursor, + total_elements, + name + ), _ => { return Err(DmapError::InvalidVector(format!( "Invalid type {} for DMAP vector {}", diff --git a/tests/tests.rs b/tests/tests.rs index fe8d95f..4d4aa6d 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,15 +1,15 @@ -use dmap::record::Record; +use dmap::formats::dmap::DmapRecord; use dmap::formats::fitacf::FitacfRecord; use dmap::formats::grid::GridRecord; use dmap::formats::iqdat::IqdatRecord; use dmap::formats::map::MapRecord; use dmap::formats::rawacf::RawacfRecord; use dmap::formats::snd::SndRecord; -use dmap::formats::dmap::DmapRecord; -use dmap::{write_dmap, write_iqdat, write_rawacf, write_fitacf, write_grid, write_map, write_snd}; +use dmap::record::Record; +use dmap::{write_dmap, write_fitacf, write_grid, write_iqdat, write_map, write_rawacf, write_snd}; use itertools::izip; use paste::paste; -use std::fs::{File, remove_file}; +use std::fs::{remove_file, File}; use std::io::Write; use std::path::PathBuf; @@ -59,7 +59,7 @@ macro_rules! 
make_test { let filename: PathBuf = PathBuf::from(format!("tests/test_files/test.{}", stringify!($record_type))); let mut tempfile: PathBuf = filename.clone(); tempfile.set_file_name(format!("tmp.{}.corrupt", stringify!($record_type))); - + let _ = std::fs::copy(filename.clone(), tempfile.clone()).expect("Could not copy to tempfile"); let mut file = File::options().append(true).open(tempfile.clone()).unwrap(); writeln!(&mut file, "not a valid record").expect("Could not write to tempfile"); @@ -102,4 +102,3 @@ make_test!(fitacf); make_test!(grid); make_test!(map); make_test!(snd); - From b3365bdf207062db0a68d9985100cbac2232db88 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Thu, 3 Jul 2025 19:44:24 +0000 Subject: [PATCH 09/18] Added new functions to read from / write to bytes via Python API. Fleshed out README.md with a developer guide to explain the crate structure and what the files roughly contain. --- README.md | 46 +++++++++ src/lib.rs | 183 ++++++++++++++++++++++-------------- src/record.rs | 30 +++--- src/types.rs | 252 ++++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 405 insertions(+), 106 deletions(-) diff --git a/README.md b/README.md index 5d8e14a..88e8d2b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,49 @@ # Dmap Rust tools for SuperDARN DMAP file format operations. + +This project exposes both Rust and Python APIs for handling DMAP I/O. +I/O can be conducted either on byte buffers, or directly to/from files. + +The SuperDARN DMAP file formats are all supported (IQDAT, RAWACF, FITACF, GRID, MAP, and SND) +as well as a generic DMAP format that is unaware of any required fields or types +(e.g. char, int32) for any fields. + +## Developer Guidelines + +### `src/record.rs` +This file contains the `Record` trait, which defines a set of functions that specific DMAP formats must implement. 
+For example, `read_file(infile: &PathBuf) -> Result<Vec<Self>, DmapError>` is defined in the `Record` trait, and handles +reading in records from a file at the specified path. This function is generic, in that it doesn't know what type of records +(RAWACF, FITACF, etc.) are expected. Also, since it is a trait function, you can only use it through a struct which implements +the trait. For example, the `FitacfRecord` struct defined in `src/formats/fitacf.rs` implements the `Record` trait, and so +you can call `FitacfRecord::read_file(...)` to read a FITACF file, but you couldn't invoke `Record::read_file(...)`. + +### `src/types.rs` +This file defines necessary structs and enums for encapsulating basic types (`i8`, `u32`, `String`, etc.) into +objects like `DmapField`, `DmapScalar`, `DmapVec`, etc. that abstract over the supported underlying types. +For instance, when reading a scalar from a DMAP file, the underlying data type is inferred from the `type` field in the +scalar's metadata, so it can't be known beforehand. This requires some encapsulating type, `DmapScalar` in this case, +which contains the metadata of the field and has a known size on the stack. + +This file defines the `Fields` struct, which is used to hold the names and types of the required and optional +scalar and vector fields for a type of DMAP record (RAWACF, FITACF, etc.). + +This file defines the `DmapType` trait and implements it for supported data types that can be in DMAP records, namely +`u8`, `u16`, `u32`, `u64`, `i8`, `i16`, `i32`, `i64`, `f32`, `f64`, and `String`. Because the trait is implemented only for +these types, other types, e.g. `i128`, cannot be stored in DMAP records. + +Lastly, functions for parsing scalars and vectors from a byte buffer are defined in this file. + +### `src/formats` +This directory holds the files that define the DMAP record formats: IQDAT, RAWACF, FITACF, GRID, MAP, SND, and the generic DMAP. 
+If you are defining a new DMAP format, you will need to make a new file in this directory following the structure of the +existing files. Essentially, you define the scalar and vector fields, both required and optional, and the groups of vector +fields which must have identical dimensions, then call a macro to autogenerate the struct code for you. + +### `tests` +In `tests.rs`, integration tests for reading and writing all file types are present. Small example files +are contained in `tests/test_files`. + +### `benches/io_benchmarking.rs` +This file contains benchmarking functions for checking the performance of the basic read functions. diff --git a/src/lib.rs b/src/lib.rs index 48df26e..7bd223f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,7 @@ use bzip2::Compression; use indexmap::IndexMap; use paste::paste; use pyo3::prelude::*; +use pyo3::types::PyBytes; use rayon::iter::Either; use rayon::prelude::*; use std::ffi::OsStr; @@ -79,11 +80,10 @@ pub fn write_records<'a>( Ok(()) } -/// Attempts to convert `recs` to `T` then append to `outfile`. -fn try_write_generic Record<'a>>( +/// Attempts to convert `recs` to `T` then convert to bytes. +fn try_to_bytes Record<'a>>( mut recs: Vec>, - outfile: &PathBuf, -) -> Result<(), DmapError> +) -> Result, DmapError> where for<'a> >>::Error: Send + Debug, { @@ -105,8 +105,7 @@ where ))? } bytes.par_extend(rec_bytes.into_par_iter().flatten()); - bytes_to_file(bytes, outfile)?; - Ok(()) + Ok(bytes) } /// This macro generates two functions for writing to file. The first, `write_[type]`, takes in @@ -115,17 +114,18 @@ where macro_rules! write_rust { ($type:ident) => { paste! { - /// Write $type:upper records to `outfile`. + #[doc = "Write `" $type:upper "` records to `outfile`." ] pub fn [< write_ $type >](recs: Vec<[< $type:camel Record >]>, outfile: &PathBuf) -> Result<(), DmapError> { write_records(recs, outfile) } - /// Attempts to convert `recs` to `[< $type:camel Record >]` then append to `outfile`. 
+ #[doc = "Attempts to convert `recs` to `" $type:camel Record "` then append to `outfile`." ] pub fn [< try_write_ $type >]( recs: Vec>, outfile: &PathBuf, ) -> Result<(), DmapError> { - try_write_generic::<[< $type:camel Record >]>(recs, outfile) + let bytes = try_to_bytes::<[< $type:camel Record >]>(recs)?; + bytes_to_file(bytes, outfile).map_err(DmapError::from) } } } @@ -139,10 +139,10 @@ write_rust!(map); write_rust!(snd); write_rust!(dmap); -macro_rules! read_type { +macro_rules! read_rust { ($type:ident) => { paste! { - /// Read in a $type:upper file + #[doc = "Read in a `" $type:upper "` file" ] pub fn [< read_ $type >](infile: PathBuf) -> Result]>, DmapError> { [< $type:camel Record >]::read_file(&infile) } @@ -150,75 +150,81 @@ macro_rules! read_type { } } -read_type!(iqdat); -read_type!(rawacf); -read_type!(fitacf); -read_type!(grid); -read_type!(map); -read_type!(snd); -read_type!(dmap); - -/// Reads the data from infile into `Vec`. -/// -/// Returns `Err` if any records are corrupted. -fn read_generic Record<'a> + Send>( - infile: PathBuf, -) -> Result>, DmapError> { - Ok(T::read_file(&infile)? - .into_iter() - .map(|rec| rec.inner()) - .collect()) -} - -/// Reads the data from infile into a tuple of `([IndexMap], int|None)`, where -/// all valid records are returned, plus optionally the byte of the first record -/// with a corruption within the file. Compatible with RST behaviour. -fn read_lax Record<'a> + Send>( - infile: PathBuf, -) -> Result<(Vec>, Option), DmapError> { - let result = T::read_file_lax(&infile)?; - Ok(( - result.0.into_iter().map(|rec| rec.inner()).collect(), - result.1, - )) -} +read_rust!(iqdat); +read_rust!(rawacf); +read_rust!(fitacf); +read_rust!(grid); +read_rust!(map); +read_rust!(snd); +read_rust!(dmap); /// Creates functions for reading DMAP files for the Python API. /// /// Generates two functions: `read_[type]` and `read_[type]_lax`, for strict and lax /// reading, respectively. macro_rules! 
read_py { - ($name:ident, $py_name:literal, $lax_name:literal) => { + ($name:ident, $py_name:literal, $lax_name:literal, $bytes_name:literal) => { paste! { - /// Reads a $name:upper file, returning a tuple of - /// (list of dictionaries containing the fields, byte where first corrupted record starts). + #[doc = "Reads a `" $name:upper "` file, returning a list of dictionaries containing the fields." ] + #[pyfunction] + #[pyo3(name = $py_name)] + #[pyo3(text_signature = "(infile: str, /)")] + fn [< read_ $name _py >](infile: PathBuf) -> PyResult>> { + Ok([< $name:camel Record >]::read_file(&infile) + .map_err(PyErr::from)? + .into_iter() + .map(|rec| rec.inner()) + .collect() + ) + } + + #[doc = "Reads a `" $name:upper "` file, returning a tuple of" ] + #[doc = "(list of dictionaries containing the fields, byte where first corrupted record starts). "] #[pyfunction] #[pyo3(name = $lax_name)] #[pyo3(text_signature = "(infile: str, /)")] fn [< read_ $name _lax_py >]( infile: PathBuf, ) -> PyResult<(Vec>, Option)> { - read_lax::<[< $name:camel Record >]>(infile).map_err(PyErr::from) + let result = [< $name:camel Record >]::read_file_lax(&infile).map_err(PyErr::from)?; + Ok(( + result.0.into_iter().map(|rec| rec.inner()).collect(), + result.1, + )) } - /// Reads a $name:upper file, returning a list of dictionaries containing the fields. + #[doc = "Read in `" $name:upper "` records from bytes, returning `List[Dict]` of the records." ] #[pyfunction] - #[pyo3(name = $py_name)] - #[pyo3(text_signature = "(infile: str, /)")] - fn [< read_ $name _py >](infile: PathBuf) -> PyResult>> { - read_generic::<[< $name:camel Record >]>(infile).map_err(PyErr::from) + #[pyo3(name = $bytes_name)] + #[pyo3(text_signature = "(buf: bytes, /)")] + fn [< read_ $name _bytes_py >](bytes: &[u8]) -> PyResult>> { + Ok([< $name:camel Record >]::read_records(bytes)? 
+ .into_iter() + .map(|rec| rec.inner()) + .collect() + ) } } } } -read_py!(iqdat, "read_iqdat", "read_iqdat_lax"); -read_py!(rawacf, "read_rawacf", "read_rawacf_lax"); -read_py!(fitacf, "read_fitacf", "read_fitacf_lax"); -read_py!(grid, "read_grid", "read_grid_lax"); -read_py!(map, "read_map", "read_map_lax"); -read_py!(snd, "read_snd", "read_snd_lax"); -read_py!(dmap, "read_dmap", "read_dmap_lax"); +read_py!(iqdat, "read_iqdat", "read_iqdat_lax", "read_iqdat_bytes"); +read_py!( + rawacf, + "read_rawacf", + "read_rawacf_lax", + "read_rawacf_bytes" +); +read_py!( + fitacf, + "read_fitacf", + "read_fitacf_lax", + "read_fitacf_bytes" +); +read_py!(grid, "read_grid", "read_grid_lax", "read_grid_bytes"); +read_py!(map, "read_map", "read_map_lax", "read_map_bytes"); +read_py!(snd, "read_snd", "read_snd_lax", "read_snd_bytes"); +read_py!(dmap, "read_dmap", "read_dmap_lax", "read_dmap_bytes"); /// Checks that a list of dictionaries contains DMAP records, then appends to outfile. /// @@ -232,27 +238,52 @@ fn write_dmap_py(recs: Vec>, outfile: PathBuf) -> Py try_write_dmap(recs, &outfile).map_err(PyErr::from) } +/// Checks that a list of dictionaries contains valid DMAP records, then converts them to bytes. +/// Returns `list[bytes]`, one entry per record. +/// +/// **NOTE:** No type checking is done, so the fields may not be written as the expected +/// DMAP type, e.g. `stid` might be written one byte instead of two as this function +/// does not know that typically `stid` is two bytes. +#[pyfunction] +#[pyo3(name = "write_dmap_bytes")] +#[pyo3(text_signature = "(recs: list[dict], /)")] +fn write_dmap_bytes_py(recs: Vec>) -> PyResult> { + let bytes = try_to_bytes::(recs).map_err(PyErr::from)?; + Ok(bytes) +} + /// Generates functions exposed to the Python API for writing specific file types. macro_rules! write_py { - ($name:ident, $fn_name:literal) => { + ($name:ident, $fn_name:literal, $bytes_name:literal) => { paste! 
{ - /// Checks that a list of dictionaries contains valid $name:upper records, then appends to outfile. + #[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then appends to outfile." ] #[pyfunction] #[pyo3(name = $fn_name)] #[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] fn [< write_ $name _py >](recs: Vec>, outfile: PathBuf) -> PyResult<()> { [< try_write_ $name >](recs, &outfile).map_err(PyErr::from) } + + #[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then converts them to bytes." ] + #[doc = "Returns `list[bytes]`, one entry per record." ] + #[pyfunction] + #[pyo3(name = $bytes_name)] + #[pyo3(text_signature = "(recs: list[dict], /)")] + fn [< write_ $name _bytes_py >](py: Python, recs: Vec>) -> PyResult { + let bytes = try_to_bytes::<[< $name:camel Record >]>(recs).map_err(PyErr::from)?; + Ok(PyBytes::new_bound(py, &bytes).into()) + } } } } -write_py!(iqdat, "write_iqdat"); -write_py!(rawacf, "write_rawacf"); -write_py!(fitacf, "write_fitacf"); -write_py!(grid, "write_grid"); -write_py!(map, "write_map"); -write_py!(snd, "write_snd"); +// **NOTE** dmap type not included in this list, since it has a more descriptive docstring. +write_py!(iqdat, "write_iqdat", "write_iqdat_bytes"); +write_py!(rawacf, "write_rawacf", "write_rawacf_bytes"); +write_py!(fitacf, "write_fitacf", "write_fitacf_bytes"); +write_py!(grid, "write_grid", "write_grid_bytes"); +write_py!(map, "write_map", "write_map_bytes"); +write_py!(snd, "write_snd", "write_snd_bytes"); /// Functions for SuperDARN DMAP file format I/O. 
#[pymodule] @@ -275,6 +306,15 @@ fn dmap(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(read_grid_lax_py, m)?)?; m.add_function(wrap_pyfunction!(read_map_lax_py, m)?)?; + // Read functions from byte buffer + m.add_function(wrap_pyfunction!(read_dmap_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(read_iqdat_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(read_rawacf_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(read_fitacf_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(read_snd_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(read_grid_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(read_map_bytes_py, m)?)?; + // Write functions m.add_function(wrap_pyfunction!(write_dmap_py, m)?)?; m.add_function(wrap_pyfunction!(write_iqdat_py, m)?)?; @@ -284,5 +324,14 @@ fn dmap(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(write_map_py, m)?)?; m.add_function(wrap_pyfunction!(write_snd_py, m)?)?; + // Convert records to bytes + m.add_function(wrap_pyfunction!(write_dmap_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(write_iqdat_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(write_rawacf_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(write_fitacf_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(write_snd_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(write_grid_bytes_py, m)?)?; + m.add_function(wrap_pyfunction!(write_map_bytes_py, m)?)?; + Ok(()) } diff --git a/src/record.rs b/src/record.rs index 2722889..46837a2 100644 --- a/src/record.rs +++ b/src/record.rs @@ -15,12 +15,12 @@ use std::path::PathBuf; pub trait Record<'a>: Debug + Send + TryFrom<&'a mut IndexMap, Error = DmapError> { - /// Gets the underlying data of the Record. + /// Gets the underlying data of `self`. fn inner(self) -> IndexMap; - /// Reads from dmap_data and parses into a collection of Records. + /// Reads from `dmap_data` and parses into `Vec`. 
/// - /// Returns `DmapError` if dmap_data cannot be read or contains invalid data. + /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data. fn read_records(mut dmap_data: impl Read) -> Result, DmapError> where Self: Sized, @@ -83,9 +83,10 @@ pub trait Record<'a>: Ok(dmap_records) } - /// Reads from dmap_data and parses into a collection of Records. + /// Reads from `dmap_data` and parses into `Vec`. /// - /// Returns a tuple of `(good records, Option)`. + /// Returns a 2-tuple, where the first entry is the good records from the front of the buffer, + /// and the second entry is the byte where the first corrupted record starts, if applicable. fn read_records_lax(mut dmap_data: impl Read) -> Result<(Vec, Option), DmapError> where Self: Sized, @@ -151,7 +152,10 @@ pub trait Record<'a>: } } - /// Read a DMAP file of type `Self`, + /// Read a DMAP file of type `Self`. + /// + /// If the file is corrupted, it will return the leading uncorrupted records as well as the + /// position corresponding to the start of the first corrupted record. fn read_file_lax(infile: &PathBuf) -> Result<(Vec, Option), DmapError> where Self: Sized, @@ -167,7 +171,7 @@ pub trait Record<'a>: } } - /// Reads a record starting from cursor position + /// Reads a record from `cursor`. fn parse_record(cursor: &mut Cursor>) -> Result where Self: Sized, @@ -246,7 +250,7 @@ pub trait Record<'a>: Self::new(&mut fields) } - /// Creates a new object from the parsed scalars and vectors + /// Creates a new object from the parsed scalars and vectors. 
fn new(fields: &mut IndexMap) -> Result where Self: Sized; @@ -389,7 +393,7 @@ pub trait Record<'a>: DmapField::Scalar(x.cast_as(expected_type)?), ); } - Some(&DmapField::Scalar(_)) => {} + Some(DmapField::Scalar(_)) => {} Some(_) => Err(DmapError::InvalidRecord(format!( "Field {} is a vector, expected scalar", field @@ -419,7 +423,7 @@ pub trait Record<'a>: } for (field, expected_type) in fields_for_type.vectors_required.iter() { match fields_dict.get(&field.to_string()) { - Some(&DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( + Some(DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!( "Field {} is a scalar, expected vector", field )))?, @@ -429,7 +433,7 @@ pub trait Record<'a>: x.get_type() )))? } - Some(&DmapField::Vector(_)) => {} + Some(DmapField::Vector(_)) => {} None => Err(DmapError::InvalidRecord(format!("Field {field} missing")))?, } } @@ -456,7 +460,7 @@ pub trait Record<'a>: fn to_bytes(&self) -> Result, DmapError>; /// Converts the entries of an `IndexMap` into a raw byte representation, including metadata - /// about the entries (DMAP key, name\[, dimensions\]) + /// about the entries `(DMAP key, name\[, dimensions\])`. /// /// If all is good, returns a tuple containing: /// * the number of scalar fields @@ -545,7 +549,7 @@ macro_rules! create_record_type { use indexmap::IndexMap; use crate::record::Record; - /// Struct containing the checked fields of a single RAWACF record. + #[doc = "Struct containing the checked fields of a single `" $format:upper "` record." ] #[derive(Debug, PartialEq, Clone)] pub struct [< $format:camel Record >] { pub data: IndexMap, diff --git a/src/types.rs b/src/types.rs index b7a8855..f57cefd 100644 --- a/src/types.rs +++ b/src/types.rs @@ -152,6 +152,7 @@ impl DmapScalar { Self::String(_) => Type::String, } } + /// Converts `self` into a new `Type`, if possible. 
pub(crate) fn cast_as(&self, new_type: &Type) -> Result { match new_type { @@ -441,7 +442,7 @@ impl IntoPy for DmapField { /// `TryFrom for i8` /// `TryFrom for i8` macro_rules! scalar_impls { - ($type:ty, $enum_var:path) => { + ($type:ty, $enum_var:path, $type_var:path) => { impl From<$type> for DmapField { fn from(value: $type) -> Self { DmapField::Scalar($enum_var(value)) @@ -454,39 +455,26 @@ macro_rules! scalar_impls { match value { DmapField::Scalar(x) => x.try_into(), _ => Err(Self::Error::InvalidScalar(format!( - "Cannot interpret as {}", + "Cannot interpret {value:?} as {}", stringify!($type) ))), } } } - impl TryFrom for $type { - type Error = DmapError; - fn try_from(value: DmapScalar) -> std::result::Result { - if let $enum_var(x) = value { - Ok(x) - } else { - Err(DmapError::InvalidScalar(format!( - "Unable to convert {value} to {}", - stringify!($type) - ))) - } - } - } }; } -scalar_impls!(i8, DmapScalar::Char); -scalar_impls!(i16, DmapScalar::Short); -scalar_impls!(i32, DmapScalar::Int); -scalar_impls!(i64, DmapScalar::Long); -scalar_impls!(u8, DmapScalar::Uchar); -scalar_impls!(u16, DmapScalar::Ushort); -scalar_impls!(u32, DmapScalar::Uint); -scalar_impls!(u64, DmapScalar::Ulong); -scalar_impls!(f32, DmapScalar::Float); -scalar_impls!(f64, DmapScalar::Double); -scalar_impls!(String, DmapScalar::String); +scalar_impls!(i8, DmapScalar::Char, Type::Char); +scalar_impls!(i16, DmapScalar::Short, Type::Short); +scalar_impls!(i32, DmapScalar::Int, Type::Int); +scalar_impls!(i64, DmapScalar::Long, Type::Long); +scalar_impls!(u8, DmapScalar::Uchar, Type::Uchar); +scalar_impls!(u16, DmapScalar::Ushort, Type::Ushort); +scalar_impls!(u32, DmapScalar::Uint, Type::Uint); +scalar_impls!(u64, DmapScalar::Ulong, Type::Ulong); +scalar_impls!(f32, DmapScalar::Float, Type::Float); +scalar_impls!(f64, DmapScalar::Double, Type::Double); +scalar_impls!(String, DmapScalar::String, Type::String); /// Trait for raw types that can be stored in DMAP files. 
pub trait DmapType: std::fmt::Debug { @@ -576,6 +564,218 @@ impl DmapType for String { } } +impl TryFrom for u8 { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) => Ok(x as u8), + DmapScalar::Short(x) => Ok(x as u8), + DmapScalar::Int(x) => Ok(x as u8), + DmapScalar::Long(x) => Ok(x as u8), + DmapScalar::Uchar(x) => Ok(x), + DmapScalar::Ushort(x) => Ok(x as u8), + DmapScalar::Uint(x) => Ok(x as u8), + DmapScalar::Ulong(x) => Ok(x as u8), + DmapScalar::Float(x) => Ok(x as u8), + DmapScalar::Double(x) => Ok(x as u8), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to u8" + ))), + } + } +} +impl TryFrom for u16 { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) => Ok(x as u16), + DmapScalar::Short(x) => Ok(x as u16), + DmapScalar::Int(x) => Ok(x as u16), + DmapScalar::Long(x) => Ok(x as u16), + DmapScalar::Uchar(x) => Ok(x as u16), + DmapScalar::Ushort(x) => Ok(x), + DmapScalar::Uint(x) => Ok(x as u16), + DmapScalar::Ulong(x) => Ok(x as u16), + DmapScalar::Float(x) => Ok(x as u16), + DmapScalar::Double(x) => Ok(x as u16), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to u16" + ))), + } + } +} +impl TryFrom for u32 { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) => Ok(x as u32), + DmapScalar::Short(x) => Ok(x as u32), + DmapScalar::Int(x) => Ok(x as u32), + DmapScalar::Long(x) => Ok(x as u32), + DmapScalar::Uchar(x) => Ok(x as u32), + DmapScalar::Ushort(x) => Ok(x as u32), + DmapScalar::Uint(x) => Ok(x), + DmapScalar::Ulong(x) => Ok(x as u32), + DmapScalar::Float(x) => Ok(x as u32), + DmapScalar::Double(x) => Ok(x as u32), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to u32" + ))), + } + } +} +impl TryFrom for u64 { + 
type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) => Ok(x as u64), + DmapScalar::Short(x) => Ok(x as u64), + DmapScalar::Int(x) => Ok(x as u64), + DmapScalar::Long(x) => Ok(x as u64), + DmapScalar::Uchar(x) => Ok(x as u64), + DmapScalar::Ushort(x) => Ok(x as u64), + DmapScalar::Uint(x) => Ok(x as u64), + DmapScalar::Ulong(x) => Ok(x), + DmapScalar::Float(x) => Ok(x as u64), + DmapScalar::Double(x) => Ok(x as u64), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to u64" + ))), + } + } +} +impl TryFrom for i8 { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) => Ok(x), + DmapScalar::Short(x) => Ok(x as i8), + DmapScalar::Int(x) => Ok(x as i8), + DmapScalar::Long(x) => Ok(x as i8), + DmapScalar::Uchar(x) => Ok(x as i8), + DmapScalar::Ushort(x) => Ok(x as i8), + DmapScalar::Uint(x) => Ok(x as i8), + DmapScalar::Ulong(x) => Ok(x as i8), + DmapScalar::Float(x) => Ok(x as i8), + DmapScalar::Double(x) => Ok(x as i8), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to i8" + ))), + } + } +} +impl TryFrom for i16 { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) => Ok(x as i16), + DmapScalar::Short(x) => Ok(x), + DmapScalar::Int(x) => Ok(x as i16), + DmapScalar::Long(x) => Ok(x as i16), + DmapScalar::Uchar(x) => Ok(x as i16), + DmapScalar::Ushort(x) => Ok(x as i16), + DmapScalar::Uint(x) => Ok(x as i16), + DmapScalar::Ulong(x) => Ok(x as i16), + DmapScalar::Float(x) => Ok(x as i16), + DmapScalar::Double(x) => Ok(x as i16), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to i16" + ))), + } + } +} +impl TryFrom for i32 { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) 
=> Ok(x as i32), + DmapScalar::Short(x) => Ok(x as i32), + DmapScalar::Int(x) => Ok(x), + DmapScalar::Long(x) => Ok(x as i32), + DmapScalar::Uchar(x) => Ok(x as i32), + DmapScalar::Ushort(x) => Ok(x as i32), + DmapScalar::Uint(x) => Ok(x as i32), + DmapScalar::Ulong(x) => Ok(x as i32), + DmapScalar::Float(x) => Ok(x as i32), + DmapScalar::Double(x) => Ok(x as i32), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to i32" + ))), + } + } +} +impl TryFrom for i64 { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) => Ok(x as i64), + DmapScalar::Short(x) => Ok(x as i64), + DmapScalar::Int(x) => Ok(x as i64), + DmapScalar::Long(x) => Ok(x), + DmapScalar::Uchar(x) => Ok(x as i64), + DmapScalar::Ushort(x) => Ok(x as i64), + DmapScalar::Uint(x) => Ok(x as i64), + DmapScalar::Ulong(x) => Ok(x as i64), + DmapScalar::Float(x) => Ok(x as i64), + DmapScalar::Double(x) => Ok(x as i64), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to i64" + ))), + } + } +} +impl TryFrom for f32 { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) => Ok(x as f32), + DmapScalar::Short(x) => Ok(x as f32), + DmapScalar::Int(x) => Ok(x as f32), + DmapScalar::Long(x) => Ok(x as f32), + DmapScalar::Uchar(x) => Ok(x as f32), + DmapScalar::Ushort(x) => Ok(x as f32), + DmapScalar::Uint(x) => Ok(x as f32), + DmapScalar::Ulong(x) => Ok(x as f32), + DmapScalar::Float(x) => Ok(x), + DmapScalar::Double(x) => Ok(x as f32), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to f32" + ))), + } + } +} +impl TryFrom for f64 { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::Char(x) => Ok(x as f64), + DmapScalar::Short(x) => Ok(x as f64), + DmapScalar::Int(x) => Ok(x as f64), + 
DmapScalar::Long(x) => Ok(x as f64), + DmapScalar::Uchar(x) => Ok(x as f64), + DmapScalar::Ushort(x) => Ok(x as f64), + DmapScalar::Uint(x) => Ok(x as f64), + DmapScalar::Ulong(x) => Ok(x as f64), + DmapScalar::Float(x) => Ok(x as f64), + DmapScalar::Double(x) => Ok(x), + DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to f64" + ))), + } + } +} +impl TryFrom for String { + type Error = DmapError; + fn try_from(value: DmapScalar) -> std::result::Result { + match value { + DmapScalar::String(x) => Ok(x), + x => Err(DmapError::InvalidScalar(format!( + "Unable to convert {x} to String" + ))), + } + } +} + /// Verify that `name` exists in `fields` and is of the correct `Type`. pub fn check_scalar( fields: &mut IndexMap, From 33120f7aae381851b62a989015c595964a84a65c Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Thu, 3 Jul 2025 20:00:48 +0000 Subject: [PATCH 10/18] Version number bump --- Cargo.toml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 98f61b6..6e2757b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dmap" -version = "0.1.6" +version = "0.2.0" edition = "2021" rust-version = "1.63.0" diff --git a/pyproject.toml b/pyproject.toml index ff7b0d7..7b40afb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "darn-dmap" -version = "0.1.6" +version = "0.2.0" requires-python = ">=3.8" authors = [ { name = "Remington Rohel" } From 59539c48796b2ccf730d6c10b3f251176de17421 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Thu, 3 Jul 2025 20:05:30 +0000 Subject: [PATCH 11/18] Run all OS builds in CI for PR --- .github/workflows/CI.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 27cb4a9..596c377 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -59,7 +59,6 @@ jobs: musllinux: runs-on: 
${{ matrix.platform.runner }} - if: "startsWith(github.ref, 'refs/tags/')" strategy: matrix: platform: @@ -91,7 +90,6 @@ jobs: windows: runs-on: ${{ matrix.platform.runner }} - if: "startsWith(github.ref, 'refs/tags/')" strategy: matrix: platform: @@ -117,7 +115,6 @@ jobs: macos: runs-on: ${{ matrix.platform.runner }} - if: "startsWith(github.ref, 'refs/tags/')" strategy: matrix: platform: From e397d5e4e8eb2faa4e077027ec68cdc32b334165 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Mon, 7 Jul 2025 14:53:49 +0000 Subject: [PATCH 12/18] Updated CI.yml with new `maturin-ci generate` command. * Adds free-threaded Python 3.13 support --- .github/workflows/CI.yml | 81 ++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 20 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 596c377..72324e4 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,4 +1,4 @@ -# This file is autogenerated by maturin v1.7.0 +# This file is autogenerated by maturin v1.9.0 # To update, run # # maturin generate-ci github --zig @@ -25,18 +25,18 @@ jobs: strategy: matrix: platform: - - runner: ubuntu-latest + - runner: ubuntu-22.04 target: x86_64 - - runner: ubuntu-latest + - runner: ubuntu-22.04 target: x86 - - runner: ubuntu-latest + - runner: ubuntu-22.04 target: aarch64 - - runner: ubuntu-latest + - runner: ubuntu-22.04 target: armv7 - - runner: ubuntu-latest + - runner: ubuntu-22.04 target: s390x -# - runner: ubuntu-latest -# target: ppc64le + # - runner: ubuntu-22.04 + # target: ppc64le steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -51,6 +51,15 @@ jobs: manylinux: auto before-script-linux: | sudo apt update -y && sudo apt-get install -y libssl-dev openssl pkg-config + - name: Build free-threaded wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --zig -i python3.13t + sccache: 'false' + manylinux: auto + before-script-linux: | + 
sudo apt update -y && sudo apt-get install -y libssl-dev openssl pkg-config - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -62,13 +71,13 @@ jobs: strategy: matrix: platform: - - runner: ubuntu-latest + - runner: ubuntu-22.04 target: x86_64 - - runner: ubuntu-latest + - runner: ubuntu-22.04 target: x86 - - runner: ubuntu-latest + - runner: ubuntu-22.04 target: aarch64 - - runner: ubuntu-latest + - runner: ubuntu-22.04 target: armv7 steps: - uses: actions/checkout@v4 @@ -82,6 +91,15 @@ jobs: args: --release --out dist sccache: 'false' manylinux: musllinux_1_2 + before-script-linux: | + sudo apt update -y && sudo apt-get install -y libssl-dev openssl pkg-config + - name: Build free-threaded wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist -i python3.13t + sccache: 'false' + manylinux: musllinux_1_2 - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -95,6 +113,8 @@ jobs: platform: - runner: windows-latest target: x64 + - runner: windows-latest + target: x86 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -107,6 +127,16 @@ jobs: target: ${{ matrix.platform.target }} args: --release --out dist sccache: 'false' + - uses: actions/setup-python@v5 + with: + python-version: 3.13t + architecture: ${{ matrix.platform.target }} + - name: Build free-threaded wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist -i python3.13t + sccache: 'false' - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -118,7 +148,7 @@ jobs: strategy: matrix: platform: - - runner: macos-12 + - runner: macos-13 target: x86_64 - runner: macos-14 target: aarch64 @@ -133,6 +163,12 @@ jobs: target: ${{ matrix.platform.target }} args: --release --out dist sccache: 'false' + - name: Build free-threaded wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist -i 
python3.13t + sccache: 'false' - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -143,9 +179,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: 3.x - name: Build sdist uses: PyO3/maturin-action@v1 with: @@ -160,18 +193,26 @@ jobs: release: name: Release runs-on: ubuntu-latest - if: "startsWith(github.ref, 'refs/tags/')" + if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }} needs: [linux, musllinux, windows, macos, sdist] permissions: + # Use to sign the release artifacts id-token: write - environment: release + # Used to upload release artifacts + contents: write + # Used to generate artifact attestation + attestations: write steps: - uses: actions/download-artifact@v4 - - uses: actions/setup-python@v5 + - name: Generate artifact attestation + uses: actions/attest-build-provenance@v2 with: - python-version: 3.x + subject-path: 'wheels-*/*' - name: Publish to PyPI + if: ${{ startsWith(github.ref, 'refs/tags/') }} uses: PyO3/maturin-action@v1 + # env: + # MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} with: command: upload args: --non-interactive --skip-existing wheels-*/* From a0d7e251fd0b0c76d97769b55e02f0c23eba894a Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Mon, 7 Jul 2025 14:57:00 +0000 Subject: [PATCH 13/18] Disable free-threaded Python 3.13 support. 
--- .github/workflows/CI.yml | 56 ++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 72324e4..92a7ec2 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -51,15 +51,15 @@ jobs: manylinux: auto before-script-linux: | sudo apt update -y && sudo apt-get install -y libssl-dev openssl pkg-config - - name: Build free-threaded wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.platform.target }} - args: --release --out dist --zig -i python3.13t - sccache: 'false' - manylinux: auto - before-script-linux: | - sudo apt update -y && sudo apt-get install -y libssl-dev openssl pkg-config +# - name: Build free-threaded wheels +# uses: PyO3/maturin-action@v1 +# with: +# target: ${{ matrix.platform.target }} +# args: --release --out dist --zig -i python3.13t +# sccache: 'false' +# manylinux: auto +# before-script-linux: | +# sudo apt update -y && sudo apt-get install -y libssl-dev openssl pkg-config - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -93,13 +93,13 @@ jobs: manylinux: musllinux_1_2 before-script-linux: | sudo apt update -y && sudo apt-get install -y libssl-dev openssl pkg-config - - name: Build free-threaded wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.platform.target }} - args: --release --out dist -i python3.13t - sccache: 'false' - manylinux: musllinux_1_2 +# - name: Build free-threaded wheels +# uses: PyO3/maturin-action@v1 +# with: +# target: ${{ matrix.platform.target }} +# args: --release --out dist -i python3.13t +# sccache: 'false' +# manylinux: musllinux_1_2 - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -131,12 +131,12 @@ jobs: with: python-version: 3.13t architecture: ${{ matrix.platform.target }} - - name: Build free-threaded wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.platform.target }} - args: --release --out dist -i python3.13t - 
sccache: 'false' +# - name: Build free-threaded wheels +# uses: PyO3/maturin-action@v1 +# with: +# target: ${{ matrix.platform.target }} +# args: --release --out dist -i python3.13t +# sccache: 'false' - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -163,12 +163,12 @@ jobs: target: ${{ matrix.platform.target }} args: --release --out dist sccache: 'false' - - name: Build free-threaded wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.platform.target }} - args: --release --out dist -i python3.13t - sccache: 'false' +# - name: Build free-threaded wheels +# uses: PyO3/maturin-action@v1 +# with: +# target: ${{ matrix.platform.target }} +# args: --release --out dist -i python3.13t +# sccache: 'false' - name: Upload wheels uses: actions/upload-artifact@v4 with: From 03ce4382c5705f7823f25fb5e64fbb37a335adce Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Mon, 7 Jul 2025 20:28:23 +0000 Subject: [PATCH 14/18] Return Python bytes for `write_dmap_bytes` --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7bd223f..ceba139 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -247,9 +247,9 @@ fn write_dmap_py(recs: Vec>, outfile: PathBuf) -> Py #[pyfunction] #[pyo3(name = "write_dmap_bytes")] #[pyo3(text_signature = "(recs: list[dict], /)")] -fn write_dmap_bytes_py(recs: Vec>) -> PyResult> { +fn write_dmap_bytes_py(py: Python, recs: Vec>) -> PyResult { let bytes = try_to_bytes::(recs).map_err(PyErr::from)?; - Ok(bytes) + Ok(PyBytes::new_bound(py, &bytes).into()) } /// Generates functions exposed to the Python API for writing specific file types. From 2236bea614a3bdd6e000fdef52ed92539b0fbec7 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Wed, 9 Jul 2025 21:51:11 +0000 Subject: [PATCH 15/18] Added lax reading from bytes for Python API. 
--- src/lib.rs | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ceba139..2b8d1d5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -163,7 +163,7 @@ read_rust!(dmap); /// Generates two functions: `read_[type]` and `read_[type]_lax`, for strict and lax /// reading, respectively. macro_rules! read_py { - ($name:ident, $py_name:literal, $lax_name:literal, $bytes_name:literal) => { + ($name:ident, $py_name:literal, $lax_name:literal, $bytes_name:literal, $lax_bytes_name:literal) => { paste! { #[doc = "Reads a `" $name:upper "` file, returning a list of dictionaries containing the fields." ] #[pyfunction] @@ -204,27 +204,44 @@ macro_rules! read_py { .collect() ) } + + #[doc = "Reads a `" $name:upper "` file, returning a tuple of" ] + #[doc = "(list of dictionaries containing the fields, byte where first corrupted record starts). "] + #[pyfunction] + #[pyo3(name = $lax_bytes_name)] + #[pyo3(text_signature = "(buf: bytes, /)")] + fn [< read_ $name _bytes_lax_py >]( + bytes: &[u8], + ) -> PyResult<(Vec>, Option)> { + let result = [< $name:camel Record >]::read_records_lax(bytes).map_err(PyErr::from)?; + Ok(( + result.0.into_iter().map(|rec| rec.inner()).collect(), + result.1, + )) + } } } } -read_py!(iqdat, "read_iqdat", "read_iqdat_lax", "read_iqdat_bytes"); +read_py!(iqdat, "read_iqdat", "read_iqdat_lax", "read_iqdat_bytes", "read_iqdat_bytes_lax"); read_py!( rawacf, "read_rawacf", "read_rawacf_lax", - "read_rawacf_bytes" + "read_rawacf_bytes", + "read_rawacf_bytes_lax" ); read_py!( fitacf, "read_fitacf", "read_fitacf_lax", - "read_fitacf_bytes" + "read_fitacf_bytes", + "read_fitacf_bytes_lax" ); -read_py!(grid, "read_grid", "read_grid_lax", "read_grid_bytes"); -read_py!(map, "read_map", "read_map_lax", "read_map_bytes"); -read_py!(snd, "read_snd", "read_snd_lax", "read_snd_bytes"); -read_py!(dmap, "read_dmap", "read_dmap_lax", "read_dmap_bytes"); +read_py!(grid, "read_grid", 
"read_grid_lax", "read_grid_bytes", "read_grid_bytes_lax"); +read_py!(map, "read_map", "read_map_lax", "read_map_bytes", "read_map_bytes_lax"); +read_py!(snd, "read_snd", "read_snd_lax", "read_snd_bytes", "read_snd_bytes_lax"); +read_py!(dmap, "read_dmap", "read_dmap_lax", "read_dmap_bytes", "read_dmap_bytes_lax"); /// Checks that a list of dictionaries contains DMAP records, then appends to outfile. /// @@ -315,6 +332,15 @@ fn dmap(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(read_grid_bytes_py, m)?)?; m.add_function(wrap_pyfunction!(read_map_bytes_py, m)?)?; + // Lax read functions from byte buffer + m.add_function(wrap_pyfunction!(read_dmap_bytes_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_iqdat_bytes_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_rawacf_bytes_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_fitacf_bytes_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_snd_bytes_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_grid_bytes_lax_py, m)?)?; + m.add_function(wrap_pyfunction!(read_map_bytes_lax_py, m)?)?; + // Write functions m.add_function(wrap_pyfunction!(write_dmap_py, m)?)?; m.add_function(wrap_pyfunction!(write_iqdat_py, m)?)?; From 5dd20e7f03601322700e93b8c72cf6e7265d07b8 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Wed, 9 Jul 2025 21:53:44 +0000 Subject: [PATCH 16/18] Version bump and CI.yml update to fix uploading to PyPI --- .github/workflows/CI.yml | 4 ++-- Cargo.toml | 2 +- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 92a7ec2..e2d6975 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -211,8 +211,8 @@ jobs: - name: Publish to PyPI if: ${{ startsWith(github.ref, 'refs/tags/') }} uses: PyO3/maturin-action@v1 - # env: - # MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} with: command: upload args: 
--non-interactive --skip-existing wheels-*/* diff --git a/Cargo.toml b/Cargo.toml index 6e2757b..d3b8730 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dmap" -version = "0.2.0" +version = "0.2.1" edition = "2021" rust-version = "1.63.0" diff --git a/pyproject.toml b/pyproject.toml index 7b40afb..b964138 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "darn-dmap" -version = "0.2.0" +version = "0.2.1" requires-python = ">=3.8" authors = [ { name = "Remington Rohel" } From f926c8a3a4bdd2bc91d9b68d174641e0edd39f7a Mon Sep 17 00:00:00 2001 From: Remington Rohel <77300402+RemingtonRohel@users.noreply.github.com> Date: Tue, 19 Aug 2025 15:17:41 +0000 Subject: [PATCH 17/18] Added ability to "sniff" first record of files. (#24) * Useful for getting a look at the contents without having to read in the entire file. * Doesn't decompress the entire file if using bz2 compression; will only decompress what is necessary to read the first record. --- src/lib.rs | 38 ++++++++++++++++++++++++++++++-------- src/record.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ tests/tests.rs | 8 ++++++++ 3 files changed, 84 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2b8d1d5..75d8847 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -163,7 +163,7 @@ read_rust!(dmap); /// Generates two functions: `read_[type]` and `read_[type]_lax`, for strict and lax /// reading, respectively. macro_rules! read_py { - ($name:ident, $py_name:literal, $lax_name:literal, $bytes_name:literal, $lax_bytes_name:literal) => { + ($name:ident, $py_name:literal, $lax_name:literal, $bytes_name:literal, $lax_bytes_name:literal, $sniff_name:literal) => { paste! { #[doc = "Reads a `" $name:upper "` file, returning a list of dictionaries containing the fields." ] #[pyfunction] @@ -219,29 +219,42 @@ macro_rules! read_py { result.1, )) } + + #[doc = "Reads a `" $name:upper "` file, returning the first record." 
] + #[pyfunction] + #[pyo3(name = $sniff_name)] + #[pyo3(text_signature = "(infile: str, /)")] + fn [< sniff_ $name _py >](infile: PathBuf) -> PyResult> { + Ok([< $name:camel Record >]::sniff_file(&infile) + .map_err(PyErr::from)? + .inner() + ) + } } } } -read_py!(iqdat, "read_iqdat", "read_iqdat_lax", "read_iqdat_bytes", "read_iqdat_bytes_lax"); +read_py!(iqdat, "read_iqdat", "read_iqdat_lax", "read_iqdat_bytes", "read_iqdat_bytes_lax", "sniff_iqdat"); read_py!( rawacf, "read_rawacf", "read_rawacf_lax", "read_rawacf_bytes", - "read_rawacf_bytes_lax" + "read_rawacf_bytes_lax", + "sniff_rawacf" ); read_py!( fitacf, "read_fitacf", "read_fitacf_lax", "read_fitacf_bytes", - "read_fitacf_bytes_lax" + "read_fitacf_bytes_lax", + "sniff_fitacf" ); -read_py!(grid, "read_grid", "read_grid_lax", "read_grid_bytes", "read_grid_bytes_lax"); -read_py!(map, "read_map", "read_map_lax", "read_map_bytes", "read_map_bytes_lax"); -read_py!(snd, "read_snd", "read_snd_lax", "read_snd_bytes", "read_snd_bytes_lax"); -read_py!(dmap, "read_dmap", "read_dmap_lax", "read_dmap_bytes", "read_dmap_bytes_lax"); +read_py!(grid, "read_grid", "read_grid_lax", "read_grid_bytes", "read_grid_bytes_lax", "sniff_grid"); +read_py!(map, "read_map", "read_map_lax", "read_map_bytes", "read_map_bytes_lax", "sniff_map"); +read_py!(snd, "read_snd", "read_snd_lax", "read_snd_bytes", "read_snd_bytes_lax", "sniff_snd"); +read_py!(dmap, "read_dmap", "read_dmap_lax", "read_dmap_bytes", "read_dmap_bytes_lax", "sniff_dmap"); /// Checks that a list of dictionaries contains DMAP records, then appends to outfile. 
/// @@ -359,5 +372,14 @@ fn dmap(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(write_grid_bytes_py, m)?)?; m.add_function(wrap_pyfunction!(write_map_bytes_py, m)?)?; + // Sniff the first record + m.add_function(wrap_pyfunction!(sniff_dmap_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_iqdat_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_rawacf_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_fitacf_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_snd_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_grid_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_map_py, m)?)?; + Ok(()) } diff --git a/src/record.rs b/src/record.rs index 46837a2..3bc99e5 100644 --- a/src/record.rs +++ b/src/record.rs @@ -18,6 +18,36 @@ pub trait Record<'a>: /// Gets the underlying data of `self`. fn inner(self) -> IndexMap; + /// Reads from `dmap_data` and parses into `Vec`. + /// + /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data. + fn read_first_record(mut dmap_data: impl Read) -> Result + where + Self: Sized, + Self: Send, + { + let mut buffer = [0; 8]; // record size should be an i32 of the data + let read_result = dmap_data.read(&mut buffer[..])?; + if read_result < buffer.len() { + return Err(DmapError::CorruptStream("Unable to read size of first record")) + } + + let rec_size = i32::from_le_bytes(buffer[4..8].try_into().unwrap()) as usize; // advance 4 bytes, skipping the "code" field + if rec_size <= 0 { + return Err(DmapError::InvalidRecord(format!( + "Record 0 starting at byte 0 has non-positive size {} <= 0", + rec_size + ))); + } + + let mut rec = vec![0; rec_size]; + rec[0..8].clone_from_slice(&buffer[..]); + dmap_data.read_exact(&mut rec[8..])?; + let first_rec = Self::parse_record(&mut Cursor::new(rec))?; + + Ok(first_rec) + } + /// Reads from `dmap_data` and parses into `Vec`. /// /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data. 
@@ -171,6 +201,22 @@ pub trait Record<'a>: } } + /// Reads the first record of a DMAP file of type `Self`. + fn sniff_file(infile: &PathBuf) -> Result + where + Self: Sized, + Self: Send, + { + let file = File::open(infile)?; + match infile.extension() { + Some(ext) if ext == OsStr::new("bz2") => { + let compressor = BzDecoder::new(file); + Self::read_first_record(compressor) + } + _ => Self::read_first_record(file), + } + } + /// Reads a record from `cursor`. fn parse_record(cursor: &mut Cursor>) -> Result where diff --git a/tests/tests.rs b/tests/tests.rs index 4d4aa6d..8266b66 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -92,6 +92,14 @@ macro_rules! make_test { // Clean up tempfile remove_file(&tempfile).expect("Unable to delete tempfile"); } + + #[test] + fn [< test_ $record_type _sniff >] () { + let filename: PathBuf = PathBuf::from(format!("tests/test_files/test.{}", stringify!($record_type))); + let data = [< $record_type:camel Record >]::sniff_file(&filename).expect("Unable to sniff file"); + let all_recs = [< $record_type:camel Record >]::read_file(&filename).expect("Unable to read file"); + assert_eq!(data, all_recs[0]) + } } }; } From 86e073d809ed9d6716065c55865cb680484fec35 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Tue, 19 Aug 2025 15:22:58 +0000 Subject: [PATCH 18/18] Version bump to 0.3.0 --- Cargo.toml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d3b8730..ec8556d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dmap" -version = "0.2.1" +version = "0.3.0" edition = "2021" rust-version = "1.63.0" diff --git a/pyproject.toml b/pyproject.toml index b964138..bb456ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "darn-dmap" -version = "0.2.1" +version = "0.3.0" requires-python = ">=3.8" authors = [ { name = "Remington Rohel" }