From 319ec81ed3288e24c0a092cfaf63706369de79ef Mon Sep 17 00:00:00 2001
From: Alexander McKim <alexander.mckim@cuanschutz.edu>
Date: Wed, 27 May 2026 17:01:54 -0600
Subject: [PATCH 1/6] adding tests and data, new vignette, biocparallel

---
 R/arg_check_ml.R              |   5 +-
 R/core_ml.R                   | 151 ++++++++++---
 R/data.R                      |  18 ++
 R/generate_matrices_ml.R      |  13 +-
 R/ife_ml.R                    |  18 ++
 R/plot_ml.R                   |  55 ++++-
 R/prep_ml.R                   |  45 +++-
 R/run_Fisher_tests.R          |  50 +++++
 R/run_ML.R                    |  62 +++---
 R/run_ml_pipeline.R           |   9 +
 data/demo_fit.rda             | Bin 0 -> 24156 bytes
 data/demo_ml_tibble.rda       | Bin 0 -> 848 bytes
 inst/scripts/make_demo_data.R |  54 +++++
 man/amRml-package.Rd          |   5 +
 man/applyBenjaminiHochberg.Rd |   7 +
 man/buildLRModel.Rd           |   6 +-
 man/buildRecipe.Rd            |  13 +-
 man/buildTuningGrid.Rd        |  13 +-
 man/buildWflow.Rd             |  11 +
 man/calculateEvalMets.Rd      |  17 ++
 man/calculateMinSamples.Rd    |   4 +
 man/computeFeatureFreq.Rd     |  13 ++
 man/createMLResultDir.Rd      |  17 +-
 man/createMLinputList.Rd      |   5 +-
 man/demo_fit.Rd               |  20 ++
 man/demo_ml_tibble.Rd         |  22 ++
 man/dot-getTargetVarName.Rd   |   9 +
 man/encodePhenotype.Rd        |  11 +
 man/extractTopFeats.Rd        |   8 +-
 man/fitBestModel.Rd           |  12 ++
 man/getConfusionMatrix.Rd     |  15 ++
 man/getNumFeat.Rd             |   9 +
 man/loadMLInputTibble.Rd      |  15 +-
 man/plotBaselineComparison.Rd |  11 +
 man/plotDefaultEval.Rd        |  18 +-
 man/plotFishers.Rd            |  17 +-
 man/plotPRC.Rd                |  17 ++
 man/plotTopFeatsVI.Rd         |   6 +-
 man/predictML.Rd              |   6 +
 man/removeTopFeats.Rd         |  11 +
 man/runFisherTests.Rd         |  10 +
 man/runFishers.Rd             |  14 ++
 man/runIFE.Rd                 |  21 +-
 man/runMDRmodels.Rd           |  12 +-
 man/runMLPipeline.Rd          |  42 ++--
 man/runMLmodels.Rd            |  24 ++-
 man/runModelingPipeline.Rd    |  11 +
 man/selectBestModel.Rd        |  13 +-
 man/shuffleLabels.Rd          |  10 +-
 man/splitMLInputTibble.Rd     |  13 +-
 man/tuneGrid.Rd               |  11 +-
 vignettes/intro.Rmd           | 395 ++++++++++++++--------------------
 52 files changed, 995 insertions(+), 379 deletions(-)
 create mode 100644 R/data.R
 create mode 100644 data/demo_fit.rda
 create mode 100644 data/demo_ml_tibble.rda
 create mode 100644 inst/scripts/make_demo_data.R
 create mode 100644 man/demo_fit.Rd
 create mode 100644 man/demo_ml_tibble.Rd

diff --git a/R/arg_check_ml.R b/R/arg_check_ml.R
index 41f01c8..f6acf09 100644
--- a/R/arg_check_ml.R
+++ b/R/arg_check_ml.R
@@ -1,5 +1,6 @@
 # This script contains functions to check the arguments of ML functions.
 
+#' @importFrom methods is
 #' @importFrom tibble is_tibble
 NULL
 
@@ -254,7 +255,7 @@ NULL
 #' `buildLRModel()` (random forest and boosted tree support planned)
 #'
 .checkArgParsnipMod <- function(parsnip_mod) {
-  if (class(parsnip_mod)[2] != "model_spec") {
+  if (!is(parsnip_mod, "model_spec")) {
     stop("A `parsnip` model was expected but not received.")
   }
 }
@@ -418,7 +419,7 @@ NULL
 #' @param tune_res Results of grid tuning, such as the output of `tuneGrid()`
 #'
 .checkArgTuneRes <- function(tune_res) {
-  if (class(tune_res)[1] != "tune_results") {
+  if (!is(tune_res, "tune_results")) {
     stop("The `tune_res` argument can only take `tune_results` objects.")
   }
 }
diff --git a/R/core_ml.R b/R/core_ml.R
index 5846b40..37f04cd 100644
--- a/R/core_ml.R
+++ b/R/core_ml.R
@@ -12,6 +12,7 @@
 #' @importFrom dplyr select
 #' @importFrom dplyr slice
 #' @importFrom hardhat tune
+#' @importFrom methods is
 #' @importFrom parsnip augment
 #' @importFrom parsnip boost_tree
 #' @importFrom parsnip extract_fit_engine
@@ -71,34 +72,42 @@ NULL
 #' be designated as training and validation, respectively.
 #' @param seed [num] For reproducible analysis
 #' @return An `rsplit` object
+#' @examples
+#' ml <- tibble::tibble(
+#'   genome_id = paste0("g", 1:20),
+#'   genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 10),
+#'   feat_a = rep(c(0L, 1L), 10),
+#'   feat_b = rep(c(1L, 0L), 10)
+#' )
+#' splitMLInputTibble(ml, split = c(1, 0), seed = 42)
 #' @export
 splitMLInputTibble <- function(ml_input_tibble, split = c(0.6, 0.2), seed = 5280) {
   .checkArgTibble(ml_input_tibble, ml = TRUE)
   .checkArgSplit(split)
   .checkArgSeed(seed)
 
-  set.seed(seed)
-
   target_var <- .getTargetVarName(ml_input_tibble)
 
   # Split the data, maintaining R/S proportions.
-  if (split[2] == 0) {
-    # If in CV mode:
-    # Still retain a stratified testing holdout purely for final reporting metrics;
-    # CV is only performed on the training portion.
-    prop_train_for_holdout <- 0.8 # 80 percent train, 20 percent reserved test
-    data_split <- rsample::initial_split(
-      ml_input_tibble,
-      prop   = prop_train_for_holdout,
-      strata = !!target_var
-    )
-  } else {
-    data_split <- rsample::initial_validation_split(
-      ml_input_tibble,
-      prop   = split,
-      strata = !!target_var
-    )
-  }
+  data_split <- withr::with_seed(seed, {
+    if (split[2] == 0) {
+      # If in CV mode:
+      # Still retain a stratified testing holdout purely for final reporting metrics;
+      # CV is only performed on the training portion.
+      prop_train_for_holdout <- 0.8 # 80 percent train, 20 percent reserved test
+      rsample::initial_split(
+        ml_input_tibble,
+        prop   = prop_train_for_holdout,
+        strata = !!target_var
+      )
+    } else {
+      rsample::initial_validation_split(
+        ml_input_tibble,
+        prop   = split,
+        strata = !!target_var
+      )
+    }
+  })
 
   return(data_split)
 }
@@ -114,6 +123,14 @@ splitMLInputTibble <- function(ml_input_tibble, split = c(0.6, 0.2), seed = 5280
 #' @param pca_threshold [num] The proportion of total variance for which the
 #' principle components account
 #' @return A `recipe` object
+#' @examples
+#' train <- tibble::tibble(
+#'   genome_id = paste0("g", 1:10),
+#'   genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 5),
+#'   feat_a = rep(c(0L, 1L), 5),
+#'   feat_b = rep(c(1L, 0L), 5)
+#' )
+#' buildRecipe(train, use_pca = FALSE)
 #' @export
 buildRecipe <- function(train_data, use_pca = FALSE, pca_threshold = 0.95) {
   .checkArgTibble(train_data, ml = TRUE)
@@ -157,6 +174,9 @@ buildRecipe <- function(train_data, use_pca = FALSE, pca_threshold = 0.95) {
 #' @param multi_class [bool] Whether to construct a model for multi-class
 #' classification
 #' @return A `parsnip` `logistic_reg` object
+#' @examples
+#' buildLRModel()
+#' buildLRModel(multi_class = TRUE)
 #' @export
 buildLRModel <- function(multi_class = FALSE) {
   .checkArgMultiClass(multi_class)
@@ -186,6 +206,16 @@ buildLRModel <- function(multi_class = FALSE) {
 #' `buildLRModel()` (random forest and boosted tree support planned)
 #' @param recipe A recipe, such as the output of `buildRecipe()`
 #' @return A `workflow` object
+#' @examples
+#' train <- tibble::tibble(
+#'   genome_id = paste0("g", 1:10),
+#'   genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 5),
+#'   feat_a = rep(c(0L, 1L), 5),
+#'   feat_b = rep(c(1L, 0L), 5)
+#' )
+#' rec <- buildRecipe(train, use_pca = FALSE)
+#' lr  <- buildLRModel()
+#' buildWflow(lr, rec)
 #' @export
 buildWflow <- function(parsnip_mod, recipe) {
   .checkArgParsnipMod(parsnip_mod)
@@ -210,6 +240,12 @@ buildWflow <- function(parsnip_mod, recipe) {
 #' regression. 0 corresponds to L2 regularization; 1 corresponds to L1;
 #' intermediate values (0, 1) correspond to elastic net.
 #' @return A logistic regression tuning grid as a tibble
+#' @examples
+#' buildTuningGrid(
+#'   model       = "LR",
+#'   penalty_vec = 10^c(-3, -1),
+#'   mix_vec     = c(0, 0.5, 1)
+#' )
 #' @export
 buildTuningGrid <- function(
   model = "LR",
@@ -243,6 +279,14 @@ buildTuningGrid <- function(
 #' `buildTuningGrid()`
 #' @param n_fold [num] Number of folds of cross-validation
 #' @return Results of grid tuning
+#' @examples
+#' data(demo_ml_tibble)
+#' data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
+#' wflow <- buildWflow(buildLRModel(),
+#'                     buildRecipe(rsample::training(data_split)))
+#' grid  <- buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1))
+#' set.seed(1)
+#' tuneGrid(wflow, data_split, grid, n_fold = 2)
 #' @export
 tuneGrid <- function(wflow, data_split, grid = buildTuningGrid(model = "LR"),
                      n_fold = 5) {
@@ -250,20 +294,18 @@ tuneGrid <- function(wflow, data_split, grid = buildTuningGrid(model = "LR"),
   .checkArgWflow(wflow)
   .checkArgDataSplit(data_split)
 
-  split_class <- class(data_split)[1]
-
   # Always do CV on the training portion of the split
   train_df <- rsample::training(data_split)
   target_var <- .getTargetVarName(train_df)
 
-  if (identical(split_class, "initial_split")) {
+  if (is(data_split, "initial_split")) {
     # CV on training portion; final eval will use the held-out test set
     resamples <- rsample::vfold_cv(train_df, v = n_fold, strata = !!target_var)
-  } else if (identical(split_class, "initial_validation_split")) {
+  } else if (is(data_split, "initial_validation_split")) {
     # Use the validation partition from the original three-way split.
     resamples <- rsample::validation_set(data_split)
   } else {
-    stop("Unsupported rsample split object: ", split_class)
+    stop("Unsupported rsample split object: ", class(data_split)[1])
   }
 
   tune_res <- tune::tune_grid(
@@ -294,6 +336,16 @@ tuneGrid <- function(wflow, data_split, grid = buildTuningGrid(model = "LR"),
 #' @param select_best_metric [chr] Metric to select best model: "f_meas",
 #' "pr_auc", "mcc", or "bal_accuracy"
 #' @return Best model workflow
+#' @examples
+#' data(demo_ml_tibble)
+#' data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
+#' wflow <- buildWflow(buildLRModel(),
+#'                     buildRecipe(rsample::training(data_split)))
+#' set.seed(1)
+#' tune_res <- tuneGrid(wflow, data_split,
+#'                      buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
+#'                      n_fold = 2)
+#' selectBestModel(tune_res, wflow, "mcc")
 #' @export
 selectBestModel <- function(tune_res, wflow, select_best_metric = "mcc") {
   .checkArgTuneRes(tune_res)
@@ -315,6 +367,17 @@ selectBestModel <- function(tune_res, wflow, select_best_metric = "mcc") {
 #' training. This can be the output of
 #' `rsample::training(splitMLInputTibble(ml_input_tibble))`.
 #' @return Best model fit
+#' @examples
+#' data(demo_ml_tibble)
+#' data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
+#' train <- rsample::training(data_split)
+#' wflow <- buildWflow(buildLRModel(), buildRecipe(train))
+#' set.seed(1)
+#' tune_res   <- tuneGrid(wflow, data_split,
+#'                        buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
+#'                        n_fold = 2)
+#' best_wflow <- selectBestModel(tune_res, wflow, "mcc")
+#' fitBestModel(best_wflow, train)
 #' @export
 fitBestModel <- function(final_mod, train_data) {
   .checkArgWflow(final_mod)
@@ -361,6 +424,11 @@ fitBestModel <- function(final_mod, train_data) {
 #' `rsample::testing(splitMLInputTibble(ml_input_tibble))`.
 #' @return Test data (tibble) with an added column for predicted phenotype
 #' labels
+#' @examples
+#' data(demo_ml_tibble)
+#' data(demo_fit)
+#' data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
+#' predictML(demo_fit, rsample::testing(data_split))
 #' @export
 predictML <- function(fit, test_data) {
   .checkArgWflow(fit)
@@ -379,6 +447,20 @@ predictML <- function(fit, test_data) {
 #' @param test_data_plus_predictions Test data (tibble) with an added column for
 #' predicted phenotype labels, such as the output of `predictML()`
 #' @return Confusion matrix of class `conf_mat`
+#' @examples
+#' preds <- tibble::tibble(
+#'   genome_id = paste0("g", 1:8),
+#'   genome_drug.resistant_phenotype = factor(
+#'     rep(c("Resistant", "Susceptible"), each = 4),
+#'     levels = c("Resistant", "Susceptible")
+#'   ),
+#'   .pred_class = factor(
+#'     c("Resistant", "Resistant", "Susceptible", "Resistant",
+#'       "Susceptible", "Resistant", "Susceptible", "Susceptible"),
+#'     levels = c("Resistant", "Susceptible")
+#'   )
+#' )
+#' getConfusionMatrix(preds)
 #' @export
 getConfusionMatrix <- function(test_data_plus_predictions) {
   .checkArgTestDataPlusPredictions(test_data_plus_predictions)
@@ -613,6 +695,22 @@ getConfusionMatrix <- function(test_data_plus_predictions) {
 #'
 #' @inheritParams getConfusionMatrix
 #' @return F1 score, AUPRC, balanced accuracy, nMCC, and log2(AUPRC/prior)
+#' @examples
+#' preds <- tibble::tibble(
+#'   genome_id = paste0("g", 1:10),
+#'   genome_drug.resistant_phenotype = factor(
+#'     rep(c("Resistant", "Susceptible"), each = 5),
+#'     levels = c("Resistant", "Susceptible")
+#'   ),
+#'   .pred_class = factor(
+#'     c("Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
+#'       "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"),
+#'     levels = c("Resistant", "Susceptible")
+#'   ),
+#'   .pred_Resistant   = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
+#'   .pred_Susceptible = c(0.1, 0.2, 0.6, 0.3, 0.7, 0.8, 0.4, 0.9, 0.8, 0.45)
+#' )
+#' calculateEvalMets(preds)
 #' @export
 calculateEvalMets <- function(test_data_plus_predictions) {
   .checkArgTestDataPlusPredictions(test_data_plus_predictions)
@@ -644,6 +742,9 @@ calculateEvalMets <- function(test_data_plus_predictions) {
 #' @return A tibble with a column for top features (`Variable`), a column for
 #' `Importance`, and a column for `Sign` (or, for multi-class, a tibble with
 #' per-class columns of importance scores for each `Variable`)
+#' @examples
+#' data(demo_fit)
+#' extractTopFeats(demo_fit, n_top_feats = 10)
 #' @export
 extractTopFeats <- function(
   fit, prop_vi_top_feats = c(0, 1),
@@ -691,7 +792,7 @@ extractTopFeats <- function(
 
   # Take a different approach if using multi-class (the previous code would give
   # a less meaningful result).
-  if (class(fit$fit$actions$model$spec)[1] == "multinom_reg") {
+  if (is(fit$fit$actions$model$spec, "multinom_reg")) {
     warning(paste(
       "Extracting top features from a multi-class model.",
       "The `prop_vi_top_feats` and `n_top_feats` arguments do not apply."
diff --git a/R/data.R b/R/data.R
new file mode 100644
index 0000000..0f06f74
--- /dev/null
+++ b/R/data.R
@@ -0,0 +1,18 @@
+#' Demo ML input tibble
+#'
+#' Stratified subset (30 Resistant + 30 Susceptible) of the AMP-genes-binary
+#' matrix from the bundled `Sfl_parquet.duckdb`, restricted to 80 feature
+#' columns.
+#'
+#' @format A tibble with 60 rows and 82 columns: `genome_id`,
+#'   `genome_drug.resistant_phenotype`, and 80 binary feature columns.
+#' @source `inst/scripts/make_demo_data.R`.
+"demo_ml_tibble"
+
+#' Demo LR fit
+#'
+#' A tuned logistic-regression workflow fitted on [demo_ml_tibble].
+#'
+#' @format A fitted `workflow` object (output of [fitBestModel()]).
+#' @source `inst/scripts/make_demo_data.R`.
+"demo_fit"
diff --git a/R/generate_matrices_ml.R b/R/generate_matrices_ml.R
index 67ba14c..69a6a87 100644
--- a/R/generate_matrices_ml.R
+++ b/R/generate_matrices_ml.R
@@ -654,10 +654,10 @@ skipImbalancedMatrix <- function(genome_ids,
 
     grouped <- split(group_df, group_df$drug_or_class)
 
-    purrr::walk(names(grouped), function(drug_class) {
+    new_files <- purrr::map(names(grouped), function(drug_class) {
       group <- grouped[[drug_class]]
 
-      purrr::walk(unique(group$stratification), function(leave_one_out) {
+      purrr::map(unique(group$stratification), function(leave_one_out) {
         subset <- dplyr::filter(group, stratification != leave_one_out)
         if (nrow(subset) == 0) {
           return(NULL)
@@ -675,11 +675,12 @@ skipImbalancedMatrix <- function(genome_ids,
           )
         ))
         arrow::write_parquet(combined, out_file)
-        created <<- c(created, out_file)
-
         log("debug", paste0("Created LOO file: ", out_file))
-      })
-    })
+        out_file
+      }) |> purrr::compact() |> unlist()
+    }) |> unlist()
+
+    created <- c(created, new_files)
   }
 
   log("info", "All LOO matrices generated and saved.")
diff --git a/R/ife_ml.R b/R/ife_ml.R
index 3d72a6d..cd12268 100644
--- a/R/ife_ml.R
+++ b/R/ife_ml.R
@@ -17,6 +17,16 @@ NULL
 #' `runMLPipeline()`, containing all features to be removed from the
 #' `ml_input_tibble`
 #' @return An amR pangenome with top features removed
+#' @examples
+#' ml <- tibble::tibble(
+#'   genome_id = paste0("g", 1:6),
+#'   genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 3),
+#'   feat_a = c(1L, 0L, 1L, 0L, 1L, 0L),
+#'   feat_b = c(0L, 1L, 0L, 1L, 0L, 1L),
+#'   feat_c = c(1L, 1L, 0L, 0L, 1L, 0L)
+#' )
+#' top <- tibble::tibble(Variable = c("feat_a", "feat_c"))
+#' removeTopFeats(ml, top)
 #' @export
 removeTopFeats <- function(ml_input_tibble, top_feat_tibble) {
   .checkArgTibble(ml_input_tibble)
@@ -60,6 +70,14 @@ removeTopFeats <- function(ml_input_tibble, top_feat_tibble) {
 #' @param verbose [bool] The function will stay quiet if set to `FALSE`.
 #' @return A tibble with IFE performance (note: this will be returned within a
 #' list along with top features removed per iteration if `return_feats = TRUE`.)
+#' @examples
+#' data(demo_ml_tibble)
+#' set.seed(1)
+#' runIFE(
+#'   demo_ml_tibble, by_num = TRUE, by_vi = FALSE,
+#'   percent_removal_vec = c(25, 50), mix_vec = 0,
+#'   return_feats = TRUE, verbose = FALSE
+#' )
 #' @export
 runIFE <- function(
   ml_input_tibble, by_num = TRUE, by_vi = FALSE,
diff --git a/R/plot_ml.R b/R/plot_ml.R
index 90121f3..84aea3d 100644
--- a/R/plot_ml.R
+++ b/R/plot_ml.R
@@ -30,6 +30,22 @@ NULL
 #' @param test_data_plus_predictions Test data (tibble) with an added column for
 #' predicted phenotype labels, such as the output of `predict()`.
 #' @return A precision-recall curve as a `ggplot2` object
+#' @examples
+#' preds <- tibble::tibble(
+#'   genome_id = paste0("g", 1:10),
+#'   genome_drug.resistant_phenotype = factor(
+#'     rep(c("Resistant", "Susceptible"), each = 5),
+#'     levels = c("Resistant", "Susceptible")
+#'   ),
+#'   .pred_class = factor(
+#'     c("Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
+#'       "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"),
+#'     levels = c("Resistant", "Susceptible")
+#'   ),
+#'   .pred_Resistant   = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
+#'   .pred_Susceptible = c(0.1, 0.2, 0.6, 0.3, 0.7, 0.8, 0.4, 0.9, 0.8, 0.45)
+#' )
+#' plotPRC(preds)
 #' @export
 plotPRC <- function(test_data_plus_predictions) {
   .checkArgTestDataPlusPredictions(test_data_plus_predictions)
@@ -54,6 +70,9 @@ plotPRC <- function(test_data_plus_predictions) {
 #' @param fit Best model fit, such as the output of `fitBestModel()`
 #' @param n_top_feats [num] Number of top features to plot
 #' @return Variable importance plot (a `ggplot2` object)
+#' @examples
+#' data(demo_fit)
+#' plotTopFeatsVI(demo_fit, n_top_feats = 10)
 #' @export
 plotTopFeatsVI <- function(fit, n_top_feats = 10) {
   .checkArgWflow(fit)
@@ -83,6 +102,15 @@ plotTopFeatsVI <- function(fit, n_top_feats = 10) {
 #' @param ylab [chr] Label for y axis
 #' @return A `ggplot2` scatterplot (performance metric or runtime vs.
 #' `train_prop` or `n_fold`), colored by model
+#' @examples
+#' default_eval <- tibble::tibble(
+#'   train_prop      = c(0.5, 0.6, 0.7, 0.5, 0.6, 0.7),
+#'   n_fold          = rep(5, 6),
+#'   model           = rep(c("LR", "RF"), each = 3),
+#'   avg_f1_score    = c(0.72, 0.78, 0.83, 0.70, 0.75, 0.80)
+#' )
+#' plotDefaultEval(default_eval, x_default_eval = "train_prop",
+#'                 y_default_eval = "avg_f1_score")
 #' @export
 plotDefaultEval <- function(
   default_eval_tibble, x_default_eval = "train_prop",
@@ -138,6 +166,16 @@ plotDefaultEval <- function(
 #' @param shuffled_label_results Output of `runMLPipeline()`
 #' (`shuffle_labels = TRUE`)
 #' @return A bar plot with balanced accuracy comparisons per antibiotic
+#' @examples
+#' non_shuffled <- tibble::tibble(
+#'   antibiotic = c("AMP", "CIP", "CRO"),
+#'   bal_acc    = c(0.88, 0.81, 0.92)
+#' )
+#' shuffled <- tibble::tibble(
+#'   antibiotic = c("AMP", "CIP", "CRO"),
+#'   bal_acc    = c(0.52, 0.49, 0.55)
+#' )
+#' plotBaselineComparison(non_shuffled, shuffled)
 #' @export
 plotBaselineComparison <- function(
   non_shuffled_label_results,
@@ -200,10 +238,19 @@ plotBaselineComparison <- function(
 #' Labels are applied only to the top-ranked features to preserve clarity.
 #'
 #' @examples
-#' \dontrun{
-#' plotFishers(fisher_results)
-#' plotFishers(fisher_results, label_top_n = 0)
-#' }
+#' long <- tibble::tibble(
+#'   genome_id = rep(paste0("g", 1:10), each = 2),
+#'   feature_id = rep(c("gene_a", "gene_b"), 10),
+#'   value = c(1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
+#'             0, 0, 0, 1, 0, 1, 0, 1, 0, 0),
+#'   genome_drug.resistant_phenotype = rep(
+#'     rep(c("Resistant", "Susceptible"), each = 5), each = 2
+#'   )
+#' )
+#' tmp <- tempfile(fileext = ".parquet")
+#' arrow::write_parquet(long, tmp)
+#' fisher_results <- runFishers(tmp, Q = 0.05)
+#' plotFishers(fisher_results, alpha = 0.05, label_top_n = 2)
 #'
 #' @import ggplot2
 #' @import dplyr
diff --git a/R/prep_ml.R b/R/prep_ml.R
index 4a5954e..cbacc91 100644
--- a/R/prep_ml.R
+++ b/R/prep_ml.R
@@ -73,6 +73,18 @@ NULL
 #' upstream processing.
 #' @returns A tibble (one row per genome, one column per feature), plus
 #' "Resistant" or "Susceptible" AMR phenotype labels
+#' @examples
+#' long <- tibble::tibble(
+#'   genome_id = rep(paste0("g", 1:6), each = 2),
+#'   feature_id = rep(c("gene_a", "gene_b"), 6),
+#'   value = c(1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1),
+#'   genome_drug.resistant_phenotype = rep(
+#'     rep(c("Resistant", "Susceptible"), each = 3), each = 2
+#'   )
+#' )
+#' tmp <- tempfile(fileext = ".parquet")
+#' arrow::write_parquet(long, tmp)
+#' loadMLInputTibble(tmp)
 #' @export
 loadMLInputTibble <- function(parquet_path) {
   .checkArgPath(parquet_path)
@@ -135,6 +147,14 @@ loadMLInputTibble <- function(parquet_path) {
 #' (multi-class classification for determining the drug classes to which each
 #' genome is resistant), but not both.
 #' @return Number of features
+#' @examples
+#' ml <- tibble::tibble(
+#'   genome_id = paste0("g", 1:6),
+#'   genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 3),
+#'   feat_a = c(1L, 0L, 1L, 0L, 1L, 0L),
+#'   feat_b = c(0L, 1L, 0L, 1L, 0L, 1L)
+#' )
+#' getNumFeat(ml)
 #' @export
 getNumFeat <- function(ml_input_tibble) {
   .checkArgTibble(ml_input_tibble, ml = TRUE)
@@ -168,19 +188,27 @@ getNumFeat <- function(ml_input_tibble) {
 #' genome is resistant), but not both.
 #' @param seed [num] For reproducible analysis
 #' @return Pangenome with randomly shuffled AMR phenotype labels
+#' @examples
+#' ml <- tibble::tibble(
+#'   genome_id = paste0("g", 1:6),
+#'   genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 3),
+#'   feat_a = c(1L, 0L, 1L, 0L, 1L, 0L)
+#' )
+#' shuffleLabels(ml, seed = 42)
 #' @export
 shuffleLabels <- function(ml_input_tibble, seed = 5280) {
   .checkArgTibble(ml_input_tibble, ml = TRUE)
   .checkArgSeed(seed)
 
-  set.seed(seed)
   target_var <- .getTargetVarName(ml_input_tibble)
 
   log <- .ml_logger("debug")
   log("debug", "Shuffling phenotype labels (for baseline models).")
 
-  ml_input_tibble |>
-    dplyr::mutate(!!target_var := sample(!!target_var))
+  withr::with_seed(seed, {
+    ml_input_tibble |>
+      dplyr::mutate(!!target_var := sample(!!target_var))
+  })
 }
 
 #' calculateMinSamples()
@@ -190,6 +218,9 @@ shuffleLabels <- function(ml_input_tibble, seed = 5280) {
 #' @param smallest_n_obs_rs [numeric] Minimum number of observations of the
 #'   rarer class required per fold/partition. Default is 1.
 #' @return Minimum total observations required, adjusted by smallest_n_obs_rs
+#' @examples
+#' calculateMinSamples(n_fold = 5, split = c(1, 0), res_prop = 0.3)
+#' calculateMinSamples(n_fold = 5, split = c(0.6, 0.2), res_prop = 0.1)
 #' @export
 calculateMinSamples <- function(n_fold, split, res_prop, smallest_n_obs_rs = 1) {
   base <- .calculateMinSamples(n_fold, split, res_prop)
@@ -210,6 +241,14 @@ calculateMinSamples <- function(n_fold, split, res_prop, smallest_n_obs_rs = 1)
 #' @return The name of the target variable to be used for machine learning:
 #' either `rlang::sym("genome_drug.resistant_phenotype")` or
 #' `rlang::sym("resistant_classes")`
+#' @examples
+#' ml <- tibble::tibble(
+#'   genome_id = paste0("g", 1:4),
+#'   genome_drug.resistant_phenotype = c("Resistant", "Susceptible",
+#'                                       "Resistant", "Susceptible"),
+#'   feat_a = c(1L, 0L, 1L, 0L)
+#' )
+#' .getTargetVarName(ml)
 #' @export
 .getTargetVarName <- function(ml_input_tibble) {
   .checkArgTibble(ml_input_tibble, ml = TRUE)
diff --git a/R/run_Fisher_tests.R b/R/run_Fisher_tests.R
index e0cf7f5..abaf42c 100644
--- a/R/run_Fisher_tests.R
+++ b/R/run_Fisher_tests.R
@@ -24,6 +24,16 @@ NULL
 #' @return A list with:
 #'   - `df`: the input data with AMR phenotype now encoded as integer
 #'   - `target`: a binarized vector of the encoded AMR phenotypes
+#' @examples
+#' df <- tibble::tibble(
+#'   genome_id = paste0("g", 1:6),
+#'   genome_drug.resistant_phenotype = c("Resistant", "Susceptible", "Resistant",
+#'                                       "Susceptible", "Resistant", "Susceptible"),
+#'   gene_a = c(1L, 0L, 1L, 0L, 1L, 0L),
+#'   gene_b = c(0L, 1L, 1L, 0L, 0L, 1L)
+#' )
+#' encoded <- encodePhenotype(df)
+#' encoded$target
 #' @export
 encodePhenotype <- function(df, susceptible_label = "Susceptible", resistant_label = "Resistant") {
   stopifnot("genome_drug.resistant_phenotype" %in% colnames(df)) # Throw a fit if it's the wrong matrix
@@ -49,6 +59,15 @@ encodePhenotype <- function(df, susceptible_label = "Susceptible", resistant_lab
 #' @param alternative Type of Fisher test: "two.sided", "greater", or "less"
 #'
 #' @return A tibble with columns: gene, p_value
+#' @examples
+#' df <- tibble::tibble(
+#'   genome_id = paste0("g", 1:10),
+#'   genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 5),
+#'   gene_a = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
+#'   gene_b = c(0, 0, 0, 1, 1, 1, 1, 1, 1, 0)
+#' )
+#' encoded <- encodePhenotype(df)
+#' runFisherTests(encoded$df, encoded$target)
 #' @export
 runFisherTests <- function(df, target, alternative = "two.sided") {
   feature_cols <- df |>
@@ -77,6 +96,12 @@ runFisherTests <- function(df, target, alternative = "two.sided") {
 #' @param Q The FDR threshold (default = 0.05)
 #'
 #' @return Returns Fisher results with added cols for adj_p_value, sig_after_bh, Q
+#' @examples
+#' fisher_results <- tibble::tibble(
+#'   gene    = paste0("gene_", 1:5),
+#'   p_value = c(0.001, 0.01, 0.04, 0.2, 0.5)
+#' )
+#' applyBenjaminiHochberg(fisher_results, Q = 0.05)
 #' @export
 applyBenjaminiHochberg <- function(df_fisher, Q = 0.05) {
   stopifnot("p_value" %in% colnames(df_fisher))
@@ -102,6 +127,18 @@ applyBenjaminiHochberg <- function(df_fisher, Q = 0.05) {
 #' @param target The binarized AMR phenotype "target" output from before
 #'
 #' @return The BH Fisher data table with added feature frequency columns
+#' @examples
+#' df <- tibble::tibble(
+#'   genome_id = paste0("g", 1:10),
+#'   genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 5),
+#'   gene_a = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
+#'   gene_b = c(0, 0, 0, 1, 1, 1, 1, 1, 1, 0)
+#' )
+#' encoded <- encodePhenotype(df)
+#' fisher <- applyBenjaminiHochberg(
+#'   runFisherTests(encoded$df, encoded$target), Q = 0.05
+#' )
+#' computeFeatureFreq(encoded$df, fisher, encoded$target)
 #' @export
 computeFeatureFreq <- function(df, df_fisher, target) {
   df_features <- df |>
@@ -135,6 +172,19 @@ computeFeatureFreq <- function(df, df_fisher, target) {
 #' @param resistant_label "Resistant" phenotype label in the data
 #'
 #' @return The Fisher test results, BH correction, and feature frequencies
+#' @examples
+#' long <- tibble::tibble(
+#'   genome_id = rep(paste0("g", 1:10), each = 2),
+#'   feature_id = rep(c("gene_a", "gene_b"), 10),
+#'   value = c(1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
+#'             0, 0, 0, 1, 0, 1, 0, 1, 0, 0),
+#'   genome_drug.resistant_phenotype = rep(
+#'     rep(c("Resistant", "Susceptible"), each = 5), each = 2
+#'   )
+#' )
+#' tmp <- tempfile(fileext = ".parquet")
+#' arrow::write_parquet(long, tmp)
+#' runFishers(tmp, Q = 0.05)
 #' @export
 runFishers <- function(
   matrix_path,
diff --git a/R/run_ML.R b/R/run_ML.R
index 2ed07e7..06caff2 100644
--- a/R/run_ML.R
+++ b/R/run_ML.R
@@ -1,7 +1,8 @@
-#' @keywords internal
 #' Pulls the ML parameters .json and reads what model split parameters are to be
 #' used. These defaults can be overridden if you so choose, but consider regenerating
 #' the ML matrices with these new split/CV values instead.
+#'
+#' @keywords internal
 #' @noRd
 .resolveSplitParams <- function(parquet_path,
                                 defaults = list(
@@ -49,19 +50,11 @@
 #'   }
 #'
 #' @examples
-#' \dontrun{
-#' # Basic directory structure
-#' paths <- createMLResultDir("/path/to/results")
-#'
-#' # LOO analysis stratified by year
-#' paths_loo <- createMLResultDir("/path/to/results",
-#'   stratify_by = "year",
-#'   LOO = TRUE
-#' )
-#'
-#' # MDR analysis
-#' paths_mdr <- createMLResultDir("/path/to/results", MDR = TRUE)
-#' }
+#' out_dir <- file.path(tempdir(), "amRml_createdir_example")
+#' dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
+#' createMLResultDir(out_dir)
+#' createMLResultDir(out_dir, stratify_by = "year", LOO = TRUE)
+#' createMLResultDir(out_dir, MDR = TRUE)
 #'
 #' @export
 createMLResultDir <- function(path,
@@ -533,7 +526,7 @@ createMLinputList <- function(path,
 #' Run MDR (multi-drug resistance) machine learning models
 #'
 #' Executes machine learning pipeline for MDR analysis using logistic regression
-#' with parallel processing via the future backend. Trains models on all MDR
+#' with parallel processing via BiocParallel. Trains models on all MDR
 #' parquet files and saves results to designated output directories.
 #'
 #' @param path Character scalar. Base directory containing MDR matrix files.
@@ -600,21 +593,19 @@ runMDRmodels <- function(path,
     return(invisible(NULL))
   }
 
-  old_plan <- future::plan()
-  on.exit(future::plan(old_plan), add = TRUE)
-  future::plan(future::multisession, workers = threads)
+  param <- BiocParallel::SnowParam(workers = threads, type = "SOCK", RNGseed = 42)
 
   if (isTRUE(verbose)) {
-    nw <- tryCatch(future::nbrOfWorkers(), error = function(e) NA_integer_)
-    message("runMDRmodels(): enabling multisession with workers = ", nw)
+    message("runMDRmodels(): enabling SnowParam with workers = ", threads)
   }
 
   # Auto tags for shuffled and PCA
   shuffle_tag <- if (isTRUE(shuffle_labels)) "shuffled_" else ""
   pca_tag <- if (isTRUE(use_pca)) paste0("_pca", as.character(pca_threshold)) else ""
 
-  results_list <- future.apply::future_lapply(
+  results_list <- BiocParallel::bplapply(
     seq_len(nrow(files)),
+    BPPARAM = param,
     FUN = function(i) {
       ref_parquet <- files$ref_file[i]
       output_prefix <- files$output_prefix[i]
@@ -697,8 +688,7 @@ runMDRmodels <- function(path,
       }
 
       NULL
-    },
-    future.seed = TRUE
+    }
   )
 
   if (verbose) {
@@ -790,8 +780,7 @@ runMDRmodels <- function(path,
 #' @note
 #' This function requires the following packages:
 #' \itemize{
-#'   \item \pkg{future} - for parallel processing backend
-#'   \item \pkg{future.apply} - for parallel lapply
+#'   \item \pkg{BiocParallel} - for parallel processing backend
 #'   \item \pkg{readr} - for reading/writing TSV files
 #'   \item \pkg{dplyr}, \pkg{purrr}, \pkg{stringr}, \pkg{tibble} - for data manipulation
 #' }
@@ -876,13 +865,10 @@ runMLmodels <- function(path,
     return(invisible(NULL))
   }
 
-  old_plan <- future::plan()
-  on.exit(future::plan(old_plan), add = TRUE)
-  future::plan(future::multisession, workers = threads)
+  param <- BiocParallel::SnowParam(workers = threads, type = "SOCK", RNGseed = 42)
 
   if (isTRUE(verbose)) {
-    nw <- tryCatch(future::nbrOfWorkers(), error = function(e) NA_integer_)
-    message("runMLmodels(): enabling multisession with workers = ", nw)
+    message("runMLmodels(): enabling SnowParam with workers = ", threads)
   }
 
   # LOO / cross-test configuration prefix
@@ -909,8 +895,9 @@ runMLmodels <- function(path,
   shuffle_tag <- if (isTRUE(shuffle_labels)) "shuffled_" else ""
   pca_tag <- if (isTRUE(use_pca)) paste0("_pca", as.character(pca_threshold)) else ""
 
-  results_list <- future.apply::future_lapply(
+  results_list <- BiocParallel::bplapply(
     seq_len(nrow(files)),
+    BPPARAM = param,
     FUN = function(i) {
       ref_parquet <- files$ref_file[i]
       output_prefix <- files$output_prefix[i]
@@ -1004,8 +991,7 @@ runMLmodels <- function(path,
       }
 
       NULL
-    },
-    future.seed = TRUE
+    }
   )
 
   if (verbose) {
@@ -1040,6 +1026,16 @@ runMLmodels <- function(path,
 #'
 #' @return Invisibly returns the output directory used for ML results.
 #'
+#' @examples
+#' \dontrun{
+#' runModelingPipeline(
+#'   parquet_duckdb_path = "path/to/Bug_parquet.duckdb",
+#'   threads = 16, n_fold = 5, split = c(1, 0), min_n = 25,
+#'   prop_vi_top_feats = c(0, 1), pca_threshold = 0.99,
+#'   verbose = TRUE, use_saved_split = TRUE
+#' )
+#' }
+#'
 #' @export
 runModelingPipeline <- function(parquet_duckdb_path,
                                 threads = 16,
diff --git a/R/run_ml_pipeline.R b/R/run_ml_pipeline.R
index ad9f691..797cab0 100644
--- a/R/run_ml_pipeline.R
+++ b/R/run_ml_pipeline.R
@@ -74,6 +74,15 @@ NULL
 #' `top_feat_tibble`. Tuning results, the fit object, and model predictions may
 #' also be returned if `return_tune_res`, `return_fit`, and/or `return_pred`,
 #' respectively, are set to `TRUE`.
+#' @examples
+#' data(demo_ml_tibble)
+#' set.seed(1)
+#' runMLPipeline(
+#'   ml_input_tibble = demo_ml_tibble, model = "LR",
+#'   split = c(1, 0), n_fold = 2,
+#'   penalty_vec = 10^c(-3, -1), mix_vec = c(0, 0.5, 1),
+#'   n_top_feats = 10, verbose = FALSE
+#' )
 #' @export
 runMLPipeline <- function(
   ml_input_tibble, model = "LR", split = c(0.6, 0.2),
diff --git a/data/demo_fit.rda b/data/demo_fit.rda
new file mode 100644
index 0000000000000000000000000000000000000000..b3fa4ea4ce384612b9300f98e5a8582fb52b8e9b
GIT binary patch
literal 24156
zcmV(xK<K~yH+ooF0004LBHlIv03iV!0000G&sfak)TUk?T>vQ&2UKVg<Ze>RpfklK
zA+`t`^LEM5g0JmSTuuD9u`sR7lvVPfxsw3wsi7u2d)*JnB3liS_#LP6kc@u0Y6xMz
zdgH5iPS4c|CUbHDvHW}$2gZZdYB}udY#u=jPz}8ERzj*BnMU!+F$R?OF5*~M`g2uV
zRk{&#(lotowWxV(oqm1fpPtEX<-Y#~)f1DC_2yY^d#Q8{L3+4Rlnd-@c$S77_Dyvo
zV&XLlteeZH=>dD8>&bEO9atcffcwOQ8vt7c1emrRt8jPx)M1P#uE_M=jx0KG%*8<8
z{vT7f-DH%u#p8jC6*rvc)5fP^BNNGFT=0ZWtQ3i?3Nl&F=~V!d^6S&86Oy-_whDCu
z&*Ty0jCb}xgs-{E6woyK01g_Cg@Z3}_z;4VT{zvkV=U<10NZ%_ZRQ?ue6rU5z2wzI
zB*OEy{a?ib%7`Y0!<V43d8hxe_w%N!em^Fv9U(t+@$cpvvGIL6xF#6!DJ&%CfQ;Dr
z0g{DjYtd&CSCG8|b)qT|yIH3nK`;)Zv517Akg0SS4`fdGX{`AykDSjq_44Z$Q0$m(
zxeL}Ch_kZm3bYo%gAq*7op=%(d~*-Vm4}c<vx<;PfWg}kniKA{{N#R;_~Xdu2~3^y
z2a-~Sg(5JEXL3v6pp<OhC6yg3u8xyYDeHp^6MrqgFkqgRIY*5djD*{3631q6KRLE+
zfhv{LkQZ3JAW+BbuXrw87Q>n>IpYdAy{0%E$^+a8hX&<cYkr)I`pWnI1zSpY$WS}?
z24n@o6F+;VD0U-K6}^M{@h^oV%9FTvG_|zp#i^}ol=uaVLX558`|Ml2NHk&+`c3^d
zWlNcs3M{TZjb9bTsvD9_eKap7-^b(i1e!*-cq|dkR1z|yu$Gc1j6KL(w1797G@ywJ
zPu-S4435{_&Hb*HcqLMVjfi_oX92{_)>G@yn*2aYyZ;sIrz7;450<&0M<8&RAx>dt
z#myLp=nX*O=*dkFI`ezmtC9#ZMys_=e2^8DH)n?E4zR`ogi%;Mrsh?$^-)|hqF0*K
z5<Hmj>x-*6GNOTpd_qJRcnW*yQRVmr#kGs^kd;nHSr*%$zp^d1@dV{=6oa)ukgV3u
zX!C8#&o^0E16?n6S(O<@CNpNqU_~J+amhMuKI1ke+;1VD6pQtq)a;~=n{hu<uWMri
zndN4eiHS(XAr63|Wm16r0st4Jj;7B*g$ud8=AM{J?JlF-hH_Z%_QHjC4>QpS=FFQA
zm1tyLYvy8My5<5<2=T=j<Gu{$j~66eRo*Lh4mk&uz`*b+BxnRPrpRF1f#dO@U9H=g
zS-MCDyU~zcd4fx$cGFIF;C8lrv^I?|*djK?qiTBWnAA04O0?I;S`BEC`ZrZYek8Jn
zs<d<BaD#yD0=T@N|MwoanKzgRT2)x%vp~7tfar86kErwr1xOlS1hk@ErC`1h3}m0K
z#~7rkUQjz7@W(P!Rz-D4UwANHpB{fZmQ~963~8Onju3A4?@-u1y=$|_!evCnDa(hP
z=YVCw08Py=Zf(msTL=b(Qpm|+%C9vQjl4xw)$GM=GeqS&u@XOHaXaO=LpH1eBr&`>
zEANw#zN8?k_5cDNxJb!erLG5b%-Bjm-Z~)QaWmu0l{9AG+{AQ{WASN0L)*-|I9RE1
zlepc9wM7iy5P;|<72x;n3(`<sjOlEJ1cVc}0RGhxR^rldQDv}*(ROU`DoXWB)hTG>
zGyiFTJ{Y~0iL|GP+pWqXG!+;UUau!o!fz)@I0yHRzG?mHMxhHzP+cquDZL>$oMPjl
z8<ad~Cq*z<9tb(HB*BHc({dCV?|)N*k={Jnc_aY0VkN9oDntl8z88|haX(}Kp&6s?
zn|kExS2emZUX13R3lqAm$_97c;C~-LwKnZQsN_WdLqq83>4;Y#7Z%)h5vYGUk57YZ
zq5Q8!P^IrwAetfk;NlfKA)_zB1%K8I0o9Uey-3CGropP*wz!HEc55Bv>@V^1Qb0cE
zh2>gI8qP#Lry_Nwrx0m`ro<o-13ah!JU{1UlnN^%oaDfcw=r{fea3Iml*NaOyq^!#
z(GB{`2GW*_hp$S>$X$M>G@%3D{<5C=$dtETO?`WIp;8je(4<415pCnK|J(3mpi7BP
z>YaFbKMu_miw%*;gD*;HwVwmEzIXJ}bn+NF+88Wk%@21bcB6LZ1j`gFRm;60&%qEt
z)#f$YImf9uVqtlGz(d$tomOV<cB=nLMohf2odFh^h>yyM;q~}TPlF1^_q{<GQDMwX
zyf|suW7W+A9n9Zsi|^JFdO$$A>Dux0EkXo=tN?17u8Fh4S))b|sFwnYklfEyWmTWZ
z_7Igz5=hZKtiUfa{{H-aQ&w;!N7S90dqIYf#qWGRNCH&Vryo>stf{el!&Og5eT}22
zXG>jzA&Jw9;nJ#jke;hqt2ojLYidnccn?OCz~=(h2Zv%OC+BFHgOszD;_T?(k%)7U
zh%Dh>a!8s0>-Q%WV{hb2QpT)I7Ea#jL5JaGyvFi85Qg{AZz^ZAmu6n%C#=WZdMf^>
zx{jL}!uNzz-x}>4dqB5s>>8aUcS50bum+Mc!wBlbQ(zckZ$l2vQbxLxI%$|bHXp6i
z+<e}$1xQ+{Z1I4cK=^T<#R6=zQKyf#45%$TIJRhdL9CW&4lv*qF<!OZjFVIrqo2vM
z<_Ca!Z+QlDRcshpN%g|{Z*q8{gk?5%rXRnN02V}5hi7!}fg-1U^IxDHiyc9KxHFG#
z>dp(Kg`2YjF#mQ{TPaL=jU4KgM&ImCF>wMRYkb`ej@^T67_(qaSu~R>@hoKLk*pH~
zrz&OnjWvCJ>j9~7LGr)hzyQru%%h;sU}FT`Lp`3;)PVn_#?$jgFXD){>Xb$8mlB=t
z37~;z5-70CX}|B}oh})4V(p39bm&NAY~K5!fMYcaN^hDuDo~@)$4tMrQ2VLgVEBU@
zl>Ag;e#h2E3sUr4(X6XrtH5rm>v+s>-q`pGBQEBDe+NKj#hVIw#-fd%suTFcn%<+R
zi4LrCQ~jdNnlKKT9@eL0EQY0K#sPQq{<elRs1kb8<dD@)j2`3^5+k0_ZIZYGvF}BL
z2$XOEN9F)xMh;NA1ArUkmmV`hn}=(~Jyaxn)Qa=GLI=M@CV`C@hH9<&NECqs_thd)
zJw$e(i@ThJKZAYi*z6>GFe#7(=@E>BXcDNZ;7s&zy+`Vcfoy2&KAt2RG^6_wVIz@r
ztRi!<N)Tw_!VFvdQHhp+@vm3?tP0sP4t4GlQo#^Gz|M@fPB+yNK~w49bN1MHM$mJD
zSqfQ;GJMXyBeA36R8yA7v-YtVLIK4sC5Z+y94ApU2*c_MTL{dYYOgob?AE_CR6^US
z|5^73Ke2&&;byw*Sa*IHWkxB}nLN_hMa$ClNM|H+3X<0l2YVR8q~+cPqfh6@SrQe&
zI-i2;Ou0P!?-kv-wxlNVY(p;rnVrX3r^QhM%bmbpcg~K$!TE6BaPXO6R_{XnB;br6
zPgM^{_ux8Mn$|d@(J>t|G697cFnDH#vZC=lSRjFNiC<CJh=D)l&$5Xr)TG{Bx>zJN
zJ4gEyqDAoGm;_`}g3#;?WArzwWQeux9UCoiuEXgRb6JsppxKZJ-lb#%T%h^!Px{aC
z5*9WskV&rP>StUQAhVt#7A)M?eZ(3AXv2`mZCjT(y%b0YwbjZFSAKM%s;Ed}9h>uM
zcbb9y2nt6UODQg?3~E47soTe8-wy=~<Ekiu+#ReK57995oHh$-ocbzjz=0NvGI=a`
zbTgNw>YnLRQ-AF77!;Vb)#?$@lv;dBCjv<>*=rS-B_l!8YpJC+*u=JB17B467_+=-
z{g4bpv5si=>uboCnHMmB^S-rD8b9CFJpekvgJVM88z%dYsrA8wQ`Ytz@$*~W%oOaz
zVxrDX^%}k8b@17}z-~zmXhtK?JIfKEEHQ?%H0#nuF(NU?FvMX_LKPZzTQn-`Wt@Ma
zbtJA}h}#JO0_#5NGoDcW`{3VSdPklbTXk$ddcL@u7EG%KP@|6A5VpqkM=~C{yXc*b
zxJZs&H7LI|77yWb($*4x`vtlF0)+ZWC{Vl}LHjIILjbQylP_4{MvL~cUEiPyO2D1d
z#b^Af@<F3Gl<}5|c~L^y46?U)3;`kq8L_TYchNqb(AQ<CUH)dvmD35b{BC_L-O2?g
zn(A;(H(+8up@NtY;f*3FRNYDCKr8>8S+aP}E&`7y@_~iE!#4EnXk23a7KSnQkVBaW
zX~VbtiObU0;AnQr2wF3>k<ALGlCNIS(PrKT(13o61zevP=Zx13avzc|DdsdOADFB)
z{eC>dUIGCZ-C$j;-Sv(onqaf%ZwVv%fjOVv4$Q9*uorGqWKmB4am{^}&SE3^d2Q9P
zccvo<xsFRy?q}0?3*#zj4P^a6>=RKyh>uK6If|<eSw-jf@!WSTc%)snt_o~iaV4xo
zP*71n)TyXQlQViAkYyqO(gX5_rn53QJEP=J(tU?u0o66UW|g0w4$VQvxnCCY54S|{
zj4cA)X`eBu0;FNwlhUzkxJ@hlMtdZ2Q4$y{03A2Z@NARv4w*xpjTWT&Uu^WxBUJH=
zb%+O=)Uca3Gl^uCQG(zGOu?xNt>FT^YRR%5WlmC}LSwr$1b4ead9~bYC~zuZVA_Wl
zXI(#Ou}Y}u!+(W{Ft+Hk*V%qIt<JUY-O<9L;@u6F?$7>_{Ga$2ScwmC+oE7hXk`$|
zle_3bSf*RY20S*(sE3qbiUY?A`4bM-24H2hI;bqsY#Fe33OJHLLYpD_zf)*tGFFCT
z8#i2-BGbNaQ%6{s1z9F)3CffFwr)gIkvn?k*;mB}mBKYfeu{VkP0)!bVp&;6R5UUV
z9=q~lfBfOk+i3xFjX3uonmAC807^&-*aMBM^7G{%tel%6bcUvqpDVSY2}C<*w{4v^
zp#QUedMdFyiTu~q%~@ZLaCxS;{r$(sr8(^g>h|QwPm77VWA6UDg;5+rme|8Jfp&xv
zLAO;w!+_yGlUyLgmc37BWR}#ss_)8;mf+S-X)Lp!#&n-tA-_I#y=OtotvuI)>m(As
zTBpER5;)HH_g7ld^c>7vaf#p7*rMYYa$=#vbgTCYG9t<W=nLtpn?8*Ecpf3D<cXj4
z?<krG16i#dPvVrQD9+OD2I~aXl!g<+4Dwj-dKSGbT_ro_k2FP=r8kN8#_3H}A$yA+
z3*4Ukd#h1%mgy+EHQ9TRPBeLwF1`p;W@5P1Q8?$Js?!<r3b1@AsP;6VN!w4?kA9fL
zV7O&UNHw_;zuB##19C`;2txMy_=VCxP}9rLt)$PFIKYp|<8u6+4{k&%4e4nL`}PLf
zU$4M6EZ=w#WH&DVXm%7Vd>vtT7s^)^C-2;GNT*o%*#^daRi-ZrU9cQp&9ApLXlYWj
zFB#k=Sg49p%W2Yt6Hwz6&BceW-A&aS&_neY$4r-HD2Qf*?cG93(~xqU3e;CB)aoQW
zgJ?~p3nHQGl(|aKw#iAwXmb?3Tds_hChLlG*v>p~!&6i9PJDHW4F8l%=hwcp-{>R$
zdC&EQKd8S@ZAp&-OH#Hh;DKwKR!XWKE`_M_4*9IL6nBPJ$~5ZBfn0nD^NWFmA|b*k
zv0hNwhMpvs$y??WA-;%NdrTse6WsIbyYSsQBv05%i&c+0YOtAz2F$t^)U*wyJP!<$
zUNr6S!|LCH?*|~OwX-KCz0o*xb}v~lMVrD4;4VoYOuBZhrhosEGJedjES<9;H@SJw
z`L+Um{u+jOXTQrj8FsiU@zQ8rSt#kl{vVRtBLPXvs){Xq>ImQ`PSR2b5ghIl3EHR=
z=*RB&Zbq{Cwf~wR@*w({L%=6}X>B87M(BmBBDEejRI9230K(vcg}u-C{va5OySRy?
z=0}#IR81i~6&ny#G*dZqvg&80IcoR%R0F`p#eTsKI$yLez`9L?xP`6f7r_=*z6QoI
z)(e~q^JfYXLlcMk+BLY~<0KXM1|9Cg_HQYN0B8`rftfllb01_1-qazMfadJ$1r+)1
z03<R#58aC4E`_#-f(95Q<ZI)XID!!1Rm{b!a^=FH$Qz<NZGl)s9m!z7(f@g#e9^r{
z*mjRuzkL8V!&Lsl3Y06Jn>f=+gJeRPi_g>C=o+mmyfqv@Zhv34a-`W?OBk#gUUz*q
z3DqV^57nfNEjB8X9DYozypj8o%bbOVZE0vwhdFxrp-9Tbd`gUi>#;`mDpWk)2>SPb
zZIv+ZMMz21e^t|g<O+(A^$4IwlAGcw>wUq?GgqD?;&GEiI+bJ!YA*w8y>a`d(Xluf
zstm6S8QIz=ZA4FOO>5ZR9HB3<n@Bj`e|PeMl?XBwAk(tO&xnFTts?sjJEZ9_zx)}D
zX3<rtjFw;WlyIfp$~N01HG<RKz13?d+~9Q5{_*7FI8mdjgAKzGrA?bSF+cJncBL!E
z?#$|zhnk$v+@FIT&k&p{1_h@UZ=6tB)ZpE#aCE|%K(K8ulsS)|qXs(33R?Opkz705
z8G(Jh-1OrOysjnbXJ}3^ZX+Jrkv7V*&s@Z*2K-Aigy%prcxp=Ht!pyl4!y1;);OFr
zRkQ@jFtWCqawi&{gZH-;C4uQ-AZK+FQBB6-R4?C-q3JgCwYQumqsSq^Bha0H88&Zt
zwr>pqs|Mv3{nA**9reCd*;Cl2iC#@6o5ux3`U1VxZ<VD}SbgPVk`EIPWRL##+W3l)
zBF-O*Y4&%S%qJb9c)G%t+}A7IaF6Cp`aqdDF-cM1k*_#e2fg10pd($VSugRI)usS&
zY07Y3#|sgLE0%xE34waq*~yOh+&l}sDF$8ibP4iu2xW%v!TgU*Q7^gO-xVAhY>FtM
z`xeFb)fX95dokNGuAWf$6KVUuG_W4KOqTsQ|C00R&W$9REgotd^Amh5X_kf`(GLP9
zRvZpMn3#!lb*!E)6i1+YgNaK6j@ix@Uruk^^Emr)Ii6&x6R_O6nWyn7SMUf(@SRE<
zxsJb|ifymX(W<&m`3kF8`U3E%=}K1KfiBaBD=OeHePJqULqX5nJInyvSTN(N5>&D{
zOBuhnuxZp47rmQ4kl@hU<&;$*ufD!F1bYNomzS`Y{5vfM?I5zVcVU1Dj=M)_D;3lf
zcL%Q<v@6w>M<aVOR%@N{waWgqq4S-%M1>Rdj2wj0r~1hBR3LD^ap#jG5++pKjFz*z
z)GuMOm|7K3o`#Z``a9&0NO~rR3^#iyD9G^I%6>0ZDZ*r0W5BsA9fnX7<-?+o6Ye3(
zd-_MTb*wCBbH{I7E(mqn;+=NFLq{U+pC41q;_uo?TVSe}a+7sow;pP}ip1x?%!}?;
z*o`r=0Q5lC7`O3_tib#5B1;jeatuzH?y_RLj=I`PC1E(b3xb?U1Zdy_$u8mIMp<om
z#paX$UoAAbmP7RXk;p@4)r+#Kv;#`3$iPokcth~D!*D7Bu2+||Ui_@C?Fo%OR!r&*
zY>--rM$^nnLqd}<&$oPZn|$VWg8zA(m`5f_tpOD!_e2*WQ8h(!TIK8h-<@#ic%PFK
ziNM9YFa6WMqS8KnT~bBo^Urc2!wQd1o)<)sMO^I5(PN8E#HLZ9PjC9v?vj^RKO*R*
zev1?#)%gsA(0zME%_~Bvbe8A*ae0Ea>_6JkE6%ZHrN0cHIkZy30=ZUr2e-<?<i9r8
z%V;Ar+DM0j`N~-7|FD{zE;Irn>`nbZP$sE#7>fq#Vq3g5Z6n_G(D<$cH*ybyIJTw3
zUp@wePoU;4pwAHPYdWVmE6mloiq0?2o&Kpfw4>V8Q0739dq;ib@?O_=hQ_YflLI+e
z03aVx&n}ujC}aI}dlGH3_dn5T)n7VJa01tyb%fEWTQqrU3Q?E797MZ!79p`dr<j3$
zkaOvk0+jt&H%5y76#2P3{k^`QYUh3*O{4yBjA^1ubpQEB>y7UqW!1KS&pe_x+y2dx
zT0VQ?W_CZ3c1wsx3({%UpA6>lQyYC=U;YnT&p=<I4rPW8+NTjItw_}6?g=5zN0K~0
zr@-7JY?SG7oNngn#f;OiY)2Fb5MF!1(Zw_7reIcm#leoiE-a6k5#ghMq>cXk9wgyY
z@;dDkB8@JJ$uqtT`?^h7QJNc*Ur8;s=$N*~H*5wx`m79(Ak$leYE`a(O&v1bD(pt&
zYh4YIu=dPnB*DwqQ<?X-+8*^TUa&=d_Q5uIZZT2004y+|@-@lsKf?kgG$@%6|Na>v
zC(I<G>K%<vcBcIJxfYbs8?VtYmK2)PSl}{zROy>^maT$>oC`~jLf&{}3W7=Iy>ID3
zneGfjMLU)~u2(%9rqEjrmmnTiy4~A2q(-Y@<ogajx|xt7>b_9jP?hL$jW9-jd)oul
zvHhlK1RPtcMnxc;w?KG{?%%VLfWYh=`3o2Feq{-@_N=#rsyL2AD1?YTNJ!Kg;3HmI
zX|eewe_NsDFwG&-Zq_AW>KIu%%QrAqfF8V=KdzCE!}z}!nyh8mvkI}PUk6z~%b1=_
zX#*kBWBQKkp6$Hwg$lQ|wKS9$0u+dxC7Q|`0-u~4NNoy3n}R(fj{&}VR|9@4<(-T<
zvs9Px8^1syRP{<GM!}2?$w<MLyUF02maiMGUT~{CNe)p{|CmphCj2Y04a|~F{E1SI
zjJiT83X<D)<|z0LT{K@lLb(3rd(?J@Yh%l&@LrHIyA_IoT)v0&c7!adY|VaDCBN6q
z)0NEBYyEhl;0Zq<Jrfi|cHzdEv_V$c$q&sCcuPnz+5Ys$tzdMx)FTTJ_b?4!dw*tJ
zs0)=g6^3`3pET6|TKfdGm;|u?bGbO)Fe#-hcPadibV__&Lj{Y%@Lxi*`~9oRDyw^H
zp4nc540dj+u2yDqAwM4-vt6PC72Utyp}J?0pzuLB-{xr@OOnVI4q&OK?x@c1L||xX
zlgV7CIM+u9P7kgEk*jE+%M1Y&Z{(=dIg#pLrWbNqL3Ar&U0#MBtjKwV#PHib61eUU
zKGv|!R-UWXo!J4{aeNv-OrJ>;+$K4hGbKP?sqb}2z@)j6Hy8_!+O}uqNw(vRnyzc0
z&6VqNlph1itKxxU`KpVGSj1(lJG~|M-IP@Ye$_noESK5zkXuck_Ww0UB3|520_iF*
zrOX=U{df&JA)<!o)qF*JGyT|^3f$LUPASLQuD}Ciloq-Vf5@uFE5*@2kLS_L;-zdC
z2&W5wfjakB;FR&d#O$gK9r={I`{QJRA>><Ym6LP-(gabK&H7Wpg3&sIPFUdK`YXVW
z#9hiHBLt*en}0B^6vJ!MWjwTyQF{lFQw8eO4^|DXlJ$f|@>EHba9pxZ^oJ_+E+K+|
zwDoZk)-NKw3o+rOgYp?9!*LtN{>C{@kIK;)Q!?D<P>n<3)1wrVA#Z-eqO`3qP~T`V
zGhmkW!OVRrW0ee^N8{(kX#$0~_HkUh{H&A0nVxpFWSIW-HP1=C+w?WJ3;;R+(g3;{
zkZ)nOpK0a?m7CuMgVZ^MVRFB@^3pY1IzeXS6Mx94z0kyX84tq|qsI$D_q1hUFmogW
zme*JjZqEEUcj}<qc6J~??W3kqqZYkq3vG5rnKC0ML$JH@<@v(tK|Qys$|h;?gO`k^
z0VC~G;442EMHYYR;Ho;{_Y4DZ6(#&nT!Id3PDKpv5?3_*AyNm${VfSc5^k8}nCIWy
zg(4Zf@!-x5yAA2*xy_eh#Z}9)a=_T~>eDEDY7orr?d)%T9S<SRdP=@zBPwDO5ozb?
z!&ZVL?x!Bj-lmR$jv4n<IX_$?lr50cP%rIG?sg^Y5g9S$<Rnl2I<&s*wSa}AQeWeJ
zoN59!MjAQHfMgwyq_Lq_N{TgI67K_@lKY(`to`mx_w&Z{AdNwsw_n5R;H$}jC62$M
zbkO0HeH8hS1v5z1d=AuFk&->V7At_juy~`)aLlTiR&$9$s!S)O-HK>Tq|^tMMWDp4
z2IQqozFVqP#L7_Uh-9d)fFszGQF#L}F!ryDFz^m5G5h^6-Eo?zc}ZcwUqq1O>91Qa
zIQb9tYI)xCFvjJ3t3lV6B66Yw@v2|ZLFhoMy<<%>X^i&n_BwHSi-74ON!pDRiehGk
zVDwG>#<;p@kK;}%-ni`(I}r!-*z1dnp4wsY+Ll`Wi9R>vk+r5>0Q+_4u|>%83V0xv
zm+t*#U%_V*s3+o00GOn!v61kwXSuvXyq@6lfMj<|*T|>o1nqJ!pNNfE=RBLeeSw8*
zd&2yY(Ag-NT19H>w)7$#(RkYrxdEVFF_FDgjK&4x$`8J)5=Sdf+}M(J7Kj<}tEk=x
zLnu(3Zdv;!Y<KR~Q#E@JUHWnIDW~yKv#HIDozf_{2ixz$=ACE8o(Q(m?J(+W?X1hS
zrFa&6X{Z#$<4+aHSJv-53MzI$4h|$jqWL?F%c*h8hOa_#7dWzMw%x&4X_;Kr&^jZ_
zwUxB0kOO{nB>BXvksY<-<vs})5p-(3k9}2~7M_l&ZCq<sZ{|}%UKb|i#{J~of<pcZ
zQ<Rjq=i2wqkZxQ%QqvNE*oFJsPg&#84zbEODu=)Ij5t|>ZR^e!X9wI~Nmw}f8-Gh!
zRWT2#<P8FS>?0JSM_BaJ-N_wX%9bTIDL9_;*_Gv-;RskWB=x3~40wb*Abr>d<RUFz
zZcHM9K8J9P0`?<o=O+x>35x)cz7wLAcf?gdrK|vklSRMs2XzA{i0a3`8nP&#^?sV$
zfAkNT__Nn#*6j#YH<5E<J%VImBtR)H9d*oFb3+Pm?Vt`)aNbuxa5VTbQzXgGK{gx&
zqhamULGG5T-9oCSY7<@7uKcJ)vK0fBSPLEx6=IF2+P)SrG`hS#24(5)y*-ObZZXH-
zn~})kblX-UzrlB%9l0FNlI8=#R8ZNYz#-C?a6OW<^jV;dY(eN`%e+vc;Z!U>4X2!A
zkmgl3A-SuN8jfdfif>Z9xSc6vVDcNQlJ_jOw{B}o;=~y!r;5YGM7`sNI?Lxyks!)C
zBA~y`q>*1f<y2~uZ6ig>_Vi3vl!zR$IBK-WdP~c_bZ9UzQvX)^sZeO!aD<_XRKEl0
zn_?%U?9vUKwF@W52Tg?c&6I-SvonWmPdg@FW79zH?!<~Fy8f?tpFBZ-O`HwAD6b2}
zWwWbPdRfz0as~OWhJ5>KtiMH!ik{md<hj{c=Arz5QS=j{s{8@keh`7}4qS*rgJ!pX
zC`_ZQuxpEiTgjB@3in}QNT~}30$;6dM*BiPmljpb74kE|d#=?m=1f=Ezm&azCL1!W
zGR9W%&8c$dq!d#H%-sHN1a{=PAi%(^e4;gYR^#cMH;F0DubQ(bRR82qPi^!;U$Q}b
zTuPYw>y_JOkvcggsgQtKl_5c<9sMv@1<WJ7lZ$3}Q8jHcX@eZI<-wY+gtrt;k;v|Q
zONDY{)pk-<x=y`7#)qs(r3qU2b$UHtDA?@w3s2J;GN^{Fo+!khve6#ateMBYpn~^|
zA0R;o7`Rnn*XM@jeQq!la7Qn4XEbmu5uPJqWGdTN-XmQ@-Fvq%NBckuzNGv@1UZ_x
zQmZbDh0z(nS}3&1P)H*j&s*U3k^lzmtVt*2SZeO#ccUdRZ%l`zXWX^6n#Q8{QNtc?
zeA!l5s4({Uj2!r9t285^`}z|h1s{g+t6-nsIDtRkAiX3-<#L!@_Q{Tte*v|8MWPIv
zuQe@r6aA)a*ENr70PnHB@R@cK$8zc}bO?o)Z^-e2_7_z-@B5`2bb-;_VHyUiLJu;b
zHDoP3Q95TMjbH)?+{B<7i(2cE(p{SY-Ty#CyiY0Esuw&X*Su8a+VLl!9C6qia`|(<
z?BtKR<Pa%4Dc@;ZoBqt`eg>EfrU1MidVBpQn;>nIeAWtv#Ix76`S<i?i6?4@d9s)0
ztB$&8dOZ$MqjP-CEK=CtRrGAildS;1iK-Ox?#yayG`sKA*jq5&sS<3UTB(Q(y+c)R
zaGIy#SQ|KgW{~>RsT|!;d5~F{cMRw)2eH_VSh{H=l{dJ63yCHLDua#D#87$#S6A<u
z!_p7bBm)K}wI^8@%K*k_*BfUF%o+MKu#(4;i8L(@d2TQ*p;gUkGe^D;C3wvTNfV9)
z<tkcFuxwb$={S7bq&FgI-#qiU(9z)m?5iB+?$b2xxXU78qkfQ!*oj(hh6d;`Z*|>@
z<(~+@<!d!;a|DW|L5<~=`E_q^OtO!~dk$@C-H?+Z=97vmH9O7zO|v3^Ol|*^RKz0N
zJWH<Dckvu7_i~yu#1&mHO>N!=A0sj_{o*WR3>2cYF$+W{MT@*9it!^ZIGZ42zU+ya
z?_KmM8qd>h{?hg&N|rlH9t)w5&Lw9_T*Du@aLfSjvr88#KW&nESQ*t%OibppFU9}l
z*NlZe>ex2`9AWjTn`roL;-x@)4<-2P7pjG0VvN=5)m~aK&6i-GiRqPSv+`^H#r9RF
zB4`_G0*ow*^(q}6#}kC4crM-ntF;oi*gPx#6ovP_nm6%rOLL^?M0lP$AH;}YEi^st
zoh@m%1H3yfu!(<BZa?_f#BoFtkQF?ri|I<G%b2bO{kgb-(iYD_lJEFSX0538Rh0^m
z-;`=732eS5>)pC-h0B_Og{KXvcS94p2lj(+5&q^D_tD@C9T%|Fu@6m|-N_6kI7U2N
zlyt(Zkxnd%loZr4y#s{AX}OeId_+R~)|-GOAgL}2dUT}}OhuPi(LcFsTQGFA<pEs@
zW&2#AJ9P$I(uv^3jQC3T+%V{>)@B!6!La`bkR;ZbmRx6h$5TmWhR9rRw0f@S*FT-2
zk9>_$!?;x>%EiQI^i*{<b!*c*fQzd`83R2=S3{<F#+W?;(x0vmTf#C+I-ep)pVecn
zI>EA6r9buIsMU+R#3zxJYY-LC6NlWRSeo3199g>!cl_ghuf;^0h-^N<@gIDw-B9mt
zt`=PMpDIGb)+#<M<N2uC2dx_q_EdJJ_tG09cOJ{{&=*u_JN*SRt-Z~l@pP!EYqkdb
z!!p5#u?YZ?$zZFunkD@bag2sExj!OI_}ym-+b%t+dxv~hMXiZ9Lw71yovq6V_D6wM
zN(bLfxsR-kx)$+&=6xV|j~|Iu99-4mT0kHC_28&uJj(5_31W(zk<kXJfgvMC4H}bl
z=+tjIxou7$*<k0#KN3%rU63lw7x_Mm&zLH|*y&fm`MYBcg<?FG?<(WA*~JQt9LQ`C
zfaMDHmQhb(i3PrJQ|+<DsU#1hRKJbj#uInft?>PU`KT-7JG}tg;3)2}t=B8CDr3#A
zBmb3s`-K6p4P><f_FLxsou^2EDTzbTt<QNWsyEV2loST$V*<&F&Ns3)o9q+~N_~hB
zkLrBRA+|hOwrP6+Lx2~xc3yH)$U%t9A_1q~HHyw{a0h-Fd&dPrB`;!<*!fk|`0~xW
z0ch&u*S28bF<>%Y?8UYa0O@!3^oqH&w=no2y4gj}B~hg<cAuhHVA<JcveU@$U9qH4
zO`fvlEqw&taJzP;Z|2nQ!YBcy%4)EBZFuTEQAI76-oT7ETU}3^GO~Q#bRx|w*A6w2
zp;!urBXUP${|S`fG&I_+*><cir@^wwz(*pAp1eyWKv(!ZEm-^VVM)LA5B<L=;%!wg
z{zhB`ey5e0_8RH4BA8Q^w?tUSOfHakXId4Ym>t|26%-?W4M|}244~|Cx*!tzncrsL
zDnReuFG4{?Mq1l^!Alf{hk5I>SmlWSj|Ohrox*k&wWz3ak>tHvp&q@WTu+M7eCG48
zK&d2nT@gPgl$q}NOvn1CTHG>FNABTX1ADG=WpZqa?h`0AZN5n6a+Uqe<)`YU5clB5
z-V(GvjDAOX*uwWIbcG{3)-q6k%+V*>5>VZ>fQ2_tEP=WK-9sh)iGye8+RXXD38`FM
zH%<(aK#3^B;p<~B`KapeH#u7>#9ZCgGXHsgnlYbnX}7cZ2iKb>zjEO-(^;hmpltZ9
zOsLdf(ENxl;4@Ik{3(K2wS-obRAG+}8cdBHxj;gBG6I3m!s3H+l+Ys&XK%9lFdx(T
z>0kzM0|Iae9I4W0<etD`T9kI@pz|DoOWU0M%1Pn%7HTfh29WV^R3g&?s_i&i5LT?C
z%}i%>&fVR>zHKBi6M#Zg2p?}2BZ8;UP{&T~T!We{0!)s6qcC@FScWx@yPVzO-i&dF
zFqn(n<OPx8XUoBTq%ee7UOL#Bxp0|UFiKq=_xu_l08tWozg4W@^~`qS!~$~$Eeu>^
zBd0X~Is})rkphzH?t{seL=e@sdXDRGt!8N5F|bGOXu#+xT}^8~wZmHQ?fsD9h^ft?
z3yrQ&_90r9YAgcbfv0(^SMcTV?3nc-owtf}l{Q^%^v!0i>``bbZzGzBhF6EN!<i*3
zugP-k<~TKjk$Or)qq{9+!{RK)Mcv?x>w@7C`4}I6d*v|wkGoVvFB6556Gh+uW>SgU
z$HSd)|Es1;5Ot)~dx+rTcXEX{WU;$1C9c8XE1ld=9xoUaDKyFdAA(tI6%Cwf)@)3&
zkI=x%`&z{DAAf4!jZkH>g?)z!lK>^5e;}}2PZ9?w1xhM}rAj=(muE{d?z?y+ylaSI
zZ{k;uBM*AHk^h(1Dq&rz;k3?+^X8YYDD;f*@rJumXq&QF*4|e{>)l8HKl?nFp_d%p
zi^W#h3^2xI7D<gN3DtZ(|1n2zrq{A3Cp$iGFAyk{%}BPe8C1guP&8?`I9C@^2OT$7
zsJOjc^I{CE4(lPSIzbL?M-MnwS5G`WYxzT386tdgCNeLOs4UCacc~@85<jq4lt%E;
zif<oP%yXgsYtH`-zIxsWOeI-YH%GIPcb^KFc;eM)ALfuD;JY-$F97{oeqtMO*kPvW
ze<&$v=ePt<G`U&EsEArBS=P{m3RDWc+C(v1BzWNOK15_)J3&!~@Vti8iklcfQHx?t
zoCa>ncBxnjKvb?4Yu2;ZT$Ypg^@dXWKZwqa6PCu~rX4r&{e@~8P(C@~{iWkOL+9ET
zaj|hy=yuRF38d0JJrab)ViJSVq<#q0R_2iGwl)o8N*8DoZaNR~cn`DJ$=Nly9`^Fp
z;`N+WeOv4&%UtBM(nEi1XjQa#0GL+#X!hXh{|U-PJ3AhN1IzssC&}kkZx62|(1jbH
zw?#9<rT81RAcRBX20DejtggGhRUdG^xbc4yxWa}$U3vS%W2KUP8tuMhW_0eq`!X8@
z%<dlcD{STGcJTiA0TEGa=j-UiLh5{dcf0ZQ@(w~6+|{ZEct*9<YcDQ6TJEoOT42LW
z-#^-(GfNKjyxPaADydYL+_{+?K-?0Dl6hp+Y1HK88;dC^(;PSQKGg}AxKu9s`w{Mg
z`id!Vn103a%;7Uw2b6jq2UZL0%|Uafh%vqLNT_NqQf$EKM11t<RuCjSjE1xfHGLdB
zz=GZgxMwRu;xYs50u<XsyaWZQ=zjSYR0XtRD@k~3igO5qA9eOvRv0cFj5*+VHR52`
z^c>8?+3fOvv-_z1p;hs0gW0(l<5t;31mDMp=es+gI97-kK6uVRF%->>Ns+QxS4zRM
zMCPWBkt-{iFa{st6RF<9p%l{Kz)cARqOiA8JwHS6V*Li54>J1KH0aZMy;YQA>G=E6
zjYUITwLv|EWq%4uM}`c!3~Ga;4<26y5;02gojH22D|ch=X(V*O*!F2lGT?1n1&aLY
z<Citqj-FxUkmj{j2+DO-mnAmh?IDx3iOEFfZk+B>K4~U&+LacNJC)2it~de@m0b^x
zAT$n#|5Spe-Pt9Tw=Or4VGI<KM8k0%Wz!|Q20ba%FYP5+D;@7?=oa8C(@K0=*c^Jq
zd4~<bu13#0IWZs(vytPlX<Pv0ugnKEhosZ`w6}mUfL^SiV9JPrG#1*_lJLnoSCM?{
zReb%>ZCSHt`1j@|sWJC5ydrBL9%dB>)wmgazr6vAFYSlJTX9&H=<5o?XaK*s#TAgW
zjYH>R@{V_&K~Kqrph+GkCofTj@4p-kJv#@Gi{z%eG_9PmCPhfJZ}9ax3ABys)UW1o
zWmZLaG_~1fC!8Xk+^F}-j$dfc7^7EUC+|%(`RB3(g-od5+zAHe9L=ZVCKzb0lnGeV
z-qUktA}5+dDx!=lE^V<Oa7{$U_J~>(k}fn&B0j0)ni%A>WUwouG18c{CP`p*nKecP
z)_naToPamn-Bc}FtJz6@@AzBNE?P+TCq>3NrZt`B-#@I<3}N$CG!pehGz58>ixil$
zOjm+I?fnx8o!q=PqrIUfeP$i_fp8LRi$2Hv>A+?XOx;f{k9~px#O_aGiaOdcr;BK+
z+5`-0Q!-={J@L*!C>n|e%vUdWZmUWxkN21|6(9xpLSajqKoeFKJaTm6<%&8PoJONT
zjg_tL8|)+i(oj}B<JX*<4+T@R(Y**WaJhTUHLj~!j<Z(ShQvMMwZF=A_X+Xkr6e&?
zA6tSp&e#sEx;qI)BXuR0Ea;8iq%4)nRF~D__9uiGr|=gvl&P|{v{i`$j|TQTG)?^`
z^w#AU8p)v|^;O|j#-lZlS1YnXg9(|BEgJbC82ov#dh=bqeQ|`2@*xn)6bXX6nd3#4
z9WS_Aq<ZL(;K0eRPKt_bhdOGz1J5!;;du}A-fP{?OSSm4$@U%-5wbwR3CxnV<T#!P
z0y;a6+Yk#UJ5mj0oW$Bb@@MdmU4{`ryk+Hp?0II|p{h=3h6H?CCqtlp(t2*HMs0)4
zqCSrle|!QuGVvyPHU<?QI%5|fr^+uVwZ-jQ9!sV0ZKItS;>d<T#Q3%aL$b=VTTj%(
zL$w3zs-U5Ffvu_-0JX$Q$-oCR5sVcOK>0CCpwgk)MWi({7Rl^ES;=*}px%`wblv!i
zl-)8&NT_N)_hS4<%RTSRO*^ecz4FK{q#ej&$I!ly3^_N)tEb*Oy!e_NHs^~{|Ff}q
z-u6pjHW&oJj*>B=Fzx-<4ud<c!K9%j-rLEu-P{FPeWErEDdi!T92bM?IRQj)Pr!n<
z6F=5$S%gH^BIaQNDm4q7TAQm6e^3GM3?d2UJuWEr!){^Emh8M1@bVkiL1=5vec-^l
z^A2Lo7mdT2Yi+s9_GHZzmhH+8|28%#4tci{G?{cxAwR0ac24|@P8Huu>8DR5u=~1a
zg9K`mWt>}vklH7?ABue6ByX~NuIRVOJ@L9j6&(p4$s4|DXUPP&Cz8MOsdMj2=K15d
zKb(u|(L>r?YZAPU^|4;GX?bR+qURo-LS~~AYAdD>ji6)$lUA%1u9mW#Zd+lAhjo@`
zV+K4?@NW^%3<#0dEq1%($->CrF^3&?s3I@ox=qRKb=!o=Yjzwp+M?VWgGX(+zxHh>
zhCBGvM|-GF-<`W1%kM7`VF+S>)`eTXx2e+2_4}d?^GnXywQ>5A8r@21N7r~e>7R`R
zYh^w$bH{g}?>fkU#*7;{`Uwmx&v=g1vYFr=GTxnUT+O54K99Sxb_a_BRfs0e1rcef
zRnVb#OZmawnOCoJDBWTn4DtO>lMte(*HXHA+E!k^R^eI)U)C%lW<*d=X8eBK-mbP7
z0yn&@w=A_tPKo_qi!CCC$LQr@J>opF<oYC0zxy2^ixo?y<KT!{k5kSTg>@|FB&v=M
zTRvO-yRXHb%4y&@XAPSGsD&tXP)Rxoe}!EG?G$^;`~__r`Zv_Oou5J&m<gUlPhcP+
zLjgQjD|SOjO?1{%I&mHDOD#&jgN;tMe~cVgNMTWmP*K9^`sbZcMba9$=5)k4$j$&u
z3>;oH?iTVRabF+j3!WpTBH^9L%u*xXvVDEt)ct{f*xg|7mDeQ<)D1(2>}xIa$Bnsh
zi=rv5+;z$5u-l5m2?I#;uJ&Sm1mfT+V;ZebkSYev7dpM31jZ{=>F<Zb9I6W=aFbk$
zRuV+vf9rCUq^i)&#T>2PEujCR=%mv%%aKEi+ROY-Rt)wKjA6M?yUcPg|J*Q)J|bAR
zSnlfak*`xB&7{DG<MJe5eZ;29hu&VXEtj^A2mSxeJZf96?fdPHnj9tjx7>Iz<Brqv
zuf(2WB{1?k3U(wc{hl$%QX-wa2A`WkmDk!Z$vo}UQUR~JQo?aYabOHZ_7F;UGsT~w
zvG?5G-BXAExhkD>HV+Hpb}j#Y5hI>f`NvU;7&S@XMIdeXh5GzM1-~T>$gTJ8zeto~
zazY=G0bSYC{q?>lH9>!>AG(o(M0yRkv7}@bZ3Ip(ng{COl(2|hvR%C;uODAPmMVkC
z38oEE?G#-@$_oTBlDDgav;y)zxRPSLL8_Qf-%wa-?ljJ7;UGrlJWRpM(NR?CzJE53
zSV~_0UZ%Ajc98n~t7qAL=OxG1du13DEZdGIojmZ{aq_IlJl3AG9RTuym-eyNfOc$o
z*3R@wIOw_To-IrxTbh{-HD!lUL<mZ0j{yNEC$gWLU~efXP?op)W<B66S{7ZI2MG*_
zIvjGwO}g@qGgB?~n}w^8GmaxwFc~s$H-9<fSgY+pv6AP$3rJ5YdtI*Jp0Fcjb`&-I
zW7#P93~_>{>vVhgC>zUV#2nvvJ=PiXVs&uG$sr6dIK|5l<czi5bSuXF&K8sYx@5yz
z-hSXQ!t|R2o8ONH2`)8WT`xQXt}HITiDkeQHRQ2Akk`r9KhE|gQvOY0l(%sY4y3&Z
zZZ!q;w#x($wIwXGYAYYri#i`qJv<2v)4SqTNz(hFyKr(!)bX)?p-UerElUpBPzA-2
zmH{x`G8~m07%>g^)aK*wD@c)F48ykTEb!47Vr)0!w&={#!FeWw_=_(_gk!CID+N0i
zA8LB11hDNc{|(R~Shwi0Wb1jw=biVY<o5r}9G^_*oP4nNbo^&#L!(&_2o>S#GB$Mi
zXPGH%42toiSQLe>P|%%5bOC=$?gH(2NS*3`#7zb%Q012(@mI(k;V*#eJ&QBCX2PY}
z!&Q^q25Z+L0YqKMVlSUZLT+9wetu-q?1S2OosD1s(N~jL5q#}#*X>s8lO3SC6WG(?
zj3D-4OVuEXPNfD#2!K0pT%iR$S+KYZomB<^Ik3D9@3OPGf7ICp-&8OLz6T~+76@>U
zHdCBhk2uE~taUFbuyg=ewybvH%=Du`MurqZ$^Dk=4{~Mp?p+}yl<b8C&aW^!D#7#>
z=eUI6Q#!N9vC6e(RVj*d%pGlQ%?(Ssc#&&>Uc)pC;|yri-bZp>a<;(!YTuqNnmUJF
z=wzy)8JPaC&&k&-O~vQ7wiGdnI;Kl~>bC?~FMqjXn~d&%U$R2B*$JyCtO6SZi~-Uf
zJ0?Ad;vufVb3e#h5jfXc?D0AGlGOeyvTjMUxC02zGOrThz0{+S#!qF6PKCvG)7C}4
z+GpM@%l3!F@?#jFvjV0wo37ThC!mbTQU{}c_kX@|$q`jmM|$R0r8ti#l#QC(RsYeR
z>SoY(E_#bh(qpV@1@CFN%9Yxtt1P=8@iuu}u`>iE2rt6IW=F8P^zCb!lFve&s>WmV
zr<Znk=lu_&B!?5ltl4cp4eUM*H*(Ldx}50+_h9Hw<3alFY^jetFc`mBNbGMu(=~$>
z0l@r;>$H*w02XJFebQ?oxCe9Rd%$iQTb@34>xf!5%6<l(`Y%`Bio~XDB(8lwFy3_V
zP4o+p-_E-(Yf{2n$3hg3pIB~iX5$cxiR*U%eA?yeZxDm9{r;$f#5z|=`^rd<ixdHN
zKBKP7wG4fiQyy2sKzo}IVGZNJo-TP^o3d_%sESjF#@G{jOBq83;CX}+WzZn_RBp}%
zE7(Bb+nCFNTd2=9uwFfiKkUfY2FKq2xoFC>9~SV4(p}vuD2(CJCDC}<h1^M&cLiE+
zojS>>kMQhn5|j~FV_U3^NC2l3_O0yVTH>{2U_w!U!<FNz`d5N-!|qbg+m{3spv{Px
zePCrx<JzSDt2rQ~iKZ9w@}dM{ZwPRl%C;eeUj^#v4C-A&z3|Kb=ewddfVsM8w4OMO
z6<Y-=gPsQT;E2e{W_C(xb!LBtzwNl0m~7`S?r92ZSLGyJ1LSdt_UDBXJrN}UgngAl
z6|8-j9X;=<)GG_eeuR+c5E}Pz2N8g}tK<MphU+VY*NxRADg2v%T3O&{)Na`Z;NI*m
zSM*23-a_Cw+bNj$B3OKI+|cXjiz2Q3k!-bSWX59s*L9YEkUvg+SE7ZrA;L=H&NILV
z6}b+pUxqON{m5jASvX79#zIU!v-We9vu>QT!AfW#G}O$Zq)#Rf4fg-dw!}M@wND55
z%#X8pqQOxBb87!0Vh_?H?cFO69NA1KNnvt5G8F9k>1QUGL>#cf6UE54V~O;60xrWE
z7de*~_i_kRV3k!Smtk_Pg*$iM9%bby;rOP&-U@(&cj-%-M0>%Xgx(=C+bL~|??kOU
z+a)l%4MGehl1pxvr_SG);mgy;hqLKWzQ4zV57y85vT%*}wN&(l5t%BMvIx<a!Zs|l
zz<bweW&u@?-ONxQ4e*By{W2^N-hlT2yR>UNQt^FQj*VfLSUjT`;82U`3}$&VKaUy2
zag&}xXE#zl!(Ak?%VXjH_dkBvnp3$uzzG*`$6@k<C2D`MWDEck`D5#us)wTJEZ-<2
z-HfbX)-|2+mm6Jbz%3qQt7W+v-Q9^6v|?;|D)#PF;CR*w@1a+qP(R`xkmZl8;Obcg
z^~U=@SMcqHz-o<r)djh=UysnW6ClEbBP~Sc-;+gA-VDUFweCY)L?tnI+c?{EIbb@t
z_Ldy)lP*|_apu*!1aZfn{L_x&6UYsytsJeoXd}CvWdY~rIrL$pEJn*_v1uSYXy}<>
z!)tjyW5SXOmlm6<Aapr3)UQLFHN_Dk@6Pq&&T3tU&i?9HI|1E<=~ZWm5GCRAQb0#z
zGWYK|HDPBF`S09m;y7p>AviTvUhim5Hs)B)xt)1%B4FfS2oBV4E0Tm_)O&0GiDeVJ
z$iV3T#%5b%fMi6lq<BBleJN|wJkr|@qv2{(7`B~`wohNJnu;qb+@eyHe8|e8RtmbC
zVm?B($CAutg!%mwxl@i-YsOlc$pL*brYT)=%_Ww*1ijttr9P>+SzjrqF_3>VWcJvL
zd)2g<SQ@xLJB6Yiv$L4O6=#_#8RG$FTrDWW_#o3cId7<XjW9SmJ`gnya{DE0^fs=K
zp?M{yrG5Qyx4Fba_<BvYQbkL^VfkV*Me!uoYG%R{Z}d8{n_ZY7({O*T8mXm3Au_D)
zIqmNYVJKe0II{*K6@ST9uAg+hq)Qof@z4ONb@go0=Skp>n-5|Di7Qunxg20{^;SN!
zaIl~LVW|RXUiZ(kHf5%pMUNO?h1}?9mX@s{3nxW2v=;;q&N-3}Cpt}8G-<q&8OO)6
z<?X8P>Y1x)ut8N@^O!*Eh;NVX-@LemRE?{{90bi9SOqK{&l-1E2i(Y)r04F?l8|#q
z(V@73f8=oT)%AcG5*JV%?ni;*p<Pk>U<|p1N>otb7-LsjqzFFJW+XI-9Kgojj6R46
z_ap5eQLZ?-7|S<bhq#mL0K{rYTg=w(jIhAe;fDiQ)VAiZ?>N#)cpRJ9jO;5t5I|8X
zr~ew_8S5KXUHcC#DB3r7lyRj*r_jzJr(}8c3So`eZZa_eA^CO&gvd@DpxSZX@tjXf
z)mGuN^z!w2>3@x1b-V<yO50k|F<2lC7&BYE(qm+1<thP19=hQ>_iw;B{z--o9^kOU
zD2U>DBl4e<!3CqyS%L=2KL>ldXwF!!0#8fTfg=IMI-W~CqDD{<lvj|g9-G2f4dH@?
zN4uNgomeQanc)f=ZYIGzowX<jkiCr~!_U_DxOPA>$+TupX?jeUxX6{;k~{f$up;eL
z|KijlP8k|4v*smU)-z@Amc0|1EO7WAh5yZ?cIW<DLPCrZ-vyK>&KmZ9eK&#uh0@F$
zLDYHN=S=h!sMw||F5T+nk>rv&4(Y^~!e60-W2pm$Fr~VjW3BXF(yMBhK0g7~g(cUR
z5tPU%c%;_^{pgD?eW|LqA%W4&PYSi$ddC6w(o}g{>}zG@Siko*vU;jwN`h>3H89pf
zVhSN&=_W%|w=lZKcYLJnMAh2b$thfq%W2U?=8c7ZO978m+L!5LUMD;B(}GPDsB*^>
zzir!7Cx>2<8Jz;ew<k;~f1^eF-Zu}AE2rJMn5aWXlV^lEUJ4~&99qx=aY!n5E-RMV
zK}gCcCsl+cVaJgc`~8(ccAX}RNmGPJ_BajY!Y)8H63u*Ph-SrB-_?C*(XtQi-T_;f
z`gTTVRM;e(bInE>bB8Pm7>rPoo#J9cyojBj25&@#`618oI$jP%SOUV?gd}ZE@-vI9
zkci&wFitJe7PUGLMabTpMny5B>e9P_{eusr-QP@23TTsA-7oYQfFiEeR|=0vCIQ*m
zes;UgS_g4-D$QWf<yzw9WiG!KBL^>yyFQn;e<MaAm#oO=O4RkJw)5zxM0uhaO>gpO
z4`@MW9|cj4_W(fXu@H5gUfA5_*J`2nafHkQL&S+Mju&V)CZ1`=dQ!qGn*7A6Nrp+0
zz%B<ul)_j=kHGu*vt_Gh+~j39ywv_SkyHBrfeO{)F+uf4(FKdAM>r*XN!ur)3;7$y
zhem~wdE~G1djy&sda#yVWldC``oP;;cM44_2V1gXO$!O+K{}gvgzXxTPhVSaoxJ*K
z_n<G^&N!<5v7^(qR+8H8xM9DWaur294?dpq$*cOxr5|1NPU}^_Xaux<TYcu40;sp-
z$mMB10W;49mO@a7I<7c`T$RT}s53)ysn-eU+)oYIs&J++m7i#V&;<KEJtJ5|<fOxC
zw;pREBGO99b_79L{o>GT8K><}O@XUJ-Au55)9fOr3-vx3(8&Rik-BXgbrCQ59cr(P
z;Wh`w4?*S<Sw@FZ>Q{;sOX5MZgkmN15L<s&b`1Ru+=DmVf7hkE&TJTg&A-aL^prfI
zc$8yMOR&cSu@*erN4M7x?!_bl`E&E=(IdR<U5x^U{)r*?sE~a}a*f24kf_Op>#Ks)
z3nS-C_o2nQfXoSou;PgAg^yEJwhHb~lji3iK4Xu;7QVJ%8+AUpJFGRFGQFmH1U^xj
zSw0Ev2oaZnQJP2E4;=LW__2(vK4RWA<cgwSm78b&h1)19uAsJNVQyV@2;Q^?xrTay
zHhlfAfnqn@y6v(b@(+OK>UHn1$_82^Fyw5UoDd;|Jm%({Ik{>~p&6VADs3N8rORJ6
zh2u>A-=W>GgthBs%D?R;zhO$^s(YTRSQJaccl)57+plrewfiw!|EFE;qU!le|Av(<
zn)B=?&UMx}69Vr~W$okAAcd^b{?6B;_s+)S<c_RP9`TlC|5`DB$Hw_WciUJVy@xD>
zmD=5fZIFyr_=wy|{`OVX@jLfOr~%DN5pPV!(Ub&ga;N}isUSohF>{|ilIkjf)Rom$
z`;r7=drFE7ZF!r(`K40WZpMs$#{O5b9yIhh%mPG<_`(EwvwMv3_p))SC_D-H(O#TA
zETAx1Afov8gXsb9xsQ&|MvSZ|3K;l}u`wo=kTMJ7Av=AxcHk3vC;Ao1w7X*F77B4_
zb^{#>uY1zZ0ncJ)TXc+#gWV%i$f8T*VfOJ25U@i<>$Pu;OvBb1ZJH<7JqEV;GLoq^
zZQID2P0>W}6#{(>+0deXz)KZ^y`(f`wA9i+fEg1@Vl`X&b-oviqnF1MDC^lnGr%7s
ze(jGFmBvcZ%1LgfdfQtux_aP=xtUh0RzYQ+z<NF3-$WO-uDUFoBb468xBd9Y<4-V8
zj_BUI*?Q2l;vDz<(NKZvCHSAoQ9uEMJAGb@454HbL5$W1Yf3Qv2Q6}TzhTxYb=UjL
zKvkD_@GO8cClAkqovY*>XN1oRU%G<ntRLS#1wJ^33@Ti?N2Vov>2M?FOZ0&!IoEmR
z0_SnOu#jZ}w6Fh561Y0}5ou-0Ns0r1Avprsd}qIkdTBgiNs}ha>671FTu%*qczl#N
z+Ja9KvD9TV+o&&mDG<E4QQ!_iTG$QM7uMAlJ8COjhdmt*-mQExt=f!MD%GurDtJ0)
zGe!9)g=bJ(3zqPH&VGM%;`t;s$5ikcjBFu4JLU45mXewy)3={pb0a+0=-c9tMlpD4
z=KFfil?UV@`&b7E?BGi6aw4dxcg4=WTrlO50S*S@=O^tAUv3|sx>>ZN-meuPcc^tf
z9nq*2@!9bBh`x_9H%hQI8myeUnd>xnuJIuPqq!oTXsjM0-Wm7`qXcGYngP33hF9Z>
z6=y+AI%4Yb4J9p5#LGhmzzIp+z=1dirgw8Fa$8z;x>=ehe{#8(P3j4uN_e*xhoQ>$
ztBC<zgGK1O8|%-2#Kbw~e;Yie4kN7m!cr<2+=dgfQ<^t@EtocYH{V*YNgaNTS7i}d
zj=o48UlXm5S*b@C!Qyh7+TlW`eK3W|?uq-x)5{!YIl|S_PWkk6YM?~uz=96I6KE%7
z3(~nn?$PjuzBSHWRz7$MfD#4gGi6Aht2hk~cd2k8jZm(b6qsIQ>x7V<v*CQlRo3JO
zZzBI?mSkcuxn@?ieT9;a6Pi0LBJ@?j<!}p@Pz|CCc#QgcY3bU>(Bp9kUvyZ9tS!NZ
zXXTh{MrT`^2PmBi{ap?J9^f4;xb4!2h`V~9^l=z5)u2-u*-9==birFP=wC3@sF~?`
zfCy%yS7rJ09y(N_F*LuZ(u6YV@{OtsZA3Hfs)0kqzT97JNYxDRgLbiORbZ$ewC6z#
z#)@rVDcts({3z1OUG`0NV}fm1Rj_F2eP|l=b{3b6aV{f{mtXY(*-YDOZ!^`d+iq`x
zy$zllTC?hX!SB^g<XOJY-00DPE8<eA<SxF99AeX_s~tLpB^V~={$7{!%d9RH`?byJ
z(~VtIRN&M|P?@`28I$LozfxhwK9;0!P+eq@{gr~=i+x~Jo}$thEd7;aO;hZWU`!ER
zxJ)h+i#@i<G#&hLhyGh@Kcw)DZN4rRCg7j;mHe_}a=IejnC@VG(4_AyK-~zco?Uu6
z>N6D01^dP?LE9P5BH3c?j;ird?~pB+R^Zpg`!<ScturuP#b6m}2(kEr8?A4ixSaF(
zeK&uS5d*Ym3Aja%|6x~<pAoJ%qfFYKOTWDQ-M|KCtJ8FTKoTOuq+)BGhZ&%hAw)|5
zK;;C4im)i*xd6JA!)3Y_T&orhM1UBX(f}v9w+Df)WBkE?Wix}wG?xCLNALi0LtSM)
zsa!iGPJIii8pRa!C6A4Be|^CZJzheHeZVC*VKEJi7pfnLUjcBW{x-t^U+zHH`F;*^
z*XKR7Y!P*Om1J@LG`z`pvJNx+{r}9?b(IIvED2HOox)+2(P|QsOfPJ!)~h7?(pZRX
z;`oi1q(rtNg*Rm)pC!L>6qjq2nbZaA{Wbcu+=sIz&5<4HF(wCQ{Ci)0^U;rWPUpnk
zH>cQ|BaOG=-ziL7xm+IM)X*Nl;vi2Ix^m?xcv8sMK<LwF3|_ut?$Aiwcu?58PfzfA
z(&HR7l0Ls$ng0}OnQsVjwegP2&j>2$-YvHUJIhJUZL|aGUMS$K$E+@!N~6}xzK4M<
zEC(6~YeLQ2%_Kr*K`$cS1nNVm6!f@oxEnmTV)L9`hV_D{_Ws~c^jLIIvSR$^3JS<+
zU@zh`2}AK^xb3mSf=1yVrk_Mnowg`^ojVo~lt$G-AVG6IbGEp26XD0QZi`?cf>pL}
z-4Z^Iq7Lycyxi*wGsszdJ4`6LoX}lFlMRWD2WJWb12V5|hVo3fF8G;(uxGV?(<9{v
zEy8p)rfG23^l;oxcsT^s^z9~fRH}N{jkyL~EAvyb_g3iys(2Qp7aRS&_$#11b~kZM
zU5}UWIJD5{<l?zH@gj%2Xn{kUOca9(UC|p%gtxA|gCk5^HRvVyx#~F5!N+wCxS&+r
zXOI$rhKCF6MrJ!~DdJPCGW+hF*KSLRDb4}0jt!#m0brZk<hx}DrY@wQLzPdi-Khi<
z2RzqK6Af<vG-98mi#sz+B;vbP;ZJw}`S6i+CUW9>_Gm6op%Hp?j2Cra{=;m3CAr6l
z$>3QK#Y*?s_E6kcezTUMeA)dgtjvj_k3xxM0Qv57ce03~8xsBMa>R_gPQPf#<xVoV
z_(aW&KO#NXq9cOR6i3mvPn+=*e$-JYOvjqShw(G$VuJLs+!)rJ!pUy1az)@%^uoE;
zJdMsJBMefp^*GGUdIIMw5Y4Z-7RuN`icq{iu{<P(esB*SgKQ2zEoqS@`h!A&W=sOm
zcI00}BYL)YAQf|>@LdSo$~epbNqo+G5sa(J1k?!MASLPvn|H17E5_^7PyMfIqnrq^
zYT#q%k)Q1y-r_ryQx|f+s^YkdISQQLb(BY(uT?soeK;FT9&z1>_V_8mb^L%4x{W0m
zPyQK2V@r-JB83(M{(U7H>N6iekHXp$RI^9+7Sz!^ithti$1=1IWJ->&OJpr(+9Am-
z<?U_I!VA#vcQgPbe9fYVJzHeCW|K?TJ4+^JfGBZ!QO7ql3!YYYLcwxwG02S%F!j;9
zX9&y0z?>e`>EnEnluxrtT3D!ta5<c%VyYLQWv;j27XLb+>T)cIIBaKImAKrLs@ODB
zA+o+R263e+d{YOLEujC&=dAf{51cPMQ#EF$ruZ%6-HB({dLtsRObu_kaFYcb`twqZ
zYgS*J`ocRsQ<kcTWRzz<sCWT@Zr(w`pZq9a`Q~wbAGoqfO()R{oBuc@>S{ac375Og
ziRU3zO&84vysDSM^Apjbmi_!rW*_-5EIS%*jAp4NFQ#oVq{(M`)nx7f?>!!`rX8fg
zF|OZAR^gH=`oFbv6ZZljHCPeyCdK){$2P}vNx<}hf_$l<#$!bffg1Pck|5R0^VxpC
zvMZYci2Ky@7TGUHx|l#uq-bQ)h{*{*D2!SVzn$xc-g+Q<R2%+AeDKx=$Z!sCf4CZh
z`Vnz;etit>LHB6xn8XvzixyJ^K)PoeuE7%EMA*(H-^wcd)L#alEe)`n>CJ%NC!{mj
z$<t!#s%p=mwCPN@1WgCq%;?(KgFxDq1kHPcTfC@#<C4pVq{V9-)a}&N?1p<+YkhcE
zcQjS$Otd-5wBcUDeT;px@6auEx2~A~Js{JMHBt7#0uj0#y5G6i4YKvKmD!BMt=n$I
zbTM0W6)Emqvi(V@)NQD-Dr@tyGL@u65#Rp{@D@ifnarinp~rz+b1d2IX|jF>7vvz?
znUC^gENLYH)K*Edf1#J~@EZHe_Td_#*m=z)MSQZ>3usy%#j&fIq+I4sY?E#8Qdi{!
zD98gGeGOr-uCQ}NC<k`?igfTl-+vayMamMW1bp~QUR@q(ew3JcV~W)vfy|v@&V7-E
zGS`O*dKQ3rJ&i_ZYZT)8%wn0z1!rXFJf4~AlP^chsZCTA5n_@TZ{aBLI&-Ye%FOBS
za<KnNeJcjGEf>*P2DY3Ufv9PvW*jcrtrXmy*|7ObSf1d%v)r1QA++W7$!Rxq)y{lj
zv5a-Qb%2SX`?bd`pXCK`6{X(;EEtQcK>N*4+`HW0Tq~hzG~o94RBR9OfOBx_@$v?+
zBs{JALU$O|2e4=ws;k6Yk?#3WK=suwJ))%(uG;}v$;uV^#THO!YDL~c&H9hL|42Gd
z;|>y9CpUm0f+F~|z(&c=F7YJfW^KSOm$`P?mBZQajWdZAx9zAo|Grt&o{q&rdFhT5
z`Y_ohp%UJwv!={f>pA*dOM;^=wLg`hz%Is~q-yEbgLS+eD9m{E7ufp_1UM^DoNJX2
zJo8M&)Ux(xup<fiP1db1{A<`E(c*1b$!SME!8-2x<{ZR$Ju-UE&+=Q(NWfoM<q_3(
zpH_zrMXK`)WD{dN+C<7^2$NXgLYNV#3(-Fk3y-ykro0yJXSatnm)J&UE;p`J{JCvE
zbjrP6kM^xNcOe9>Fl{I&j!?TjT-_Z>NwQ54rAe;2{IL|(pHTT9=8v8dy|Gj<yT0Lj
zm)8r1P*`wTqicCEfuvglYwzAtaJP$7*r}k<6^#=(+;5TA1&R+*s>ynK7XE&}KLi`@
zO=e1ES#NxuAAv+TBVZ&753FMRX;+tg%*%RgU1)asI<+W_6+-+mgiO(J+S4K3^`@EI
zBSbuaO{I*38<{nNCj^8on3Xn3O)tt=LH?eN7xup`@I}Ararv8VR$)a>h=)d-z)lsW
zn-`m=1Q8BNT057-LD3Vw02Cs;6n^hWH54npaZ@8f?byU7e6&%5Di6KsrLf4u7vxf!
zi6VnTyI8};{Xwgr==EWJt?6Io%7M5seoFkyNQAUGW9%m8gwFvyXw7tce|Sb-UqlI+
zO6l4s-iMRMm(cYRccyNU^~9;#CJ0vWL5YL(;7@aX>i~GvWSc@`l+P-omPUD`v9ff;
zK$))Sb8IG|Jyd(D3ZO|f3)Sb2Itzdx<mr}hvFtj6bKKeacC)rfeswhI?Ih{_Y7!<$
zP(|(sdAO-s`J}b&j-=aZCdv)*m)xTqFj<YGL|^mVmfb~)z|qP6jFf%iCMzZt8Q5`Z
zT5op(l3|wC@W>;2VrF`y?zNSxy11K%*V_6N3QHDu{r-sOcS4t~q35a2_;$}!*%Xp8
zk2H<w&7IPuC@pB#4X&rFF4m+kq|yTvWZizMBSiyB5Ihpm8B|IinXKnF4rR!LSRR-P
z%dnFdz@KoaNXk$it`cM=5K0rrh_CHvQf7p0AH_%!o(M)nBT9S=S~P=shsI+(^cmbL
zbRHv=5K$Z+e-rv&ccplsU(ATE^xfgqEfKe>BJTN(`O35=C9)xEI7%3YD^zp9h%^3x
z9=t0UO^js$`RT_((I_>iqisBB>(UT#)<h?#EJ4Xcw|a%62=0eQFeM|CnFy~I{g9!N
z9N;2-=apZ5_Q8#S@GoQfQ8UB%G8I@|NqiOPW26?&Ei&vYJqadG)?O5MR}&gzT-)av
zZ6-ckxZRoK_RqvqR-u~B&f?p``d(KNy<~AuZ9-v7FEq;QDhvdYV=iRO?O3#U#FK~z
zLU!Z17>yn)v54*srKTG8ZVQf^d=m#ZCzkys@3Y8QA3;0zCWjUgjD^-a3klBJ7{Uh`
z@F<0r$gz0YW*W;;E;8Ax6}hU-TtTw;NBho8_4_gtrW7&)SMNxrjTfV0+Dtff>?;~n
zog*E8MjXe`Ro3}dQR>ZF2Aai=kiM*;Ok`vUOizN<YWN;!bF~HH7cga42*3vIRrmX2
z2xlvrG$g9bFU7JnFxS2F+PsUza7!kydtpZ?sJV~6_qA7|4w}$ldJ`wGp<ZN_g-(m{
z0OXP`v-4C!OnP%>5J$`|E#cBnDx4uwtxt<=XzDPeUTRm@+v+&DQWWv74cWw-)i3ux
z$5?kS|4L@}y8nr!!Wy)d>riO0zaeh?>GNw~=*dyltNflqQ@$k>XV>BSA;Png0fb#x
zbRJ+k0TBevrf}uzX!UldYTJ^|ar_{HpV{uwjf{2ZB?{^1RI2{1t1NST1?Kd$BG)lA
zS4GVmmOa90Cmae9U}g+?iU*8QaS@&ATJ-LVkKv1d7?Av`q=$1f)7U?dvFtosq<7z?
z#-$|y%aIJYYh^$K^s+FwKTPbCB3azIN}`aql%mvn9)G!fvX0%}&oY8B@8rw~m)RT3
zRP)m1f!^Uy=98kTOb#3&#lQPeKV#D`hAFh6-(2OAJKuvGV7@KQBezGV<!P6pJq~<d
zkUbaMpRy3^(CrlNEwdj_sR6EoJ|iylL22s{$R>ZZ9s>_?!eSgi$@7lWw_^%v{U5Kp
zu;D~w7@L`9$(cPXZVL0nKH~PSC^zoJObS3)z1>>b2CYH(?2bD;D}B~R-8uN(C+6zD
zkODmT?D!_3$L#B^-x}O`69z{30Df4Iy(Ek7IbrbvhywqUI_b$f8`)aiGh~hAM)Z=9
zsuV&|_rUvFFORt;Uc!wlpIK^$XxM{qE0WvL1#jdvH?;JV5J?>duaV%kK_)RbeJivF
zvVC__ft1ky2D)H=W>EB0I^q1@*D81{Of1-mSd*v?Cho&7N_UDlU?1@J%>sY$@!qBw
z{P}Ha^)go@!?nBw^olvAe<6&Mqu;8moiC!=jloM6aYGhP;jO;Zu0g(EawsY=X)FkE
zRYH)c<WTnB*<yc?rZMW#%*L;+pKRfA68}CcWlez{9^BLzg0lXe>`grv<VszG9g#@W
zB6h3c&96Qf)f)2+PDFXF8%c>3NKIEs`rd@U5t^H?E%kj+K5V~Sp%!IOpgH;i$qw&+
zbk*sdDCq@D?UG@@bB?WW^%+8Hq!K)@)ppuqT=Pz6M%Fb*v2%tWy~#a=+fO0jm5a#Z
ztTKLH3?$|a@$wuc&=HFRjU*Fl;|W%>V)9@Hsql#9Pp5H5PcRcIkEL}NIDh85-FqXH
z(aiX%gGKs<pPx;qS7k$)65#Wf(SS5h7JTcORF^KeUuk{iS|!XlPHt!yjV4m<@<#Qe
zD+lS8?0hUh&8q33x8qI<$JiHq?U!j;SaQhWe<O+x4IsElRAj~Yo4u||Yz?iAiI`cA
z;`swq@UDvlaq_^(gh))QjkQ8~XMd@R%Kp)>HJ!jfJ3h|%v-KG;)P5N7LBEsWh+lk&
z^mLlODg*eD-CNnK?zxe($B@fB9E2tov5&ov)_sW9^dM*Bu5|9bR|)2m_qYC_UNMm2
zt_P)h*I_4mS@<^aGjRz-fZK@-coJm@31_jc2$0iI#fM9^mOpd$AYhkuxDh}p^<nBB
z=!nVVaWMHNKpJwc0S8dt<F*$aVv~Y<LRM8d%gzNiHagiH1(KxjpHAjFY=<1sCvNz!
ztvW*d#%~9+Gf#sQRAMY#LnW<j?7Muxsd#3rvC_U$Rg<dCM*A*ifeaq-jiEd-g@jQx
ze;k^|%9ow(Ig<txD%m4z#pZW2TcbmT^G65(_pYUlVwT2Iz7eiG^Aw6<+p+fJVfil9
zCbo37M2pY8{<>HgF{>EEtge$DB#=;$gG_6Tp8HGbKf0!kC2J#gs8&-QK!4WwqsXNL
zs;$2uB=v#V0y$8tK_o3Fy=bz`C&ooWZi;G<3j#i_?{2Ot`s>()S;(9q5%|xb_9{tu
zJ!F&;>$P~45jW3nFQH%U{>OhrcXi7O5n1zeqx?;%nhJx9soG^7_kf?ZM)>~j3zJLu
zs~VHW3#SO!GA01=ikXm*oTq&tw7m#Nqw7tRVGxHtZXZhkhuCO{=8Te%AVr^Gl0gTf
zl5N3=si!B63!id3dG1BPuqeoqUu<mgZsCqzTFG4Ye-nZIx>`Y3!?ia+EjzD!W@&AO
zUUXE8c!I*AF_3u#^1MIv4s3>(=1t$*j%@OPN6bxWYQ4Xkrc+c@C&0_q7+*q;6yd?!
zLZs1*Y<||cHPl?~(S`1ho;SllO>tOB<ha32e3T5reZbMI0agZ|WsI1)VN60(n#~W2
zeWFC<?C8R3Bip}}?PA#7QuO>gcHe>%MQqyy?*l+RwYTqRWvh4m)30^w4x9o>-aimd
zaSq-77%y8~tzYLTN{+20l+p*ED7teSG5%5){Ar~<IS}MCFbQEA7SyzTHX*Ws@%4)b
zybXYoMu=6?@TeFX3=$~25uuRhV^dN=yU!zi7>Xhu>vi*~FvM<fVr5pl(B<0dtD3qi
zmE>OX2<3>8NeM-q1J*0`o>c^qEPgm>rJneWW9pp6k^DjWUsFFH9G3WSNjqhi`v}ez
zIR<5}PzxZt8si?%lgP3e6Ae0|4D-Xy&2#1%&Xo&*NHtaLL_)dhsf)m2V5c=N8?4tR
z!ZZ|<4rYsS>$@K8NpJV$%40=g{28PnZi2U%po*}5lSXh}EC2@8$O$2YUL%y!gfa3*
ztTx{_s$I47jtqtH)&OF2iaqi!>w%OC^ceJH5Zr7dTYt_hgNREg{H*D+=u&4^tJuhr
rr(tJ;Kcejb00000@Mi{+00FhU0jH@CzlD_7J}?c70ssI200CKA1gSZ-

literal 0
HcmV?d00001

diff --git a/data/demo_ml_tibble.rda b/data/demo_ml_tibble.rda
new file mode 100644
index 0000000000000000000000000000000000000000..a09f6998557688839c5311432393e9b0ca417d06
GIT binary patch
literal 848
zcmV-W1F!u3H+ooF0004LBHlIv03iV!0000G&sfahq@M#6T>vQ&2UKVg<Ze>RpfklK
zA+`t`^LEM5g0JmSTuuD9u`sR7lvVPh#<099W(Y;QY4=Q4DoJ=7Y`4keF@$3UJH^{i
zmrgC7E|r+?{-&KT1?NklPzp14tr^+_s{z0dj@OW6FIkXU4r1*Dr!~{{I|wl{NMDuH
zw69+fntYI4+>_BV9SMh;Bj$UD&ZBj$l2#8!+Ld^=C*3ZBO+s3NAD#I=lb@pn<WN1w
ztbZ75iv&niXho5#mkUU%>Kx7+fvHr~y29r-OCx0rYHqJ52Ka<T0E0to>JGsMrjZd0
z4A3WcSpj_2Fp!*`(o;*lXi5JB?Ks_{*q)NA7+!-gHDI!H9zNWraO08zd_g`$uN^oA
z{PbRlhr3qq-y(hK@p)2GtC!pyAjHOdle${KwLh!1!u}mSGf%8Xfs_9#wlYWVcsN8I
zTnQG18{Y$r1whysA$M@!RSl%@fIkYy6vF>hQmS{Bc(r+&9+#X;ggb))N7V04%Qc5<
zC6jS+EZ=GRqp(*%q$ElUL&Vc>N>cTAc8~JRzpqDufxDA)=gdH?3d)K4Xz7|kWou#6
z*K2&xpOP~MMrAjal|z7RB)*-{yjyio<4o=Fl_G|WmA}?@RY}=If5L8`hQA}N+A!9J
zsBKq)?sjJM&Til+x6oaushKY;8XdAkC(0h9WAyUS3j@bJjWvrKuM`YQp}%b;JD2iP
z3c@)-@GoBNo`y~Z!dyO%t9UsO@TKa>xUQvVDIWxmbeNY6C#NB04pvkn4TokYwHa8Q
zi%$R9x4(NH<TMJ?Hh<Mrft`bQD^NC&9kMBQ>%WaM%XY^lq6h=m9&@P^wNto>Fnv7m
z4iDaJ^<NKzn{ULB=(LrLK*xd+&TLf}N8hUBT@fa)=)s$b<Z6(!3d_vWMUUCY?+xU3
z`_wm-5+MZwcmg(R`|*wn)a=90Mga4R(*}it9~)L#>~wMh#;jP}G#N+jPt4T&$IT$n
zrsj@sp9o?z>Mo@!HuD?WClh@siDGoSAZeQgS<Nil-DmS+N)xw`ihPvhzW@MSI@f{#
a0jvg~$pQeNCg{UHFb#_W000000a;ontctDx

literal 0
HcmV?d00001

diff --git a/inst/scripts/make_demo_data.R b/inst/scripts/make_demo_data.R
new file mode 100644
index 0000000..22e9ea1
--- /dev/null
+++ b/inst/scripts/make_demo_data.R
@@ -0,0 +1,54 @@
+## Build the data() fixtures used by amRml example blocks:
+##   demo_ml_tibble : 60-row, 80-feature ml_input_tibble (Sfl, AMP, genes, binary)
+##   demo_fit       : a tuned LR workflow fitted on demo_ml_tibble
+##
+## Run from the package root:
+##   Rscript inst/scripts/make_demo_data.R
+
+suppressPackageStartupMessages({
+  library(amRml)
+  library(dplyr)
+  library(rsample)
+})
+
+set.seed(42)
+#Loading actual sfl data
+fixture <- system.file("extdata", "Sfl_parquet.duckdb", package = "amRml")
+out_dir <- file.path(tempdir(), "amRml_demo_build")
+if (dir.exists(out_dir)) unlink(out_dir, recursive = TRUE)
+dir.create(out_dir, recursive = TRUE)
+
+#Generating the inputs that will be used as dataset to load for tests
+generateMLInputs(
+  parquet_duckdb_path = fixture,
+  out_path            = out_dir,
+  n_fold              = 5,
+  split               = c(1, 0),
+  min_n               = 25,
+  verbosity           = "minimal"
+)
+
+target <- "genome_drug.resistant_phenotype"
+full   <- loadMLInputTibble(file.path(
+  out_dir, "matrix", "Sfl_drug_AMP_genes_binary_sparse.parquet"
+))
+
+#Subsetting to save on size and to run tests quickly
+demo_ml_tibble <- full |>
+  dplyr::group_by(.data[[target]]) |>
+  dplyr::slice_sample(n = 30) |>
+  dplyr::ungroup() |>
+  dplyr::select(dplyr::all_of(c(
+    "genome_id", target,
+    head(setdiff(colnames(full), c("genome_id", target)), 80)
+  )))
+
+data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
+train_data <- rsample::training(data_split)
+
+wflow    <- buildWflow(buildLRModel(), buildRecipe(train_data, use_pca = FALSE))
+grid     <- buildTuningGrid("LR", penalty_vec = 10^c(-3, -1), mix_vec = c(0, 0.5, 1))
+tune_res <- tuneGrid(wflow, data_split, grid, n_fold = 2)
+demo_fit <- fitBestModel(selectBestModel(tune_res, wflow, "mcc"), train_data)
+
+usethis::use_data(demo_ml_tibble, demo_fit, overwrite = TRUE, compress = "xz")
diff --git a/man/amRml-package.Rd b/man/amRml-package.Rd
index bdd7a8b..50cd609 100644
--- a/man/amRml-package.Rd
+++ b/man/amRml-package.Rd
@@ -19,6 +19,11 @@ Useful links:
 \author{
 \strong{Maintainer}: Janani Ravi \email{janani.ravi@cuanschutz.edu} (\href{https://orcid.org/0000-0001-7443-925X}{ORCID})
 
+Authors:
+\itemize{
+  \item Janani Ravi \email{janani.ravi@cuanschutz.edu} (\href{https://orcid.org/0000-0001-7443-925X}{ORCID})
+}
+
 Other contributors:
 \itemize{
   \item Ethan Wolfe \email{ethan.wolfe@cuanschutz.edu} [contributor]
diff --git a/man/applyBenjaminiHochberg.Rd b/man/applyBenjaminiHochberg.Rd
index 75867ee..40e5897 100644
--- a/man/applyBenjaminiHochberg.Rd
+++ b/man/applyBenjaminiHochberg.Rd
@@ -17,3 +17,10 @@ Returns Fisher results with added cols for adj_p_value, sig_after_bh, Q
 \description{
 applyBenjaminiHochberg
 }
+\examples{
+fisher_results <- tibble::tibble(
+  gene    = paste0("gene_", 1:5),
+  p_value = c(0.001, 0.01, 0.04, 0.2, 0.5)
+)
+applyBenjaminiHochberg(fisher_results, Q = 0.05)
+}
diff --git a/man/buildLRModel.Rd b/man/buildLRModel.Rd
index 3ed514e..344617e 100644
--- a/man/buildLRModel.Rd
+++ b/man/buildLRModel.Rd
@@ -7,7 +7,7 @@
 buildLRModel(multi_class = FALSE)
 }
 \arguments{
-\item{multi_class}{\link[arrow:data-type]{arrow::bool} Whether to construct a model for multi-class
+\item{multi_class}{\link[arrow:bool]{bool} Whether to construct a model for multi-class
 classification}
 }
 \value{
@@ -16,3 +16,7 @@ A \code{parsnip} \code{logistic_reg} object
 \description{
 Builds a logistic regression model.
 }
+\examples{
+buildLRModel()
+buildLRModel(multi_class = TRUE)
+}
diff --git a/man/buildRecipe.Rd b/man/buildRecipe.Rd
index 5c2d790..704dd1e 100644
--- a/man/buildRecipe.Rd
+++ b/man/buildRecipe.Rd
@@ -11,9 +11,9 @@ buildRecipe(train_data, use_pca = FALSE, pca_threshold = 0.95)
 training. This can be the output of
 \code{rsample::training(splitMLInputTibble(ml_input_tibble))}.}
 
-\item{use_pca}{\link[arrow:data-type]{arrow::bool} Set to \code{TRUE} to use PCA instead of all features.}
+\item{use_pca}{\link[arrow:bool]{bool} Set to \code{TRUE} to use PCA instead of all features.}
 
-\item{pca_threshold}{\link[pillar:num]{pillar::num} The proportion of total variance for which the
+\item{pca_threshold}{\link[pillar:num]{num} The proportion of total variance for which the
 principle components account}
 }
 \value{
@@ -22,3 +22,12 @@ A \code{recipe} object
 \description{
 Specify predictors, outcome, and metadata by building a recipe.
 }
+\examples{
+train <- tibble::tibble(
+  genome_id = paste0("g", 1:10),
+  genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 5),
+  feat_a = rep(c(0L, 1L), 5),
+  feat_b = rep(c(1L, 0L), 5)
+)
+buildRecipe(train, use_pca = FALSE)
+}
diff --git a/man/buildTuningGrid.Rd b/man/buildTuningGrid.Rd
index 498f310..1841efe 100644
--- a/man/buildTuningGrid.Rd
+++ b/man/buildTuningGrid.Rd
@@ -11,13 +11,13 @@ buildTuningGrid(
 )
 }
 \arguments{
-\item{model}{\link[rlang:vector-construction]{rlang::chr}  Currently, logistic regression ("LR") is supported.}
+\item{model}{\link[rlang:chr]{chr}  Currently, logistic regression ("LR") is supported.}
 
-\item{penalty_vec}{\link[pillar:num]{pillar::num} A vector containing \code{penalty} (regularization
+\item{penalty_vec}{\link[pillar:num]{num} A vector containing \code{penalty} (regularization
 strength) values to try (for logistic regression). Recommended range:
 10^-4 to 10^4.}
 
-\item{mix_vec}{\link[pillar:num]{pillar::num} A vector containing \code{mixture} values to try for logistic
+\item{mix_vec}{\link[pillar:num]{num} A vector containing \code{mixture} values to try for logistic
 regression. 0 corresponds to L2 regularization; 1 corresponds to L1;
 intermediate values (0, 1) correspond to elastic net.}
 }
@@ -27,3 +27,10 @@ A logistic regression tuning grid as a tibble
 \description{
 Builds a tuning grid according to the input model.
 }
+\examples{
+buildTuningGrid(
+  model       = "LR",
+  penalty_vec = 10^c(-3, -1),
+  mix_vec     = c(0, 0.5, 1)
+)
+}
diff --git a/man/buildWflow.Rd b/man/buildWflow.Rd
index 1bf7932..e3a1a6b 100644
--- a/man/buildWflow.Rd
+++ b/man/buildWflow.Rd
@@ -18,3 +18,14 @@ A \code{workflow} object
 \description{
 Builds a \code{tidymodels} workflow based on an input model and recipe.
 }
+\examples{
+train <- tibble::tibble(
+  genome_id = paste0("g", 1:10),
+  genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 5),
+  feat_a = rep(c(0L, 1L), 5),
+  feat_b = rep(c(1L, 0L), 5)
+)
+rec <- buildRecipe(train, use_pca = FALSE)
+lr  <- buildLRModel()
+buildWflow(lr, rec)
+}
diff --git a/man/calculateEvalMets.Rd b/man/calculateEvalMets.Rd
index a06b7e7..c4529a9 100644
--- a/man/calculateEvalMets.Rd
+++ b/man/calculateEvalMets.Rd
@@ -19,3 +19,20 @@ accuracy, normalized (to a 0 to 1 scale instead of -1 to 1) Matthews
 correlation coefficient (nMCC), and log2(AUPRC/prior) based on the AMR
 phenotype predictions by an ML model compared against the actual values.
 }
+\examples{
+preds <- tibble::tibble(
+  genome_id = paste0("g", 1:10),
+  genome_drug.resistant_phenotype = factor(
+    rep(c("Resistant", "Susceptible"), each = 5),
+    levels = c("Resistant", "Susceptible")
+  ),
+  .pred_class = factor(
+    c("Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
+      "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"),
+    levels = c("Resistant", "Susceptible")
+  ),
+  .pred_Resistant   = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
+  .pred_Susceptible = c(0.1, 0.2, 0.6, 0.3, 0.7, 0.8, 0.4, 0.9, 0.8, 0.45)
+)
+calculateEvalMets(preds)
+}
diff --git a/man/calculateMinSamples.Rd b/man/calculateMinSamples.Rd
index 66a18e1..98347cd 100644
--- a/man/calculateMinSamples.Rd
+++ b/man/calculateMinSamples.Rd
@@ -25,3 +25,7 @@ Returns the minimum number of total observations needed (one bug-drug combo)
 to have at least one observation of the rarer class in each CV fold
 or, in validation mode, across train/val/test.
 }
+\examples{
+calculateMinSamples(n_fold = 5, split = c(1, 0), res_prop = 0.3)
+calculateMinSamples(n_fold = 5, split = c(0.6, 0.2), res_prop = 0.1)
+}
diff --git a/man/computeFeatureFreq.Rd b/man/computeFeatureFreq.Rd
index a0b33f1..3c79dd4 100644
--- a/man/computeFeatureFreq.Rd
+++ b/man/computeFeatureFreq.Rd
@@ -19,3 +19,16 @@ The BH Fisher data table with added feature frequency columns
 \description{
 computeFeatureFreq
 }
+\examples{
+df <- tibble::tibble(
+  genome_id = paste0("g", 1:10),
+  genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 5),
+  gene_a = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
+  gene_b = c(0, 0, 0, 1, 1, 1, 1, 1, 1, 0)
+)
+encoded <- encodePhenotype(df)
+fisher <- applyBenjaminiHochberg(
+  runFisherTests(encoded$df, encoded$target), Q = 0.05
+)
+computeFeatureFreq(encoded$df, fisher, encoded$target)
+}
diff --git a/man/createMLResultDir.Rd b/man/createMLResultDir.Rd
index 2e522b2..ec032f0 100644
--- a/man/createMLResultDir.Rd
+++ b/man/createMLResultDir.Rd
@@ -42,17 +42,10 @@ Creates a structured directory hierarchy for storing machine learning results
 including matrices, performance metrics, feature importance, models, and predictions.
 }
 \examples{
-\dontrun{
-# Basic directory structure
-paths <- createMLResultDir("/path/to/results")
-
-# LOO analysis stratified by year
-paths_loo <- createMLResultDir("/path/to/results",
-                                stratify_by = "year",
-                                LOO = TRUE)
-
-# MDR analysis
-paths_mdr <- createMLResultDir("/path/to/results", MDR = TRUE)
-}
+out_dir <- file.path(tempdir(), "amRml_createdir_example")
+dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
+createMLResultDir(out_dir)
+createMLResultDir(out_dir, stratify_by = "year", LOO = TRUE)
+createMLResultDir(out_dir, MDR = TRUE)
 
 }
diff --git a/man/createMLinputList.Rd b/man/createMLinputList.Rd
index 2104415..7e15eed 100644
--- a/man/createMLinputList.Rd
+++ b/man/createMLinputList.Rd
@@ -49,8 +49,9 @@ inputs <- createMLinputList("/path/to/results")
 
 # Cross-test with year stratification
 inputs_ct <- createMLinputList("/path/to/results",
-                               stratify_by = "year",
-                               cross_test = TRUE)
+  stratify_by = "year",
+  cross_test = TRUE
+)
 
 # MDR analysis
 inputs_mdr <- createMLinputList("/path/to/results", MDR = TRUE)
diff --git a/man/demo_fit.Rd b/man/demo_fit.Rd
new file mode 100644
index 0000000..b7b38db
--- /dev/null
+++ b/man/demo_fit.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{demo_fit}
+\alias{demo_fit}
+\title{Small demo LR fit}
+\format{
+A fitted \code{workflow} object (output of \code{\link[=fitBestModel]{fitBestModel()}}).
+}
+\source{
+Built by \code{inst/scripts/make_demo_data.R}.
+}
+\usage{
+data(demo_fit)
+}
+\description{
+A tuned logistic-regression workflow fitted on \link{demo_ml_tibble}, for use
+in package examples that require a pre-fitted model.
+}
+\keyword{datasets}
diff --git a/man/demo_ml_tibble.Rd b/man/demo_ml_tibble.Rd
new file mode 100644
index 0000000..4a02e3e
--- /dev/null
+++ b/man/demo_ml_tibble.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{demo_ml_tibble}
+\alias{demo_ml_tibble}
+\title{Small demo ML input tibble}
+\format{
+A tibble with 60 rows and 82 columns: \code{genome_id},
+\code{genome_drug.resistant_phenotype}, and 80 binary feature columns.
+}
+\source{
+Built by \code{inst/scripts/make_demo_data.R}.
+}
+\usage{
+data(demo_ml_tibble)
+}
+\description{
+A subset of the AMP-genes-binary matrix generated from the bundled
+\code{Sfl_parquet.duckdb} fixture, trimmed to 60 rows (30 Resistant, 30
+Susceptible) and ~80 feature columns for use in package examples.
+}
+\keyword{datasets}
diff --git a/man/dot-getTargetVarName.Rd b/man/dot-getTargetVarName.Rd
index cdbf92d..641ca8d 100644
--- a/man/dot-getTargetVarName.Rd
+++ b/man/dot-getTargetVarName.Rd
@@ -23,3 +23,12 @@ either \code{rlang::sym("genome_drug.resistant_phenotype")} or
 Returns the name of the target variable to be used for machine learning:
 either \code{genome_drug.resistant_phenotype} or \code{resistant_classes}
 }
+\examples{
+ml <- tibble::tibble(
+  genome_id = paste0("g", 1:4),
+  genome_drug.resistant_phenotype = c("Resistant", "Susceptible",
+                                      "Resistant", "Susceptible"),
+  feat_a = c(1L, 0L, 1L, 0L)
+)
+.getTargetVarName(ml)
+}
diff --git a/man/encodePhenotype.Rd b/man/encodePhenotype.Rd
index 7067e7d..14993c2 100644
--- a/man/encodePhenotype.Rd
+++ b/man/encodePhenotype.Rd
@@ -27,3 +27,14 @@ A list with:
 \description{
 encodePhenotype
 }
+\examples{
+df <- tibble::tibble(
+  genome_id = paste0("g", 1:6),
+  genome_drug.resistant_phenotype = c("Resistant", "Susceptible", "Resistant",
+                                      "Susceptible", "Resistant", "Susceptible"),
+  gene_a = c(1L, 0L, 1L, 0L, 1L, 0L),
+  gene_b = c(0L, 1L, 1L, 0L, 0L, 1L)
+)
+encoded <- encodePhenotype(df)
+encoded$target
+}
diff --git a/man/extractTopFeats.Rd b/man/extractTopFeats.Rd
index c6ab154..b43cf0c 100644
--- a/man/extractTopFeats.Rd
+++ b/man/extractTopFeats.Rd
@@ -9,14 +9,14 @@ extractTopFeats(fit, prop_vi_top_feats = c(0, 1), n_top_feats = NA)
 \arguments{
 \item{fit}{Best model fit, such as the output of \code{fitBestModel()}}
 
-\item{prop_vi_top_feats}{\link[pillar:num]{pillar::num} A vector of length 2 with elements together
+\item{prop_vi_top_feats}{\link[pillar:num]{num} A vector of length 2 with elements together
 indicating the proportion of total variable importance the top features
 should comprise. To get the features that contribute to the top 10 to 20\% of
 total variable importance, for example, set
 \code{prop_vi_top_feats = c(0.1, 0.2)}.
 Returns all features by default.}
 
-\item{n_top_feats}{\link[pillar:num]{pillar::num} Number of top features to extract}
+\item{n_top_feats}{\link[pillar:num]{num} Number of top features to extract}
 }
 \value{
 A tibble with a column for top features (\code{Variable}), a column for
@@ -27,3 +27,7 @@ per-class columns of importance scores for each \code{Variable})
 Returns the top features that an ML model found to be important for
 predicting the AMR phenotype of a given pathogen.
 }
+\examples{
+data(demo_fit)
+extractTopFeats(demo_fit, n_top_feats = 10)
+}
diff --git a/man/fitBestModel.Rd b/man/fitBestModel.Rd
index 2724cb1..cdc7c58 100644
--- a/man/fitBestModel.Rd
+++ b/man/fitBestModel.Rd
@@ -19,3 +19,15 @@ Best model fit
 \description{
 Fits the best model (output of \code{selectBestModel()}).
 }
+\examples{
+data(demo_ml_tibble)
+data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
+train <- rsample::training(data_split)
+wflow <- buildWflow(buildLRModel(), buildRecipe(train))
+set.seed(1)
+tune_res   <- tuneGrid(wflow, data_split,
+                       buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
+                       n_fold = 2)
+best_wflow <- selectBestModel(tune_res, wflow, "mcc")
+fitBestModel(best_wflow, train)
+}
diff --git a/man/getConfusionMatrix.Rd b/man/getConfusionMatrix.Rd
index a47b012..1047fba 100644
--- a/man/getConfusionMatrix.Rd
+++ b/man/getConfusionMatrix.Rd
@@ -17,3 +17,18 @@ Confusion matrix of class \code{conf_mat}
 Returns the confusion matrix based on the AMR phenotype predictions by an ML
 model compared against the actual values.
 }
+\examples{
+preds <- tibble::tibble(
+  genome_id = paste0("g", 1:8),
+  genome_drug.resistant_phenotype = factor(
+    rep(c("Resistant", "Susceptible"), each = 4),
+    levels = c("Resistant", "Susceptible")
+  ),
+  .pred_class = factor(
+    c("Resistant", "Resistant", "Susceptible", "Resistant",
+      "Susceptible", "Resistant", "Susceptible", "Susceptible"),
+    levels = c("Resistant", "Susceptible")
+  )
+)
+getConfusionMatrix(preds)
+}
diff --git a/man/getNumFeat.Rd b/man/getNumFeat.Rd
index d616bb2..75f2565 100644
--- a/man/getNumFeat.Rd
+++ b/man/getNumFeat.Rd
@@ -20,3 +20,12 @@ Number of features
 \description{
 Returns the number of features in an amR pangenome.
 }
+\examples{
+ml <- tibble::tibble(
+  genome_id = paste0("g", 1:6),
+  genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 3),
+  feat_a = c(1L, 0L, 1L, 0L, 1L, 0L),
+  feat_b = c(0L, 1L, 0L, 1L, 0L, 1L)
+)
+getNumFeat(ml)
+}
diff --git a/man/loadMLInputTibble.Rd b/man/loadMLInputTibble.Rd
index 0cdb61b..8e3310b 100644
--- a/man/loadMLInputTibble.Rd
+++ b/man/loadMLInputTibble.Rd
@@ -7,7 +7,7 @@
 loadMLInputTibble(parquet_path)
 }
 \arguments{
-\item{parquet_path}{\link[rlang:vector-construction]{rlang::chr} Path to a long-format sparse Parquet file from
+\item{parquet_path}{\link[rlang:chr]{chr} Path to a long-format sparse Parquet file from
 upstream processing.}
 }
 \value{
@@ -18,3 +18,16 @@ A tibble (one row per genome, one column per feature), plus
 Reads in a Parquet file as a tibble and converts it from long to wide format
 to prepare it for ML.
 }
+\examples{
+long <- tibble::tibble(
+  genome_id = rep(paste0("g", 1:6), each = 2),
+  feature_id = rep(c("gene_a", "gene_b"), 6),
+  value = c(1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1),
+  genome_drug.resistant_phenotype = rep(
+    rep(c("Resistant", "Susceptible"), each = 3), each = 2
+  )
+)
+tmp <- tempfile(fileext = ".parquet")
+arrow::write_parquet(long, tmp)
+loadMLInputTibble(tmp)
+}
diff --git a/man/plotBaselineComparison.Rd b/man/plotBaselineComparison.Rd
index 2b6b94d..b212ab2 100644
--- a/man/plotBaselineComparison.Rd
+++ b/man/plotBaselineComparison.Rd
@@ -20,3 +20,14 @@ A bar plot with balanced accuracy comparisons per antibiotic
 Generates a bar plot that compares model performance with and without
 randomly shuffled AMR phenotype labels.
 }
+\examples{
+non_shuffled <- tibble::tibble(
+  antibiotic = c("AMP", "CIP", "CRO"),
+  bal_acc    = c(0.88, 0.81, 0.92)
+)
+shuffled <- tibble::tibble(
+  antibiotic = c("AMP", "CIP", "CRO"),
+  bal_acc    = c(0.52, 0.49, 0.55)
+)
+plotBaselineComparison(non_shuffled, shuffled)
+}
diff --git a/man/plotDefaultEval.Rd b/man/plotDefaultEval.Rd
index 25eeaf9..9eb41ad 100644
--- a/man/plotDefaultEval.Rd
+++ b/man/plotDefaultEval.Rd
@@ -15,16 +15,16 @@ plotDefaultEval(
 \arguments{
 \item{default_eval_tibble}{Output of \code{findOptimalMLDefaults()}}
 
-\item{x_default_eval}{\link[rlang:vector-construction]{rlang::chr} x value of default evaluation plot: "train_prop"
+\item{x_default_eval}{\link[rlang:chr]{chr} x value of default evaluation plot: "train_prop"
 or "n_fold"}
 
-\item{y_default_eval}{\link[rlang:vector-construction]{rlang::chr} y value of default evaluation plot. It can be
+\item{y_default_eval}{\link[rlang:chr]{chr} y value of default evaluation plot. It can be
 "avg_runtime_sec" or one of the following performance metrics:
 "avg_f1_score", "avg_log2_apop", "avg_bal_acc", or "avg_nmcc"}
 
-\item{xlab}{\link[rlang:vector-construction]{rlang::chr} Label for x axis}
+\item{xlab}{\link[rlang:chr]{chr} Label for x axis}
 
-\item{ylab}{\link[rlang:vector-construction]{rlang::chr} Label for y axis}
+\item{ylab}{\link[rlang:chr]{chr} Label for y axis}
 }
 \value{
 A \code{ggplot2} scatterplot (performance metric or runtime vs.
@@ -34,3 +34,13 @@ A \code{ggplot2} scatterplot (performance metric or runtime vs.
 Plots performance metric or runtime vs. training data proportion or number
 of cross-validation folds, colored by model.
 }
+\examples{
+default_eval <- tibble::tibble(
+  train_prop      = c(0.5, 0.6, 0.7, 0.5, 0.6, 0.7),
+  n_fold          = rep(5, 6),
+  model           = rep(c("LR", "RF"), each = 3),
+  avg_f1_score    = c(0.72, 0.78, 0.83, 0.70, 0.75, 0.80)
+)
+plotDefaultEval(default_eval, x_default_eval = "train_prop",
+                y_default_eval = "avg_f1_score")
+}
diff --git a/man/plotFishers.Rd b/man/plotFishers.Rd
index ba515df..53a326f 100644
--- a/man/plotFishers.Rd
+++ b/man/plotFishers.Rd
@@ -35,9 +35,18 @@ Color explicitly encodes whether a feature passes the BH threshold.
 Labels are applied only to the top-ranked features to preserve clarity.
 }
 \examples{
-\dontrun{
-plotFishers(fisher_results)
-plotFishers(fisher_results, label_top_n = 0)
-}
+long <- tibble::tibble(
+  genome_id = rep(paste0("g", 1:10), each = 2),
+  feature_id = rep(c("gene_a", "gene_b"), 10),
+  value = c(1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
+            0, 0, 0, 1, 0, 1, 0, 1, 0, 0),
+  genome_drug.resistant_phenotype = rep(
+    rep(c("Resistant", "Susceptible"), each = 5), each = 2
+  )
+)
+tmp <- tempfile(fileext = ".parquet")
+arrow::write_parquet(long, tmp)
+fisher_results <- runFishers(tmp, Q = 0.05)
+plotFishers(fisher_results, alpha = 0.05, label_top_n = 2)
 
 }
diff --git a/man/plotPRC.Rd b/man/plotPRC.Rd
index e1d3cc4..19fe869 100644
--- a/man/plotPRC.Rd
+++ b/man/plotPRC.Rd
@@ -17,3 +17,20 @@ A precision-recall curve as a \code{ggplot2} object
 Plots the precision-recall curve given a set of test data plus predicted AMR
 phenotypes.
 }
+\examples{
+preds <- tibble::tibble(
+  genome_id = paste0("g", 1:10),
+  genome_drug.resistant_phenotype = factor(
+    rep(c("Resistant", "Susceptible"), each = 5),
+    levels = c("Resistant", "Susceptible")
+  ),
+  .pred_class = factor(
+    c("Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
+      "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"),
+    levels = c("Resistant", "Susceptible")
+  ),
+  .pred_Resistant   = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
+  .pred_Susceptible = c(0.1, 0.2, 0.6, 0.3, 0.7, 0.8, 0.4, 0.9, 0.8, 0.45)
+)
+plotPRC(preds)
+}
diff --git a/man/plotTopFeatsVI.Rd b/man/plotTopFeatsVI.Rd
index e7d3008..9bd920b 100644
--- a/man/plotTopFeatsVI.Rd
+++ b/man/plotTopFeatsVI.Rd
@@ -9,7 +9,7 @@ plotTopFeatsVI(fit, n_top_feats = 10)
 \arguments{
 \item{fit}{Best model fit, such as the output of \code{fitBestModel()}}
 
-\item{n_top_feats}{\link[pillar:num]{pillar::num} Number of top features to plot}
+\item{n_top_feats}{\link[pillar:num]{num} Number of top features to plot}
 }
 \value{
 Variable importance plot (a \code{ggplot2} object)
@@ -18,3 +18,7 @@ Variable importance plot (a \code{ggplot2} object)
 Generates a plot showing the top features and their variable importance
 scores.
 }
+\examples{
+data(demo_fit)
+plotTopFeatsVI(demo_fit, n_top_feats = 10)
+}
diff --git a/man/predictML.Rd b/man/predictML.Rd
index 2da4b8e..53a882c 100644
--- a/man/predictML.Rd
+++ b/man/predictML.Rd
@@ -21,3 +21,9 @@ labels
 Predicts the AMR phenotype in the test data set based on the best fit ML
 model.
 }
+\examples{
+data(demo_ml_tibble)
+data(demo_fit)
+data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
+predictML(demo_fit, rsample::testing(data_split))
+}
diff --git a/man/removeTopFeats.Rd b/man/removeTopFeats.Rd
index a6fe1ea..fe4bd79 100644
--- a/man/removeTopFeats.Rd
+++ b/man/removeTopFeats.Rd
@@ -19,3 +19,14 @@ An amR pangenome with top features removed
 \description{
 Removes top features from a pangenome given previous ML results.
 }
+\examples{
+ml <- tibble::tibble(
+  genome_id = paste0("g", 1:6),
+  genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 3),
+  feat_a = c(1L, 0L, 1L, 0L, 1L, 0L),
+  feat_b = c(0L, 1L, 0L, 1L, 0L, 1L),
+  feat_c = c(1L, 1L, 0L, 0L, 1L, 0L)
+)
+top <- tibble::tibble(Variable = c("feat_a", "feat_c"))
+removeTopFeats(ml, top)
+}
diff --git a/man/runFisherTests.Rd b/man/runFisherTests.Rd
index 96c1fa6..676b36c 100644
--- a/man/runFisherTests.Rd
+++ b/man/runFisherTests.Rd
@@ -19,3 +19,13 @@ A tibble with columns: gene, p_value
 \description{
 runFisherTests
 }
+\examples{
+df <- tibble::tibble(
+  genome_id = paste0("g", 1:10),
+  genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 5),
+  gene_a = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
+  gene_b = c(0, 0, 0, 1, 1, 1, 1, 1, 1, 0)
+)
+encoded <- encodePhenotype(df)
+runFisherTests(encoded$df, encoded$target)
+}
diff --git a/man/runFishers.Rd b/man/runFishers.Rd
index f654d2d..d90593c 100644
--- a/man/runFishers.Rd
+++ b/man/runFishers.Rd
@@ -29,3 +29,17 @@ The Fisher test results, BH correction, and feature frequencies
 \description{
 runFishers
 }
+\examples{
+long <- tibble::tibble(
+  genome_id = rep(paste0("g", 1:10), each = 2),
+  feature_id = rep(c("gene_a", "gene_b"), 10),
+  value = c(1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
+            0, 0, 0, 1, 0, 1, 0, 1, 0, 0),
+  genome_drug.resistant_phenotype = rep(
+    rep(c("Resistant", "Susceptible"), each = 5), each = 2
+  )
+)
+tmp <- tempfile(fileext = ".parquet")
+arrow::write_parquet(long, tmp)
+runFishers(tmp, Q = 0.05)
+}
diff --git a/man/runIFE.Rd b/man/runIFE.Rd
index 312ceba..b9d2ce7 100644
--- a/man/runIFE.Rd
+++ b/man/runIFE.Rd
@@ -19,28 +19,28 @@ runIFE(
 \arguments{
 \item{ml_input_tibble}{An ML-ready tibble generated by \code{loadMLInputTibble()}}
 
-\item{by_num}{\link[arrow:data-type]{arrow::bool} Set to \code{TRUE} if removing top features as a percentage
+\item{by_num}{\link[arrow:bool]{bool} Set to \code{TRUE} if removing top features as a percentage
 of the total number of features.}
 
-\item{by_vi}{\link[arrow:data-type]{arrow::bool} Set to \code{TRUE} if removing top features by their
+\item{by_vi}{\link[arrow:bool]{bool} Set to \code{TRUE} if removing top features by their
 contribution to the total variable importance.}
 
-\item{percent_removal_vec}{\link[pillar:num]{pillar::num} A vector of percentages of total top
+\item{percent_removal_vec}{\link[pillar:num]{num} A vector of percentages of total top
 features removed (if \code{by_num = TRUE} and \code{by_vi = FALSE}) or percent
 contributions of top features removed to the total variable importance (if
 \code{by_vi = TRUE} and \code{by_num = FALSE}) at each iteration. The function will
 automatically train with all features to start, so there is no need to
 specify 0 in this vector.}
 
-\item{mix_vec}{\link[pillar:num]{pillar::num} A vector containing mixture values (0 corresponds
+\item{mix_vec}{\link[pillar:num]{num} A vector containing mixture values (0 corresponds
 to L2 regularization; 1 corresponds to L1; intermediate values correspond to
 elastic net) to try. The default corresponds to L2 for the purpose of IFE
 analysis.}
 
-\item{return_feats}{\link[arrow:data-type]{arrow::bool} Whether to return top features from each iteration
+\item{return_feats}{\link[arrow:bool]{bool} Whether to return top features from each iteration
 of IFE.}
 
-\item{verbose}{\link[arrow:data-type]{arrow::bool} The function will stay quiet if set to \code{FALSE}.}
+\item{verbose}{\link[arrow:bool]{bool} The function will stay quiet if set to \code{FALSE}.}
 }
 \value{
 A tibble with IFE performance (note: this will be returned within a
@@ -51,3 +51,12 @@ runIFE
 Removes top features identified by ML models and retrains iteratively;
 returns nMCC at each iteration.
 }
+\examples{
+data(demo_ml_tibble)
+set.seed(1)
+runIFE(
+  demo_ml_tibble, by_num = TRUE, by_vi = FALSE,
+  percent_removal_vec = c(25, 50), mix_vec = 0,
+  return_feats = TRUE, verbose = FALSE
+)
+}
diff --git a/man/runMDRmodels.Rd b/man/runMDRmodels.Rd
index 325c14a..74be1e3 100644
--- a/man/runMDRmodels.Rd
+++ b/man/runMDRmodels.Rd
@@ -52,7 +52,7 @@ NULL (invisible). Called for side effects (model training and result saving).
 }
 \description{
 Executes machine learning pipeline for MDR analysis using logistic regression
-with parallel processing via the future backend. Trains models on all MDR
+with parallel processing via BiocParallel. Trains models on all MDR
 parquet files and saves results to designated output directories.
 }
 \examples{
@@ -62,13 +62,15 @@ runMDRmodels("/path/to/results")
 
 # Run with more threads and minimal output
 runMDRmodels("/path/to/results",
-             threads = 32,
-             verbose = FALSE)
+  threads = 32,
+  verbose = FALSE
+)
 
 # Run without saving model fits (save disk space)
 runMDRmodels("/path/to/results",
-             threads = 16,
-             return_fit = FALSE)
+  threads = 16,
+  return_fit = FALSE
+)
 }
 
 }
diff --git a/man/runMLPipeline.Rd b/man/runMLPipeline.Rd
index 31c542e..2b53951 100644
--- a/man/runMLPipeline.Rd
+++ b/man/runMLPipeline.Rd
@@ -35,10 +35,10 @@ classification for one bug/drug combination) or \code{resistant_classes}
 (multi-class classification for determining the drug classes to which each
 genome is resistant), but not both.}
 
-\item{model}{\link[rlang:vector-construction]{rlang::chr} Logistic regression ("LR"), random forest ("RF"), or
+\item{model}{\link[rlang:chr]{chr} Logistic regression ("LR"), random forest ("RF"), or
 boosted tree ("BT")}
 
-\item{split}{\link[pillar:num]{pillar::num} Vector of length 2 indicating the proportion of data to
+\item{split}{\link[pillar:num]{num} Vector of length 2 indicating the proportion of data to
 be designated as training and validation, respectively. Note: if \code{test_data}
 is provided, these numbers will be scaled so that they sum to 1 and will
 still represent fractions of \code{ml_input_tibble} (not including the input
@@ -47,26 +47,26 @@ function is not equipped to handle this. If cross-validation is enabled here
 \code{split = c(1,0)}, we will still retain a 20\% test holdout for final reporting.
 Cross-validation is run on the 80\% training portion, and not on the testing set.}
 
-\item{n_fold}{\link[pillar:num]{pillar::num} Number of folds of cross-validation}
+\item{n_fold}{\link[pillar:num]{num} Number of folds of cross-validation}
 
-\item{prop_vi_top_feats}{\link[pillar:num]{pillar::num} A vector of length 2 with elements together
+\item{prop_vi_top_feats}{\link[pillar:num]{num} A vector of length 2 with elements together
 indicating the proportion of total variable importance the top features
 should comprise. To get the features that contribute to the top 10 to 20\% of
 total variable importance, for example,
 \verb{set prop_vi_top_feats = c(0.1, 0.2)}. Returns all features by default.}
 
-\item{n_top_feats}{\link[pillar:num]{pillar::num} Number of top features to extract per drug}
+\item{n_top_feats}{\link[pillar:num]{num} Number of top features to extract per drug}
 
-\item{use_pca}{\link[arrow:data-type]{arrow::bool} Set to \code{TRUE} to use PCA instead of all features.}
+\item{use_pca}{\link[arrow:bool]{bool} Set to \code{TRUE} to use PCA instead of all features.}
 
-\item{pca_threshold}{\link[pillar:num]{pillar::num} The proportion of total variance for which the
+\item{pca_threshold}{\link[pillar:num]{num} The proportion of total variance for which the
 principle components account}
 
-\item{penalty_vec}{\link[pillar:num]{pillar::num} A vector containing \code{penalty} (regularization
+\item{penalty_vec}{\link[pillar:num]{num} A vector containing \code{penalty} (regularization
 strength) values to try (for logistic regression). It is recommended to
 choose values \code{10^-4} to \code{10^4}.}
 
-\item{mix_vec}{\link[pillar:num]{pillar::num} A vector containing \code{mixture} values to try for logistic
+\item{mix_vec}{\link[pillar:num]{num} A vector containing \code{mixture} values to try for logistic
 regression. 0 corresponds to L2 regularization; 1 corresponds to L1;
 intermediate values correspond to elastic net.}
 
@@ -78,12 +78,12 @@ or boosted tree. It is recommended to choose values in the range 1 to 100.}
 \code{trees} in random forest or boosted tree. It is recommended to choose values
 in the range 100 to 1000.}
 
-\item{select_best_metric}{\link[rlang:vector-construction]{rlang::chr} Metric to select best model: "f_meas",
+\item{select_best_metric}{\link[rlang:chr]{chr} Metric to select best model: "f_meas",
 "pr_auc", or "bal_accuracy"}
 
-\item{seed}{\link[pillar:num]{pillar::num} For reproducible analysis}
+\item{seed}{\link[pillar:num]{num} For reproducible analysis}
 
-\item{shuffle_labels}{\link[arrow:data-type]{arrow::bool} Set to \code{TRUE} to randomly shuffle AMR phenotype
+\item{shuffle_labels}{\link[arrow:bool]{bool} Set to \code{TRUE} to randomly shuffle AMR phenotype
 labels for baseline comparisons.}
 
 \item{test_data}{A tibble to use as testing data instead of a subset of
@@ -91,14 +91,14 @@ labels for baseline comparisons.}
 or temporal holdouts. The \code{split} argument still tells how \code{ml_input_tibble}
 should be divided for training and validation.}
 
-\item{return_tune_res}{\link[arrow:data-type]{arrow::bool} Set to \code{TRUE} to return tuning results.}
+\item{return_tune_res}{\link[arrow:bool]{bool} Set to \code{TRUE} to return tuning results.}
 
-\item{return_fit}{\link[arrow:data-type]{arrow::bool} Set to \code{TRUE} to return the model \code{fit}.}
+\item{return_fit}{\link[arrow:bool]{bool} Set to \code{TRUE} to return the model \code{fit}.}
 
-\item{return_pred}{\link[arrow:data-type]{arrow::bool} Set to \code{TRUE} to return the predicted and actual
+\item{return_pred}{\link[arrow:bool]{bool} Set to \code{TRUE} to return the predicted and actual
 AMR phenotypes.}
 
-\item{verbose}{\link[arrow:data-type]{arrow::bool} The function will stay quiet if set to \code{FALSE}.}
+\item{verbose}{\link[arrow:bool]{bool} The function will stay quiet if set to \code{FALSE}.}
 }
 \value{
 A list with two elements: A \code{performance_tibble} and a
@@ -109,3 +109,13 @@ respectively, are set to \code{TRUE}.
 \description{
 Stitches together core ML functions into one pipeline.
 }
+\examples{
+data(demo_ml_tibble)
+set.seed(1)
+runMLPipeline(
+  ml_input_tibble = demo_ml_tibble, model = "LR",
+  split = c(1, 0), n_fold = 2,
+  penalty_vec = 10^c(-3, -1), mix_vec = c(0, 0.5, 1),
+  n_top_feats = 10, verbose = FALSE
+)
+}
diff --git a/man/runMLmodels.Rd b/man/runMLmodels.Rd
index cb44718..501e8d8 100644
--- a/man/runMLmodels.Rd
+++ b/man/runMLmodels.Rd
@@ -121,8 +121,7 @@ For example: \code{"LOO_cross_test_ML_year_performance.tsv"}
 \note{
 This function requires the following packages:
 \itemize{
-\item \pkg{future} - for parallel processing backend
-\item \pkg{future.apply} - for parallel lapply
+\item \pkg{BiocParallel} - for parallel processing backend
 \item \pkg{readr} - for reading/writing TSV files
 \item \pkg{dplyr}, \pkg{purrr}, \pkg{stringr}, \pkg{tibble} - for data manipulation
 }
@@ -143,21 +142,24 @@ runMLmodels("/path/to/results", stratify_by = "year")
 
 # Cross-test with year stratification
 runMLmodels("/path/to/results",
-            stratify_by = "year",
-            cross_test = TRUE,
-            threads = 32)
+  stratify_by = "year",
+  cross_test = TRUE,
+  threads = 32
+)
 
 # LOO analysis stratified by country with cross-testing
 runMLmodels("/path/to/results",
-            stratify_by = "country",
-            LOO = TRUE,
-            cross_test = TRUE,
-            verbose = TRUE)
+  stratify_by = "country",
+  LOO = TRUE,
+  cross_test = TRUE,
+  verbose = TRUE
+)
 
 # Run without saving model fits (save disk space)
 runMLmodels("/path/to/results",
-            stratify_by = "year",
-            return_fit = FALSE)
+  stratify_by = "year",
+  return_fit = FALSE
+)
 }
 
 }
diff --git a/man/runModelingPipeline.Rd b/man/runModelingPipeline.Rd
index 84ea022..a0763be 100644
--- a/man/runModelingPipeline.Rd
+++ b/man/runModelingPipeline.Rd
@@ -49,3 +49,14 @@ Given a DuckDB file produced by \code{runDataProcessing()}, it:
 \item Saves performance metrics, fitted models, predictions, and top feature rankings
 }
 }
+\examples{
+\dontrun{
+runModelingPipeline(
+  parquet_duckdb_path = "path/to/Bug_parquet.duckdb",
+  threads = 16, n_fold = 5, split = c(1, 0), min_n = 25,
+  prop_vi_top_feats = c(0, 1), pca_threshold = 0.99,
+  verbose = TRUE, use_saved_split = TRUE
+)
+}
+
+}
diff --git a/man/selectBestModel.Rd b/man/selectBestModel.Rd
index 23232bf..c1f6f2d 100644
--- a/man/selectBestModel.Rd
+++ b/man/selectBestModel.Rd
@@ -11,7 +11,7 @@ selectBestModel(tune_res, wflow, select_best_metric = "mcc")
 
 \item{wflow}{A \code{workflows} object, such as the output of \code{buildWflow()}}
 
-\item{select_best_metric}{\link[rlang:vector-construction]{rlang::chr} Metric to select best model: "f_meas",
+\item{select_best_metric}{\link[rlang:chr]{chr} Metric to select best model: "f_meas",
 "pr_auc", "mcc", or "bal_accuracy"}
 }
 \value{
@@ -20,3 +20,14 @@ Best model workflow
 \description{
 Selects best model by F1 score, AUPRC, MCC, or balanced accuracy.
 }
+\examples{
+data(demo_ml_tibble)
+data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
+wflow <- buildWflow(buildLRModel(),
+                    buildRecipe(rsample::training(data_split)))
+set.seed(1)
+tune_res <- tuneGrid(wflow, data_split,
+                     buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
+                     n_fold = 2)
+selectBestModel(tune_res, wflow, "mcc")
+}
diff --git a/man/shuffleLabels.Rd b/man/shuffleLabels.Rd
index 9c672b5..83a6c72 100644
--- a/man/shuffleLabels.Rd
+++ b/man/shuffleLabels.Rd
@@ -14,7 +14,7 @@ classification for one bug/drug combination) or \code{resistant_classes}
 (multi-class classification for determining the drug classes to which each
 genome is resistant), but not both.}
 
-\item{seed}{\link[pillar:num]{pillar::num} For reproducible analysis}
+\item{seed}{\link[pillar:num]{num} For reproducible analysis}
 }
 \value{
 Pangenome with randomly shuffled AMR phenotype labels
@@ -22,3 +22,11 @@ Pangenome with randomly shuffled AMR phenotype labels
 \description{
 Randomly shuffles the AMR phenotype labels in the ML-ready tibble.
 }
+\examples{
+ml <- tibble::tibble(
+  genome_id = paste0("g", 1:6),
+  genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 3),
+  feat_a = c(1L, 0L, 1L, 0L, 1L, 0L)
+)
+shuffleLabels(ml, seed = 42)
+}
diff --git a/man/splitMLInputTibble.Rd b/man/splitMLInputTibble.Rd
index 7f774b9..bfa3254 100644
--- a/man/splitMLInputTibble.Rd
+++ b/man/splitMLInputTibble.Rd
@@ -14,10 +14,10 @@ classification for one bug/drug combination) or \code{resistant_classes}
 (multi-class classification for determining the drug classes to which each
 genome is resistant), but not both.}
 
-\item{split}{\link[pillar:num]{pillar::num} Vector of length 2 indicating the proportion of data to
+\item{split}{\link[pillar:num]{num} Vector of length 2 indicating the proportion of data to
 be designated as training and validation, respectively.}
 
-\item{seed}{\link[pillar:num]{pillar::num} For reproducible analysis}
+\item{seed}{\link[pillar:num]{num} For reproducible analysis}
 }
 \value{
 An \code{rsplit} object
@@ -26,3 +26,12 @@ An \code{rsplit} object
 Splits an ML-ready tibble into training and testing (and in some cases
 validation) sets.
 }
+\examples{
+ml <- tibble::tibble(
+  genome_id = paste0("g", 1:20),
+  genome_drug.resistant_phenotype = rep(c("Resistant", "Susceptible"), each = 10),
+  feat_a = rep(c(0L, 1L), 10),
+  feat_b = rep(c(1L, 0L), 10)
+)
+splitMLInputTibble(ml, split = c(1, 0), seed = 42)
+}
diff --git a/man/tuneGrid.Rd b/man/tuneGrid.Rd
index d03a8c3..90fc4e9 100644
--- a/man/tuneGrid.Rd
+++ b/man/tuneGrid.Rd
@@ -15,7 +15,7 @@ tuneGrid(wflow, data_split, grid = buildTuningGrid(model = "LR"), n_fold = 5)
 \item{grid}{A tuning grid as a tibble, such as the output of
 \code{buildTuningGrid()}}
 
-\item{n_fold}{\link[pillar:num]{pillar::num} Number of folds of cross-validation}
+\item{n_fold}{\link[pillar:num]{num} Number of folds of cross-validation}
 }
 \value{
 Results of grid tuning
@@ -24,3 +24,12 @@ Results of grid tuning
 Tunes the model according to the tuning grid, workflow, and data split
 specified.
 }
+\examples{
+data(demo_ml_tibble)
+data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
+wflow <- buildWflow(buildLRModel(),
+                    buildRecipe(rsample::training(data_split)))
+grid  <- buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1))
+set.seed(1)
+tuneGrid(wflow, data_split, grid, n_fold = 2)
+}
diff --git a/vignettes/intro.Rmd b/vignettes/intro.Rmd
index af5bc8e..89f3b18 100644
--- a/vignettes/intro.Rmd
+++ b/vignettes/intro.Rmd
@@ -1,17 +1,19 @@
 ---
 title: "Introduction to amRml"
-output: rmarkdown::html_vignette
+output:
+  BiocStyle::html_document:
+    fig_width: 7
+    fig_height: 5
 vignette: >
   %\VignetteIndexEntry{Introduction to amRml}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---
 
-```{r, include = FALSE}
+```{r setup-opts, include = FALSE}
 knitr::opts_chunk$set(
   collapse = TRUE,
-  comment = "#>",
-  eval = FALSE
+  comment  = "#>"
 )
 ```
 
@@ -21,354 +23,285 @@ library(amRml)
 
 ## Overview
 
-`amRml` is part of a 3-package suite for predicting antimicrobial resistance (AMR) using machine learning models trained on bacterial genomic features.
+`amRml` predicts antimicrobial resistance (AMR) from bacterial genomic features. It consumes a DuckDB produced by `amRdata` and produces ML matrices, tuned logistic regression models, per-genome predictions, feature importances, and Fisher's exact tests as a non-ML baseline.
 
-This vignette demonstrates how to generate ML input matrices, train models, and visualize results.
+This vignette uses the *Shigella flexneri* (`Sfl`) DuckDB bundled in `inst/extdata`.
+
+```{r fixture}
+fixture <- system.file("extdata", "Sfl_parquet.duckdb", package = "amRml")
+out_dir <- file.path(tempdir(), "amRml_vignette")
+dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
+```
 
 ## Generating ML input matrices
 
-After data curation with `amRdata`, use `generateMLInputs()` to create ML-ready matrices from the DuckDB containing metadata and feature parquets.
-The DuckDB with the parquet views is created with `cleanData()`
+`generateMLInputs()` reads the bug-level DuckDB (metadata + feature parquets) and writes one long-format sparse parquet per drug × feature × encoding combination into `out_path/matrix/`. With `stratify_by`, it additionally writes year- or country-stratified matrices into `matrix_year/` or `matrix_country/`.
 
 ```{r generate-matrices}
-# Generate all ML input matrices from curated data
 generateMLInputs(
-  parquet_duckdb_path = "results/Sfl_parquet.duckdb",
-  out_path = "results/",
-  n_fold = 5,
-  split = c(1, 0), # CV mode (or use split = 0 as shorthand)
-  min_n = 25,
-  verbosity = "minimal"
+  parquet_duckdb_path = fixture,
+  out_path            = out_dir,
+  n_fold              = 5,
+  split               = c(1, 0),
+  min_n               = 25,
+  verbosity           = "minimal"
 )
-```
-
-This generates:
-
-- Drug and drug-class matrices for all feature types (genes, proteins, domains, struct)
-- Year-stratified and country-stratified matrices
-- Leave-one-out (LOO) matrices for temporal/geographic holdouts
-- Multi-drug resistance (MDR) matrices
-
-For classical train/validation/test splits instead of cross-validation:
 
-```{r generate-splits}
-# 70% train, 15% validation, 15% test
-generateMLInputs(
-  parquet_duckdb_path = "results/Sfl_parquet.duckdb",
-  out_path = "results/",
-  n_fold = NULL,
-  split = c(0.7, 0.15),
-  verbosity = "debug"
-)
+list.files(file.path(out_dir, "matrix"))[1:5]
 ```
 
-Use `skipImbalancedMatrix()` to check if a drug-bug combination has sufficient samples:
+For a classical train/validation/test split instead of cross-validation, set `n_fold = NULL` and pass a length-2 `split` of `c(train_prop, val_prop)`; the test proportion is the remainder.
 
-```{r skip-check}
-skip <- skipImbalancedMatrix(
-  genome_ids = genome_ids,
-  phenotype_counts = phenotype_counts,
-  n_fold = 5,
-  split = c(1, 0),
-  min_total_obs = 40
+```{r generate-splits, eval = FALSE}
+generateMLInputs(
+  parquet_duckdb_path = fixture,
+  out_path            = out_dir,
+  n_fold              = NULL,
+  split               = c(0.7, 0.15),
+  verbosity           = "debug"
 )
 ```
 
-## Loading ML matrices
+## Loading a single matrix
 
-The package expects Parquet files in long (sparse) format with these columns:
+The generated parquets are long-format sparse tibbles with these columns:
 
 | Column | Description |
 |--------|-------------|
 | `genome_id` | Unique identifier for each isolate |
 | `feature_id` | Feature name (gene, protein, domain, or struct) |
-| `value` | Binary presence/absence (0 or 1) or count |
-| `genome_drug.resistant_phenotype` | "Resistant" or "Susceptible" |
+| `value` | Binary presence/absence (0/1) or count |
+| `genome_drug.resistant_phenotype` | `"Resistant"` or `"Susceptible"` |
 
-`loadMLInputTibble()` converts this to wide format (one row per genome, one column per feature) for ML.
+`loadMLInputTibble()` converts one of them to wide format (one row per genome, one column per feature) ready for ML.
 
 ```{r load-data}
-# Load a generated matrix
-ml_tibble <- loadMLInputTibble(
-  parquet_path = "results/matrix/Sfl_drug_AMP_domains_binary_sparse.parquet"
+matrix_path <- file.path(
+  out_dir, "matrix", "Sfl_drug_AMP_genes_binary_sparse.parquet"
 )
 
-# Check dimensions
+ml_tibble  <- loadMLInputTibble(matrix_path)
 n_features <- getNumFeat(ml_tibble)
-target_var <- getTargetVarName(ml_tibble)
+target_var <- .getTargetVarName(ml_tibble)
+
+c(n_features = n_features, target_var = target_var)
 ```
 
-## Main pipeline
+## Per-matrix pipeline
 
-The primary entry point is `runMLPipeline()`, which orchestrates the entire ML workflow:
+`runMLPipeline()` runs the train/tune/fit/predict pipeline on a single matrix in memory. Use it to iterate on one drug-feature combo before scaling to all of them.
 
 ```{r run-pipeline}
 results <- runMLPipeline(
-  ml_input_tibble = ml_tibble,
-  model = "LR",
-  split = c(0.6, 0.2),
-  n_fold = 2,
-  n_top_feats = 20,
+  ml_input_tibble    = ml_tibble,
+  model              = "LR",
+  split              = c(1, 0),
+  n_fold             = 2,
+  n_top_feats        = 20,
+  penalty_vec        = 10^c(-3, -1),
+  mix_vec            = c(0, 0.5, 1),
   select_best_metric = "mcc",
-  return_fit = TRUE,
-  return_pred = TRUE,
-  verbose = TRUE
+  return_fit         = TRUE,
+  return_pred        = TRUE,
+  verbose            = FALSE
 )
 
-# View results
 results$performance_tibble
-results$top_feat_tibble
+head(results$top_feat_tibble)
 ```
 
 ### Output structure
 
-`runMLPipeline()` returns a list with:
+`runMLPipeline()` returns a named list:
 
-**`performance_tibble`** - model performance metrics:
+**`performance_tibble`** — one row of model performance metrics:
 
 | Column | Description |
 |--------|-------------|
 | `num_obs` | Number of observations |
 | `res_prop` | Proportion of resistant samples |
 | `n_feat` | Number of features |
-| `model` | Model type (LR) |
+| `model` | Model type (`"LR"`) |
 | `train_prop`, `val_prop` | Train/validation split proportions |
 | `fit_penalty`, `fit_mixture` | Fitted hyperparameters |
 | `nmcc`, `f1`, `bal_acc`, `log2_apop` | Performance metrics |
 | `run_time_sec` | Runtime in seconds |
 
-**`top_feat_tibble`** - ranked feature importance:
+**`top_feat_tibble`** — ranked feature importance:
 
 | Column | Description |
 |--------|-------------|
 | `Variable` | Feature name |
 | `Importance` | Variable importance score |
-| `Sign` | Direction of effect (POS = resistance, NEG = susceptibility) |
+| `Sign` | Direction of effect (`POS` = associated with resistance, `NEG` = with susceptibility) |
 
 **Optional outputs** (when `return_*` = TRUE):
 
-- `tune_res` - tuning results from grid search
-- `fit` - fitted model workflow object
-- `pred` - predictions with `.pred_class`, `.pred_Resistant`, `.pred_Susceptible`
+- `tune_res` — tuning results from grid search
+- `fit` — the fitted workflow object
+- `pred` — predictions with `.pred_class`, `.pred_Resistant`, `.pred_Susceptible`
 
 ## Step-by-step model building
 
-For more control, use the individual tidymodels-based functions:
+The builders below are what `runMLPipeline()` chains internally. Call them directly when you need control over any individual step.
 
 ```{r split-data}
-# Split data
 data_split <- splitMLInputTibble(ml_tibble, split = c(0.6, 0.2), seed = 123)
-
-library(rsample)
-train_data <- training(data_split)
-test_data <- testing(data_split)
+train_data <- rsample::training(data_split)
+test_data  <- rsample::testing(data_split)
 ```
 
 ```{r build-model}
-# Build recipe (preprocessing)
-recipe <- buildRecipe(train_data, use_pca = FALSE, pca_threshold = 0.95)
+recipe <- buildRecipe(train_data, use_pca = FALSE)
+lr_mod <- buildLRModel(multi_class = FALSE)
+wflow  <- buildWflow(lr_mod, recipe)
 
-# Build logistic regression model
-lr_model <- buildLRModel(multi_class = FALSE)
-
-# Build workflow
-wflow <- buildWflow(lr_model, recipe)
-
-# Build tuning grid
 grid <- buildTuningGrid(
-  model = "LR",
-  penalty_vec = 10^seq(-4, -1, length.out = 10),
-  mix_vec = 0:5 / 5
+  model       = "LR",
+  penalty_vec = 10^c(-3, -1),
+  mix_vec     = c(0, 0.5, 1)
 )
 ```
 
 ```{r tune-fit}
-# Tune hyperparameters
-tune_res <- tuneGrid(wflow, data_split, grid, n_fold = 5)
-
-# Select and fit best model
+tune_res   <- tuneGrid(wflow, data_split, grid, n_fold = 2)
 best_wflow <- selectBestModel(tune_res, wflow, select_best_metric = "mcc")
-fit <- fitBestModel(best_wflow, train_data)
+fit        <- fitBestModel(best_wflow, train_data)
 
-# Get fitted hyperparameters
-fit_hps <- getFitHps(fit)
-
-# Make predictions
-predictions <- predict(fit, test_data)
+preds <- predictML(fit, test_data)
 ```
 
 ## Performance metrics
 
+`calculateEvalMets()` returns all of nMCC, F1, balanced accuracy, AUPRC, log2(AUPRC/prior), sensitivity, and specificity from a tibble of predictions + truth.
+
 ```{r metrics}
-# Individual metrics
-nmcc <- calculatenMCC(predictions) # Normalized MCC (0-1 scale)
-f1 <- calculateF1(predictions) # F1 score
-bal_acc <- calculateBalAcc(predictions) # Balanced accuracy
-auprc <- calculateAUPRC(predictions) # Area under PR curve
-log2_apop <- calculateLog2APOP(predictions) # log2(AUPRC/prior)
-
-# All metrics at once
-all_metrics <- calculateEvalMets(predictions)
-
-# Confusion matrix
-conf_mat <- getConfusionMatrix(predictions)
+calculateEvalMets(preds)
+getConfusionMatrix(preds)
 ```
 
-| Metric | Function | Description |
-|--------|----------|-------------|
-| nMCC | `calculatenMCC()` | Normalized Matthews correlation coefficient (0-1) |
-| F1 | `calculateF1()` | Harmonic mean of precision and recall |
-| Balanced accuracy | `calculateBalAcc()` | Average of sensitivity and specificity |
-| AUPRC | `calculateAUPRC()` | Area under precision-recall curve |
-| log2(AUPRC/prior) | `calculateLog2APOP()` | Class-imbalance adjusted AUPRC |
-
 ## Feature importance
 
+`extractTopFeats()` ranks features by absolute coefficient (for LR). Use `n_top_feats` for a fixed count or `prop_vi_top_feats` for a percentile range.
+
 ```{r feature-importance}
-# Extract top features from fitted model by number
 top_features <- extractTopFeats(fit, n_top_feats = 20)
+head(top_features)
+```
 
-# Or by proportion of variable importance
-top_features <- extractTopFeats(fit, prop_vi_top_feats = c(0, 0.2))
+## Visualization
 
-# Columns: Variable, Importance, Sign
-# Positive Sign = associated with resistance
-# Negative Sign = associated with susceptibility
+```{r plot-prc, fig.width = 5, fig.height = 4}
+plotPRC(results$pred)
 ```
 
-## Iterative feature elimination (IFE)
+```{r plot-vi, fig.width = 5, fig.height = 4}
+plotTopFeatsVI(results$fit, n_top_feats = 10)
+```
 
-IFE iteratively removes top features to identify the most predictive subset:
+For a baseline comparison against random labels, fit a shuffled-label pipeline and compare:
 
-```{r ife}
-ife_results <- runIFE(
-  ml_tibble,
-  by_num = TRUE, # Remove by number of features
-  by_vi = FALSE, # Or remove by variable importance
-  percent_removal_vec = 10 * 1:9, # Remove 10%, 20%, ..., 90%
-  mix_vec = 0, # L2 regularization for IFE
-  return_feats = TRUE,
-  verbose = TRUE
+```{r plot-baseline, eval = FALSE}
+shuffled <- runMLPipeline(
+  ml_input_tibble = ml_tibble,
+  model           = "LR",
+  split           = c(1, 0),
+  n_fold          = 2,
+  shuffle_labels  = TRUE,
+  return_pred     = TRUE
 )
 
-# Results include nMCC at each iteration
-ife_results$ife_performance_tibble
-ife_results$feats_removed
-
-# Remove specific features manually
-ml_tibble_reduced <- removeTopFeats(ml_tibble, top_features)
+plotBaselineComparison(
+  non_shuffled_label_results = results$performance_tibble,
+  shuffled_label_results     = shuffled$performance_tibble
+)
 ```
 
-## Visualization
-
-### Precision-recall curve
+## Iterative feature elimination
 
-```{r plot-prc}
-test_data_plus_predictions <- readr::read_tsv(results / ML_pred / Sfl_drug_AMP_domains_binary_prediction.tsv)
-plotPRC(test_data_plus_predictions)
-```
-### ROC curve
+`runIFE()` retrains the model after iteratively removing top-ranked features, helping identify the minimal predictive subset. It runs the pipeline once per percentile in `percent_removal_vec`
 
-```{r plot-roc}
-test_data_plus_predictions <- readr::read_tsv(results / ML_pred / Sfl_drug_AMP_domains_binary_prediction.tsv)
-plotROC(test_data_plus_predictions)
-```
-### Variable importance plot
+```{r ife, eval = FALSE}
+ife_results <- runIFE(
+  ml_tibble,
+  by_num              = TRUE,
+  by_vi               = FALSE,
+  percent_removal_vec = 10 * 1:9,
+  mix_vec             = 0,
+  return_feats        = TRUE,
+  verbose             = FALSE
+)
 
-```{r plot-vi}
-topfeat <- readr::read_tsv(results / ML_top_features / Sfl_drug_AMP_domains_binary_top_features.tsv)
-plotTopFeatsVI(topfeat)
+ife_results$ife_performance_tibble
+ife_results$feats_removed
 ```
-### Baseline comparison barplot
-non_shuffled_label_results Output of `runMLPipeline(shuffle_labels = FALSE)`
-shuffled_label_results Output of `runMLPipeline(shuffle_labels = TRUE)`
-```{r plot-baseline}
-# Run with shuffled labels for baseline
-baseline_results <- runMLPipeline(
-  ml_input_tibble = ml_tibble,
-  model = "LR",
-  split = c(0.6, 0.2),
-  shuffle_labels = TRUE
-)
 
+`removeTopFeats()` strips a given set of features from a matrix tibble if you want to do this manually:
 
-# Compare performance
-getBaselineComparisonBarplot(
-  non_shuffled_label_results = results$performance_tibble,
-  shuffled_label_results = baseline_results$performance_tibble
-)
+```{r remove-feats}
+trimmed <- removeTopFeats(ml_tibble, head(top_features, 5))
+ncol(ml_tibble) - ncol(trimmed)
 ```
 
-## Fisher's exact tests (baseline comparison)
+## Fisher's exact tests (non-ML baseline)
 
-As a non-ML baseline, run Fisher's exact tests with multiple testing correction:
+`runFishers()` runs a Fisher's exact test of feature presence vs. phenotype for each feature, applies Benjamini–Hochberg correction, and computes per-class frequencies.
 
 ```{r fisher}
-# Complete Fisher pipeline
 fisher_results <- runFishers(
-  matrix_path = "results/matrix/Cje_drug_CIP_genes_binary_sparse.parquet",
-  Q = 0.05,
-  alternative = "two.sided",
+  matrix_path       = matrix_path,
+  Q                 = 0.05,
+  alternative       = "two.sided",
   susceptible_label = "Susceptible",
-  resistant_label = "Resistant"
+  resistant_label   = "Resistant"
 )
 
-# Or step-by-step:
-encoded <- encodePhenotype(ml_tibble)
-fisher_res <- runFisherTests(encoded$df, encoded$target, alternative = "two.sided")
-fisher_res <- applyBenjaminiHochberg(fisher_res, Q = 0.05)
-fisher_res <- computeFeatureFreq(encoded$df, fisher_res, encoded$target)
+head(fisher_results)
 ```
 
-### Plot Fisher's results
-You can label the top N features to highlight the strongest hits (default is 5) or change the BH-adjusted p-value threshold with the alpha argument
-
-```{r}
-plotFishers(fisher_results)
-plotFishers(fisher_results, alpha = 0.01, label_top_n = 5)
+```{r plot-fishers, fig.width = 5, fig.height = 4}
+plotFishers(fisher_results, alpha = 0.05, label_top_n = 5)
 ```
 
-## Wrapper to run all models 
-Given a DuckDB file produced by `runDataProcessing()`, it:
-1. generates all ML feature matrices (drug, class, year, country, MDR, LOO)
-2. creates all ML directory structures
-3. prepares ML input lists for every mode
-4. runs logistic regression ML models (standard + stratified + MDR)
-5. saves performance metrics, fitted models, predictions, and top feature rankings
-``` {r}
-runModelingPipeline(parquet_duckdb_path,
-  threads = 16,
-  n_fold = 5,
-  split = c(1, 0),
-  min_n = 25,
-  prop_vi_top_feats = c(0, 1),
-  pca_threshold = 0.99,
-  verbose = TRUE,
+## Training all models with `runMLmodels`
+
+`runMLmodels()` trains a model on every matrix produced by `generateMLInputs()` and writes performance TSVs into `out_path/ML_performance/`, predictions into `ML_pred/`, and top features into `ML_top_features/`. Takes over an hour on Sfl with default settings.
+
+```{r run-all-models, eval = FALSE}
+runMLmodels(
+  path            = out_dir,
+  stratify_by     = NULL,
+  LOO             = FALSE,
+  cross_test      = FALSE,
+  threads         = max(1L, parallel::detectCores() - 1L),
+  split           = c(1, 0),
+  n_fold          = 5,
+  verbose         = TRUE,
+  return_pred     = TRUE,
   use_saved_split = TRUE
 )
 ```
 
-Merge the performance and top features of each kind of models into a parquet that will serve as starting data for `amRshiny` package
-
-``` {r}
-buildPerformancePq(
-  path = "results/ML_perf",
-  stratify_by = NULL,
-  LOO = FALSE,
-  MDR = FALSE,
-  cross_test = FALSE,
-  out_parquet = NULL,
-  compression = "zstd",
-  verbose = TRUE
-)
-buildTopFeatsPq(
-  path = "results/ML_top_features",
-  stratify_by = NULL,
-  LOO = FALSE,
-  MDR = FALSE,
-  cross_test = FALSE,
-  out_parquet = NULL,
-  compression = "zstd",
-  verbose = TRUE
+## End-to-end: `runModelingPipeline`
+
+For the full pipeline from a DuckDB to all outputs in one call:
+
+```{r run-modeling-pipeline, eval = FALSE}
+runModelingPipeline(
+  parquet_duckdb_path = fixture,
+  threads             = max(1L, parallel::detectCores() - 1L),
+  n_fold              = 5,
+  split               = c(1, 0),
+  min_n               = 25,
+  prop_vi_top_feats   = c(0, 1),
+  pca_threshold       = 0.99,
+  verbose             = TRUE,
+  use_saved_split     = TRUE
 )
 ```
+
+## Session info
+
+```{r session-info}
+sessionInfo()
+```

From 32d39956b4a86a64909dac2f299ae29949fe7a17 Mon Sep 17 00:00:00 2001
From: amcim <amcim@users.noreply.github.com>
Date: Wed, 27 May 2026 23:04:29 +0000
Subject: [PATCH 2/6] Style code (GHA)

---
 R/core_ml.R              | 42 +++++++++++++++++++++++++---------------
 R/generate_matrices_ml.R |  4 +++-
 R/ife_ml.R               |  3 ++-
 R/plot_ml.R              | 23 ++++++++++++++--------
 R/prep_ml.R              |  9 ++++++---
 R/run_Fisher_tests.R     | 18 +++++++++++------
 vignettes/intro.Rmd      | 10 +++++-----
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/R/core_ml.R b/R/core_ml.R
index 37f04cd..39ffd21 100644
--- a/R/core_ml.R
+++ b/R/core_ml.R
@@ -214,7 +214,7 @@ buildLRModel <- function(multi_class = FALSE) {
 #'   feat_b = rep(c(1L, 0L), 5)
 #' )
 #' rec <- buildRecipe(train, use_pca = FALSE)
-#' lr  <- buildLRModel()
+#' lr <- buildLRModel()
 #' buildWflow(lr, rec)
 #' @export
 buildWflow <- function(parsnip_mod, recipe) {
@@ -282,9 +282,11 @@ buildTuningGrid <- function(
 #' @examples
 #' data(demo_ml_tibble)
 #' data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
-#' wflow <- buildWflow(buildLRModel(),
-#'                     buildRecipe(rsample::training(data_split)))
-#' grid  <- buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1))
+#' wflow <- buildWflow(
+#'   buildLRModel(),
+#'   buildRecipe(rsample::training(data_split))
+#' )
+#' grid <- buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1))
 #' set.seed(1)
 #' tuneGrid(wflow, data_split, grid, n_fold = 2)
 #' @export
@@ -339,12 +341,15 @@ tuneGrid <- function(wflow, data_split, grid = buildTuningGrid(model = "LR"),
 #' @examples
 #' data(demo_ml_tibble)
 #' data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
-#' wflow <- buildWflow(buildLRModel(),
-#'                     buildRecipe(rsample::training(data_split)))
+#' wflow <- buildWflow(
+#'   buildLRModel(),
+#'   buildRecipe(rsample::training(data_split))
+#' )
 #' set.seed(1)
 #' tune_res <- tuneGrid(wflow, data_split,
-#'                      buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
-#'                      n_fold = 2)
+#'   buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
+#'   n_fold = 2
+#' )
 #' selectBestModel(tune_res, wflow, "mcc")
 #' @export
 selectBestModel <- function(tune_res, wflow, select_best_metric = "mcc") {
@@ -373,9 +378,10 @@ selectBestModel <- function(tune_res, wflow, select_best_metric = "mcc") {
 #' train <- rsample::training(data_split)
 #' wflow <- buildWflow(buildLRModel(), buildRecipe(train))
 #' set.seed(1)
-#' tune_res   <- tuneGrid(wflow, data_split,
-#'                        buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
-#'                        n_fold = 2)
+#' tune_res <- tuneGrid(wflow, data_split,
+#'   buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
+#'   n_fold = 2
+#' )
 #' best_wflow <- selectBestModel(tune_res, wflow, "mcc")
 #' fitBestModel(best_wflow, train)
 #' @export
@@ -455,8 +461,10 @@ predictML <- function(fit, test_data) {
 #'     levels = c("Resistant", "Susceptible")
 #'   ),
 #'   .pred_class = factor(
-#'     c("Resistant", "Resistant", "Susceptible", "Resistant",
-#'       "Susceptible", "Resistant", "Susceptible", "Susceptible"),
+#'     c(
+#'       "Resistant", "Resistant", "Susceptible", "Resistant",
+#'       "Susceptible", "Resistant", "Susceptible", "Susceptible"
+#'     ),
 #'     levels = c("Resistant", "Susceptible")
 #'   )
 #' )
@@ -703,11 +711,13 @@ getConfusionMatrix <- function(test_data_plus_predictions) {
 #'     levels = c("Resistant", "Susceptible")
 #'   ),
 #'   .pred_class = factor(
-#'     c("Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
-#'       "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"),
+#'     c(
+#'       "Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
+#'       "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"
+#'     ),
 #'     levels = c("Resistant", "Susceptible")
 #'   ),
-#'   .pred_Resistant   = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
+#'   .pred_Resistant = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
 #'   .pred_Susceptible = c(0.1, 0.2, 0.6, 0.3, 0.7, 0.8, 0.4, 0.9, 0.8, 0.45)
 #' )
 #' calculateEvalMets(preds)
diff --git a/R/generate_matrices_ml.R b/R/generate_matrices_ml.R
index 69a6a87..978edb0 100644
--- a/R/generate_matrices_ml.R
+++ b/R/generate_matrices_ml.R
@@ -677,7 +677,9 @@ skipImbalancedMatrix <- function(genome_ids,
         arrow::write_parquet(combined, out_file)
         log("debug", paste0("Created LOO file: ", out_file))
         out_file
-      }) |> purrr::compact() |> unlist()
+      }) |>
+        purrr::compact() |>
+        unlist()
     }) |> unlist()
 
     created <- c(created, new_files)
diff --git a/R/ife_ml.R b/R/ife_ml.R
index cd12268..7358640 100644
--- a/R/ife_ml.R
+++ b/R/ife_ml.R
@@ -74,7 +74,8 @@ removeTopFeats <- function(ml_input_tibble, top_feat_tibble) {
 #' data(demo_ml_tibble)
 #' set.seed(1)
 #' runIFE(
-#'   demo_ml_tibble, by_num = TRUE, by_vi = FALSE,
+#'   demo_ml_tibble,
+#'   by_num = TRUE, by_vi = FALSE,
 #'   percent_removal_vec = c(25, 50), mix_vec = 0,
 #'   return_feats = TRUE, verbose = FALSE
 #' )
diff --git a/R/plot_ml.R b/R/plot_ml.R
index 84aea3d..c99886c 100644
--- a/R/plot_ml.R
+++ b/R/plot_ml.R
@@ -38,11 +38,13 @@ NULL
 #'     levels = c("Resistant", "Susceptible")
 #'   ),
 #'   .pred_class = factor(
-#'     c("Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
-#'       "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"),
+#'     c(
+#'       "Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
+#'       "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"
+#'     ),
 #'     levels = c("Resistant", "Susceptible")
 #'   ),
-#'   .pred_Resistant   = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
+#'   .pred_Resistant = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
 #'   .pred_Susceptible = c(0.1, 0.2, 0.6, 0.3, 0.7, 0.8, 0.4, 0.9, 0.8, 0.45)
 #' )
 #' plotPRC(preds)
@@ -109,8 +111,10 @@ plotTopFeatsVI <- function(fit, n_top_feats = 10) {
 #'   model           = rep(c("LR", "RF"), each = 3),
 #'   avg_f1_score    = c(0.72, 0.78, 0.83, 0.70, 0.75, 0.80)
 #' )
-#' plotDefaultEval(default_eval, x_default_eval = "train_prop",
-#'                 y_default_eval = "avg_f1_score")
+#' plotDefaultEval(default_eval,
+#'   x_default_eval = "train_prop",
+#'   y_default_eval = "avg_f1_score"
+#' )
 #' @export
 plotDefaultEval <- function(
   default_eval_tibble, x_default_eval = "train_prop",
@@ -241,10 +245,13 @@ plotBaselineComparison <- function(
 #' long <- tibble::tibble(
 #'   genome_id = rep(paste0("g", 1:10), each = 2),
 #'   feature_id = rep(c("gene_a", "gene_b"), 10),
-#'   value = c(1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
-#'             0, 0, 0, 1, 0, 1, 0, 1, 0, 0),
+#'   value = c(
+#'     1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
+#'     0, 0, 0, 1, 0, 1, 0, 1, 0, 0
+#'   ),
 #'   genome_drug.resistant_phenotype = rep(
-#'     rep(c("Resistant", "Susceptible"), each = 5), each = 2
+#'     rep(c("Resistant", "Susceptible"), each = 5),
+#'     each = 2
 #'   )
 #' )
 #' tmp <- tempfile(fileext = ".parquet")
diff --git a/R/prep_ml.R b/R/prep_ml.R
index cbacc91..ef3bbff 100644
--- a/R/prep_ml.R
+++ b/R/prep_ml.R
@@ -79,7 +79,8 @@ NULL
 #'   feature_id = rep(c("gene_a", "gene_b"), 6),
 #'   value = c(1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1),
 #'   genome_drug.resistant_phenotype = rep(
-#'     rep(c("Resistant", "Susceptible"), each = 3), each = 2
+#'     rep(c("Resistant", "Susceptible"), each = 3),
+#'     each = 2
 #'   )
 #' )
 #' tmp <- tempfile(fileext = ".parquet")
@@ -244,8 +245,10 @@ calculateMinSamples <- function(n_fold, split, res_prop, smallest_n_obs_rs = 1)
 #' @examples
 #' ml <- tibble::tibble(
 #'   genome_id = paste0("g", 1:4),
-#'   genome_drug.resistant_phenotype = c("Resistant", "Susceptible",
-#'                                       "Resistant", "Susceptible"),
+#'   genome_drug.resistant_phenotype = c(
+#'     "Resistant", "Susceptible",
+#'     "Resistant", "Susceptible"
+#'   ),
 #'   feat_a = c(1L, 0L, 1L, 0L)
 #' )
 #' .getTargetVarName(ml)
diff --git a/R/run_Fisher_tests.R b/R/run_Fisher_tests.R
index abaf42c..28b6ea0 100644
--- a/R/run_Fisher_tests.R
+++ b/R/run_Fisher_tests.R
@@ -27,8 +27,10 @@ NULL
 #' @examples
 #' df <- tibble::tibble(
 #'   genome_id = paste0("g", 1:6),
-#'   genome_drug.resistant_phenotype = c("Resistant", "Susceptible", "Resistant",
-#'                                       "Susceptible", "Resistant", "Susceptible"),
+#'   genome_drug.resistant_phenotype = c(
+#'     "Resistant", "Susceptible", "Resistant",
+#'     "Susceptible", "Resistant", "Susceptible"
+#'   ),
 #'   gene_a = c(1L, 0L, 1L, 0L, 1L, 0L),
 #'   gene_b = c(0L, 1L, 1L, 0L, 0L, 1L)
 #' )
@@ -136,7 +138,8 @@ applyBenjaminiHochberg <- function(df_fisher, Q = 0.05) {
 #' )
 #' encoded <- encodePhenotype(df)
 #' fisher <- applyBenjaminiHochberg(
-#'   runFisherTests(encoded$df, encoded$target), Q = 0.05
+#'   runFisherTests(encoded$df, encoded$target),
+#'   Q = 0.05
 #' )
 #' computeFeatureFreq(encoded$df, fisher, encoded$target)
 #' @export
@@ -176,10 +179,13 @@ computeFeatureFreq <- function(df, df_fisher, target) {
 #' long <- tibble::tibble(
 #'   genome_id = rep(paste0("g", 1:10), each = 2),
 #'   feature_id = rep(c("gene_a", "gene_b"), 10),
-#'   value = c(1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
-#'             0, 0, 0, 1, 0, 1, 0, 1, 0, 0),
+#'   value = c(
+#'     1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
+#'     0, 0, 0, 1, 0, 1, 0, 1, 0, 0
+#'   ),
 #'   genome_drug.resistant_phenotype = rep(
-#'     rep(c("Resistant", "Susceptible"), each = 5), each = 2
+#'     rep(c("Resistant", "Susceptible"), each = 5),
+#'     each = 2
 #'   )
 #' )
 #' tmp <- tempfile(fileext = ".parquet")
diff --git a/vignettes/intro.Rmd b/vignettes/intro.Rmd
index 89f3b18..be27d7a 100644
--- a/vignettes/intro.Rmd
+++ b/vignettes/intro.Rmd
@@ -80,7 +80,7 @@ matrix_path <- file.path(
   out_dir, "matrix", "Sfl_drug_AMP_genes_binary_sparse.parquet"
 )
 
-ml_tibble  <- loadMLInputTibble(matrix_path)
+ml_tibble <- loadMLInputTibble(matrix_path)
 n_features <- getNumFeat(ml_tibble)
 target_var <- .getTargetVarName(ml_tibble)
 
@@ -148,13 +148,13 @@ The builders below are what `runMLPipeline()` chains internally. Call them direc
 ```{r split-data}
 data_split <- splitMLInputTibble(ml_tibble, split = c(0.6, 0.2), seed = 123)
 train_data <- rsample::training(data_split)
-test_data  <- rsample::testing(data_split)
+test_data <- rsample::testing(data_split)
 ```
 
 ```{r build-model}
 recipe <- buildRecipe(train_data, use_pca = FALSE)
 lr_mod <- buildLRModel(multi_class = FALSE)
-wflow  <- buildWflow(lr_mod, recipe)
+wflow <- buildWflow(lr_mod, recipe)
 
 grid <- buildTuningGrid(
   model       = "LR",
@@ -164,9 +164,9 @@ grid <- buildTuningGrid(
 ```
 
 ```{r tune-fit}
-tune_res   <- tuneGrid(wflow, data_split, grid, n_fold = 2)
+tune_res <- tuneGrid(wflow, data_split, grid, n_fold = 2)
 best_wflow <- selectBestModel(tune_res, wflow, select_best_metric = "mcc")
-fit        <- fitBestModel(best_wflow, train_data)
+fit <- fitBestModel(best_wflow, train_data)
 
 preds <- predictML(fit, test_data)
 ```

From 07d59b1bcc6d13974e38f01cf827b62b577da687 Mon Sep 17 00:00:00 2001
From: Alexander McKim <alexander.mckim@cuanschutz.edu>
Date: Wed, 27 May 2026 17:37:47 -0600
Subject: [PATCH 3/6] adding BiocStyle to DESCRIPTION

---
 DESCRIPTION | 20 +++++++++++++-------
 NAMESPACE   |  1 +
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5802afc..7956cdb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,10 +21,10 @@ Description: Comprehensive machine learning (ML) pipeline for predicting antimic
 License: BSD_3_clause + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.3
 Depends:
     R (>= 4.5.0)
 Suggests:
+    BiocStyle,
     ComplexHeatmap,
     knitr,
     rmarkdown,
@@ -34,17 +34,17 @@ VignetteBuilder: knitr
 Config/testthat/edition: 3
 Imports:
     arrow,
+    BiocParallel,
     DBI,
     dplyr,
     duckdb,
-    future,
-    future.apply,
     ggplot2,
     ggrepel,
     glmnet,
     glue,
     jsonlite,
     hardhat,
+    methods,
     parsnip,
     purrr,
     readr,
@@ -57,14 +57,20 @@ Imports:
     tidyr,
     tune,
     vip,
+    withr,
     workflows,
     workflowsets,
     yardstick
 biocViews:
-    ML,
-    AMR,
-    MicrobialGenomics,
-    Pathogen,
+    Software,
+    Classification,
+    Regression,
+    StatisticalMethod,
+    FeatureExtraction,
+    MultipleComparison,
+    FunctionalGenomics,
+    Genetics,
     Visualization
 URL: https://github.com/JRaviLab/amRml
 BugReports: https://github.com/JRaviLab/amRml/issues
+Config/roxygen2/version: 8.0.0
diff --git a/NAMESPACE b/NAMESPACE
index 1c074a6..0b9d60e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -91,6 +91,7 @@ importFrom(graphics,barplot)
 importFrom(hardhat,tune)
 importFrom(jsonlite,fromJSON)
 importFrom(jsonlite,write_json)
+importFrom(methods,is)
 importFrom(parsnip,augment)
 importFrom(parsnip,boost_tree)
 importFrom(parsnip,extract_fit_engine)

From d3c06ed29e988314b6786c25cc6bb325c7e55dd2 Mon Sep 17 00:00:00 2001
From: Alexander McKim <alexander.mckim@cuanschutz.edu>
Date: Wed, 27 May 2026 17:50:11 -0600
Subject: [PATCH 4/6] fixing bug involving small fixture rng

---
 tests/testthat/test-pipeline.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/testthat/test-pipeline.R b/tests/testthat/test-pipeline.R
index 6bb0043..8333942 100644
--- a/tests/testthat/test-pipeline.R
+++ b/tests/testthat/test-pipeline.R
@@ -19,6 +19,7 @@ minimal_grid_args <- list(
 )
 
 run_pipeline <- function(fx, split = c(1, 0), n_fold = 2, ...) {
+  set.seed(1)
   suppressMessages(suppressWarnings(
     runMLPipeline(
       fx,

From 08ecd45eceeb0d7e9180fcf9f160ee9f8015bb2c Mon Sep 17 00:00:00 2001
From: Alexander McKim <alexander.mckim@cuanschutz.edu>
Date: Fri, 29 May 2026 12:24:59 -0600
Subject: [PATCH 5/6] putting seed in runMLPipeline

---
 R/core_ml.R                 | 48 +++++++++++++++++++------------------
 R/run_ML.R                  | 12 ++++++----
 R/run_ml_pipeline.R         |  6 ++++-
 man/buildWflow.Rd           |  2 +-
 man/calculateEvalMets.Rd    |  8 ++++---
 man/computeFeatureFreq.Rd   |  3 ++-
 man/demo_fit.Rd             |  7 +++---
 man/demo_ml_tibble.Rd       | 10 ++++----
 man/dot-getTargetVarName.Rd |  6 +++--
 man/encodePhenotype.Rd      |  6 +++--
 man/fitBestModel.Rd         |  7 +++---
 man/getConfusionMatrix.Rd   |  6 +++--
 man/loadMLInputTibble.Rd    |  3 ++-
 man/plotDefaultEval.Rd      |  6 +++--
 man/plotFishers.Rd          |  9 ++++---
 man/plotPRC.Rd              |  8 ++++---
 man/runFishers.Rd           |  9 ++++---
 man/runIFE.Rd               |  3 ++-
 man/runMDRmodels.Rd         |  5 +++-
 man/runMLmodels.Rd          |  5 +++-
 man/selectBestModel.Rd      | 11 +++++----
 man/splitMLInputTibble.Rd   |  7 ++++--
 man/tuneGrid.Rd             |  8 ++++---
 23 files changed, 120 insertions(+), 75 deletions(-)

diff --git a/R/core_ml.R b/R/core_ml.R
index 39ffd21..21309f1 100644
--- a/R/core_ml.R
+++ b/R/core_ml.R
@@ -70,7 +70,10 @@ NULL
 #' genome is resistant), but not both.
 #' @param split [num] Vector of length 2 indicating the proportion of data to
 #' be designated as training and validation, respectively.
-#' @param seed [num] For reproducible analysis
+#' @param seed [num] Optional. If supplied, the split is seeded (and the
+#' caller's RNG state restored afterward) for standalone reproducibility. When
+#' `NULL` (the default, as used by `runMLPipeline()`), the split inherits the
+#' ambient RNG stream so it can share one seed with downstream tuning and fitting.
 #' @return An `rsplit` object
 #' @examples
 #' ml <- tibble::tibble(
@@ -81,34 +84,33 @@ NULL
 #' )
 #' splitMLInputTibble(ml, split = c(1, 0), seed = 42)
 #' @export
-splitMLInputTibble <- function(ml_input_tibble, split = c(0.6, 0.2), seed = 5280) {
+splitMLInputTibble <- function(ml_input_tibble, split = c(0.6, 0.2), seed = NULL) {
   .checkArgTibble(ml_input_tibble, ml = TRUE)
   .checkArgSplit(split)
-  .checkArgSeed(seed)
+  if (!is.null(seed)) {
+    .checkArgSeed(seed)
+    withr::local_seed(seed)
+  }
 
   target_var <- .getTargetVarName(ml_input_tibble)
 
   # Split the data, maintaining R/S proportions.
-  data_split <- withr::with_seed(seed, {
-    if (split[2] == 0) {
-      # If in CV mode:
-      # Still retain a stratified testing holdout purely for final reporting metrics;
-      # CV is only performed on the training portion.
-      prop_train_for_holdout <- 0.8 # 80 percent train, 20 percent reserved test
-      rsample::initial_split(
-        ml_input_tibble,
-        prop   = prop_train_for_holdout,
-        strata = !!target_var
-      )
-    } else {
-      rsample::initial_validation_split(
-        ml_input_tibble,
-        prop   = split,
-        strata = !!target_var
-      )
-    }
-  })
-
+  if (split[2] == 0) {
+    # CV mode: retain a stratified testing holdout purely for final reporting
+    # metrics; CV is only performed on the training portion.
+    prop_train_for_holdout <- 0.8 # 80 percent train, 20 percent reserved test
+    data_split <- rsample::initial_split(
+      ml_input_tibble,
+      prop   = prop_train_for_holdout,
+      strata = !!target_var
+    )
+  } else {
+    data_split <-rsample::initial_validation_split(
+      ml_input_tibble,
+      prop   = split,
+      strata = !!target_var
+    )
+  }
   return(data_split)
 }
 
diff --git a/R/run_ML.R b/R/run_ML.R
index 06caff2..e85d42e 100644
--- a/R/run_ML.R
+++ b/R/run_ML.R
@@ -542,6 +542,7 @@ createMLinputList <- function(path,
 #' @param use_saved_split Logical. Whether to inherit split/seed/n_fold from ml_parameters.json. Default TRUE.
 #' @param shuffle_labels Logical. Randomly shuffle labels for baseline runs. Default FALSE.
 #' @param use_pca Logical. Use PCA on predictors. Default FALSE.
+#' @param seed Integer. Seed for the parallel RNG streams (`BiocParallel`). Default 5280.
 #'
 #' @return NULL (invisible). Called for side effects (model training and result saving).
 #'
@@ -580,7 +581,8 @@ runMDRmodels <- function(path,
                          return_pred = TRUE,
                          use_saved_split = TRUE,
                          shuffle_labels = FALSE,
-                         use_pca = FALSE) {
+                         use_pca = FALSE,
+                         seed = 5280) {
   files <- createMLinputList(path,
     stratify_by = NULL,
     LOO         = FALSE,
@@ -593,7 +595,7 @@ runMDRmodels <- function(path,
     return(invisible(NULL))
   }
 
-  param <- BiocParallel::SnowParam(workers = threads, type = "SOCK", RNGseed = 42)
+  param <- BiocParallel::SnowParam(workers = threads, type = "SOCK", RNGseed = seed)
 
   if (isTRUE(verbose)) {
     message("runMDRmodels(): enabling SnowParam with workers = ", threads)
@@ -724,6 +726,7 @@ runMDRmodels <- function(path,
 #' @param use_saved_split Logical. Whether to inherit split/seed/n_fold from ml_parameters.json. Default TRUE.
 #' @param shuffle_labels Logical. Randomly shuffle labels for baseline runs. Default FALSE.
 #' @param use_pca Logical. Use PCA on predictors. Default FALSE.
+#' @param seed Integer. Seed for the parallel RNG streams (`BiocParallel`). Default 5280.
 #'
 #' @return NULL (invisible). Called for side effects (model training and result saving).
 #'
@@ -843,7 +846,8 @@ runMLmodels <- function(path,
                         return_pred = TRUE,
                         use_saved_split = TRUE,
                         shuffle_labels = FALSE,
-                        use_pca = FALSE) {
+                        use_pca = FALSE,
+                        seed = 5280) {
   if (!is.null(stratify_by)) {
     if (!is.character(stratify_by) || length(stratify_by) != 1L) {
       stop("`stratify_by` must be NULL or a single string: 'year' or 'country'.")
@@ -865,7 +869,7 @@ runMLmodels <- function(path,
     return(invisible(NULL))
   }
 
-  param <- BiocParallel::SnowParam(workers = threads, type = "SOCK", RNGseed = 42)
+  param <- BiocParallel::SnowParam(workers = threads, type = "SOCK", RNGseed = seed)
 
   if (isTRUE(verbose)) {
     message("runMLmodels(): enabling SnowParam with workers = ", threads)
diff --git a/R/run_ml_pipeline.R b/R/run_ml_pipeline.R
index 797cab0..eab8dcc 100644
--- a/R/run_ml_pipeline.R
+++ b/R/run_ml_pipeline.R
@@ -100,7 +100,11 @@ runMLPipeline <- function(
   .checkArgReturnTuneRes(return_tune_res)
   .checkArgReturnFit(return_fit)
   .checkArgReturnPred(return_pred)
+  .checkArgSeed(seed)
 
+  # Seed once for the whole pipeline so the split, CV folds, tuning, and fit
+  # share one continuous RNG stream (restored to the caller's state on exit).
+  withr::local_seed(seed)
 
   # Set `n_fold` to `NA` if not using cross-validation.
   if (split[2] != 0) {
@@ -223,7 +227,7 @@ runMLPipeline <- function(
       dplyr::select(where(~ !any(is.na(.))))
   }
 
-  data_split <- splitMLInputTibble(ml_input_tibble, split = split, seed = seed)
+  data_split <- splitMLInputTibble(ml_input_tibble, split = split)
 
   # Now correct `data_split` if external `test_data` is provided.
   if (external_test_data & split[2] != 0) {
diff --git a/man/buildWflow.Rd b/man/buildWflow.Rd
index e3a1a6b..5791562 100644
--- a/man/buildWflow.Rd
+++ b/man/buildWflow.Rd
@@ -26,6 +26,6 @@ train <- tibble::tibble(
   feat_b = rep(c(1L, 0L), 5)
 )
 rec <- buildRecipe(train, use_pca = FALSE)
-lr  <- buildLRModel()
+lr <- buildLRModel()
 buildWflow(lr, rec)
 }
diff --git a/man/calculateEvalMets.Rd b/man/calculateEvalMets.Rd
index c4529a9..0506645 100644
--- a/man/calculateEvalMets.Rd
+++ b/man/calculateEvalMets.Rd
@@ -27,11 +27,13 @@ preds <- tibble::tibble(
     levels = c("Resistant", "Susceptible")
   ),
   .pred_class = factor(
-    c("Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
-      "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"),
+    c(
+      "Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
+      "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"
+    ),
     levels = c("Resistant", "Susceptible")
   ),
-  .pred_Resistant   = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
+  .pred_Resistant = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
   .pred_Susceptible = c(0.1, 0.2, 0.6, 0.3, 0.7, 0.8, 0.4, 0.9, 0.8, 0.45)
 )
 calculateEvalMets(preds)
diff --git a/man/computeFeatureFreq.Rd b/man/computeFeatureFreq.Rd
index 3c79dd4..961a507 100644
--- a/man/computeFeatureFreq.Rd
+++ b/man/computeFeatureFreq.Rd
@@ -28,7 +28,8 @@ df <- tibble::tibble(
 )
 encoded <- encodePhenotype(df)
 fisher <- applyBenjaminiHochberg(
-  runFisherTests(encoded$df, encoded$target), Q = 0.05
+  runFisherTests(encoded$df, encoded$target),
+  Q = 0.05
 )
 computeFeatureFreq(encoded$df, fisher, encoded$target)
 }
diff --git a/man/demo_fit.Rd b/man/demo_fit.Rd
index b7b38db..0810296 100644
--- a/man/demo_fit.Rd
+++ b/man/demo_fit.Rd
@@ -3,18 +3,17 @@
 \docType{data}
 \name{demo_fit}
 \alias{demo_fit}
-\title{Small demo LR fit}
+\title{Demo LR fit}
 \format{
 A fitted \code{workflow} object (output of \code{\link[=fitBestModel]{fitBestModel()}}).
 }
 \source{
-Built by \code{inst/scripts/make_demo_data.R}.
+\code{inst/scripts/make_demo_data.R}.
 }
 \usage{
 data(demo_fit)
 }
 \description{
-A tuned logistic-regression workflow fitted on \link{demo_ml_tibble}, for use
-in package examples that require a pre-fitted model.
+A tuned logistic-regression workflow fitted on \link{demo_ml_tibble}.
 }
 \keyword{datasets}
diff --git a/man/demo_ml_tibble.Rd b/man/demo_ml_tibble.Rd
index 4a02e3e..021834d 100644
--- a/man/demo_ml_tibble.Rd
+++ b/man/demo_ml_tibble.Rd
@@ -3,20 +3,20 @@
 \docType{data}
 \name{demo_ml_tibble}
 \alias{demo_ml_tibble}
-\title{Small demo ML input tibble}
+\title{Demo ML input tibble}
 \format{
 A tibble with 60 rows and 82 columns: \code{genome_id},
 \code{genome_drug.resistant_phenotype}, and 80 binary feature columns.
 }
 \source{
-Built by \code{inst/scripts/make_demo_data.R}.
+\code{inst/scripts/make_demo_data.R}.
 }
 \usage{
 data(demo_ml_tibble)
 }
 \description{
-A subset of the AMP-genes-binary matrix generated from the bundled
-\code{Sfl_parquet.duckdb} fixture, trimmed to 60 rows (30 Resistant, 30
-Susceptible) and ~80 feature columns for use in package examples.
+Stratified subset (30 Resistant + 30 Susceptible) of the AMP-genes-binary
+matrix from the bundled \code{Sfl_parquet.duckdb}, restricted to 80 feature
+columns.
 }
 \keyword{datasets}
diff --git a/man/dot-getTargetVarName.Rd b/man/dot-getTargetVarName.Rd
index 641ca8d..f9bd512 100644
--- a/man/dot-getTargetVarName.Rd
+++ b/man/dot-getTargetVarName.Rd
@@ -26,8 +26,10 @@ either \code{genome_drug.resistant_phenotype} or \code{resistant_classes}
 \examples{
 ml <- tibble::tibble(
   genome_id = paste0("g", 1:4),
-  genome_drug.resistant_phenotype = c("Resistant", "Susceptible",
-                                      "Resistant", "Susceptible"),
+  genome_drug.resistant_phenotype = c(
+    "Resistant", "Susceptible",
+    "Resistant", "Susceptible"
+  ),
   feat_a = c(1L, 0L, 1L, 0L)
 )
 .getTargetVarName(ml)
diff --git a/man/encodePhenotype.Rd b/man/encodePhenotype.Rd
index 14993c2..37dc92b 100644
--- a/man/encodePhenotype.Rd
+++ b/man/encodePhenotype.Rd
@@ -30,8 +30,10 @@ encodePhenotype
 \examples{
 df <- tibble::tibble(
   genome_id = paste0("g", 1:6),
-  genome_drug.resistant_phenotype = c("Resistant", "Susceptible", "Resistant",
-                                      "Susceptible", "Resistant", "Susceptible"),
+  genome_drug.resistant_phenotype = c(
+    "Resistant", "Susceptible", "Resistant",
+    "Susceptible", "Resistant", "Susceptible"
+  ),
   gene_a = c(1L, 0L, 1L, 0L, 1L, 0L),
   gene_b = c(0L, 1L, 1L, 0L, 0L, 1L)
 )
diff --git a/man/fitBestModel.Rd b/man/fitBestModel.Rd
index cdc7c58..70a95f1 100644
--- a/man/fitBestModel.Rd
+++ b/man/fitBestModel.Rd
@@ -25,9 +25,10 @@ data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
 train <- rsample::training(data_split)
 wflow <- buildWflow(buildLRModel(), buildRecipe(train))
 set.seed(1)
-tune_res   <- tuneGrid(wflow, data_split,
-                       buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
-                       n_fold = 2)
+tune_res <- tuneGrid(wflow, data_split,
+  buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
+  n_fold = 2
+)
 best_wflow <- selectBestModel(tune_res, wflow, "mcc")
 fitBestModel(best_wflow, train)
 }
diff --git a/man/getConfusionMatrix.Rd b/man/getConfusionMatrix.Rd
index 1047fba..45af5de 100644
--- a/man/getConfusionMatrix.Rd
+++ b/man/getConfusionMatrix.Rd
@@ -25,8 +25,10 @@ preds <- tibble::tibble(
     levels = c("Resistant", "Susceptible")
   ),
   .pred_class = factor(
-    c("Resistant", "Resistant", "Susceptible", "Resistant",
-      "Susceptible", "Resistant", "Susceptible", "Susceptible"),
+    c(
+      "Resistant", "Resistant", "Susceptible", "Resistant",
+      "Susceptible", "Resistant", "Susceptible", "Susceptible"
+    ),
     levels = c("Resistant", "Susceptible")
   )
 )
diff --git a/man/loadMLInputTibble.Rd b/man/loadMLInputTibble.Rd
index 8e3310b..bfcfcf4 100644
--- a/man/loadMLInputTibble.Rd
+++ b/man/loadMLInputTibble.Rd
@@ -24,7 +24,8 @@ long <- tibble::tibble(
   feature_id = rep(c("gene_a", "gene_b"), 6),
   value = c(1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1),
   genome_drug.resistant_phenotype = rep(
-    rep(c("Resistant", "Susceptible"), each = 3), each = 2
+    rep(c("Resistant", "Susceptible"), each = 3),
+    each = 2
   )
 )
 tmp <- tempfile(fileext = ".parquet")
diff --git a/man/plotDefaultEval.Rd b/man/plotDefaultEval.Rd
index 9eb41ad..ca2dcfe 100644
--- a/man/plotDefaultEval.Rd
+++ b/man/plotDefaultEval.Rd
@@ -41,6 +41,8 @@ default_eval <- tibble::tibble(
   model           = rep(c("LR", "RF"), each = 3),
   avg_f1_score    = c(0.72, 0.78, 0.83, 0.70, 0.75, 0.80)
 )
-plotDefaultEval(default_eval, x_default_eval = "train_prop",
-                y_default_eval = "avg_f1_score")
+plotDefaultEval(default_eval,
+  x_default_eval = "train_prop",
+  y_default_eval = "avg_f1_score"
+)
 }
diff --git a/man/plotFishers.Rd b/man/plotFishers.Rd
index 53a326f..fa4729e 100644
--- a/man/plotFishers.Rd
+++ b/man/plotFishers.Rd
@@ -38,10 +38,13 @@ Labels are applied only to the top-ranked features to preserve clarity.
 long <- tibble::tibble(
   genome_id = rep(paste0("g", 1:10), each = 2),
   feature_id = rep(c("gene_a", "gene_b"), 10),
-  value = c(1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
-            0, 0, 0, 1, 0, 1, 0, 1, 0, 0),
+  value = c(
+    1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
+    0, 0, 0, 1, 0, 1, 0, 1, 0, 0
+  ),
   genome_drug.resistant_phenotype = rep(
-    rep(c("Resistant", "Susceptible"), each = 5), each = 2
+    rep(c("Resistant", "Susceptible"), each = 5),
+    each = 2
   )
 )
 tmp <- tempfile(fileext = ".parquet")
diff --git a/man/plotPRC.Rd b/man/plotPRC.Rd
index 19fe869..3bad222 100644
--- a/man/plotPRC.Rd
+++ b/man/plotPRC.Rd
@@ -25,11 +25,13 @@ preds <- tibble::tibble(
     levels = c("Resistant", "Susceptible")
   ),
   .pred_class = factor(
-    c("Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
-      "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"),
+    c(
+      "Resistant", "Resistant", "Susceptible", "Resistant", "Susceptible",
+      "Susceptible", "Resistant", "Susceptible", "Susceptible", "Resistant"
+    ),
     levels = c("Resistant", "Susceptible")
   ),
-  .pred_Resistant   = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
+  .pred_Resistant = c(0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.6, 0.1, 0.2, 0.55),
   .pred_Susceptible = c(0.1, 0.2, 0.6, 0.3, 0.7, 0.8, 0.4, 0.9, 0.8, 0.45)
 )
 plotPRC(preds)
diff --git a/man/runFishers.Rd b/man/runFishers.Rd
index d90593c..a39b095 100644
--- a/man/runFishers.Rd
+++ b/man/runFishers.Rd
@@ -33,10 +33,13 @@ runFishers
 long <- tibble::tibble(
   genome_id = rep(paste0("g", 1:10), each = 2),
   feature_id = rep(c("gene_a", "gene_b"), 10),
-  value = c(1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
-            0, 0, 0, 1, 0, 1, 0, 1, 0, 0),
+  value = c(
+    1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
+    0, 0, 0, 1, 0, 1, 0, 1, 0, 0
+  ),
   genome_drug.resistant_phenotype = rep(
-    rep(c("Resistant", "Susceptible"), each = 5), each = 2
+    rep(c("Resistant", "Susceptible"), each = 5),
+    each = 2
   )
 )
 tmp <- tempfile(fileext = ".parquet")
diff --git a/man/runIFE.Rd b/man/runIFE.Rd
index b9d2ce7..2e39c9e 100644
--- a/man/runIFE.Rd
+++ b/man/runIFE.Rd
@@ -55,7 +55,8 @@ returns nMCC at each iteration.
 data(demo_ml_tibble)
 set.seed(1)
 runIFE(
-  demo_ml_tibble, by_num = TRUE, by_vi = FALSE,
+  demo_ml_tibble,
+  by_num = TRUE, by_vi = FALSE,
   percent_removal_vec = c(25, 50), mix_vec = 0,
   return_feats = TRUE, verbose = FALSE
 )
diff --git a/man/runMDRmodels.Rd b/man/runMDRmodels.Rd
index 74be1e3..73df41d 100644
--- a/man/runMDRmodels.Rd
+++ b/man/runMDRmodels.Rd
@@ -17,7 +17,8 @@ runMDRmodels(
   return_pred = TRUE,
   use_saved_split = TRUE,
   shuffle_labels = FALSE,
-  use_pca = FALSE
+  use_pca = FALSE,
+  seed = 5280
 )
 }
 \arguments{
@@ -46,6 +47,8 @@ runMDRmodels(
 \item{shuffle_labels}{Logical. Randomly shuffle labels for baseline runs. Default FALSE.}
 
 \item{use_pca}{Logical. Use PCA on predictors. Default FALSE.}
+
+\item{seed}{Integer. Seed for the parallel RNG streams (\code{BiocParallel}). Default 5280.}
 }
 \value{
 NULL (invisible). Called for side effects (model training and result saving).
diff --git a/man/runMLmodels.Rd b/man/runMLmodels.Rd
index 501e8d8..2020be2 100644
--- a/man/runMLmodels.Rd
+++ b/man/runMLmodels.Rd
@@ -20,7 +20,8 @@ runMLmodels(
   return_pred = TRUE,
   use_saved_split = TRUE,
   shuffle_labels = FALSE,
-  use_pca = FALSE
+  use_pca = FALSE,
+  seed = 5280
 )
 }
 \arguments{
@@ -55,6 +56,8 @@ runMLmodels(
 \item{shuffle_labels}{Logical. Randomly shuffle labels for baseline runs. Default FALSE.}
 
 \item{use_pca}{Logical. Use PCA on predictors. Default FALSE.}
+
+\item{seed}{Integer. Seed for the parallel RNG streams (\code{BiocParallel}). Default 5280.}
 }
 \value{
 NULL (invisible). Called for side effects (model training and result saving).
diff --git a/man/selectBestModel.Rd b/man/selectBestModel.Rd
index c1f6f2d..e507795 100644
--- a/man/selectBestModel.Rd
+++ b/man/selectBestModel.Rd
@@ -23,11 +23,14 @@ Selects best model by F1 score, AUPRC, MCC, or balanced accuracy.
 \examples{
 data(demo_ml_tibble)
 data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
-wflow <- buildWflow(buildLRModel(),
-                    buildRecipe(rsample::training(data_split)))
+wflow <- buildWflow(
+  buildLRModel(),
+  buildRecipe(rsample::training(data_split))
+)
 set.seed(1)
 tune_res <- tuneGrid(wflow, data_split,
-                     buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
-                     n_fold = 2)
+  buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1)),
+  n_fold = 2
+)
 selectBestModel(tune_res, wflow, "mcc")
 }
diff --git a/man/splitMLInputTibble.Rd b/man/splitMLInputTibble.Rd
index bfa3254..12b324b 100644
--- a/man/splitMLInputTibble.Rd
+++ b/man/splitMLInputTibble.Rd
@@ -4,7 +4,7 @@
 \alias{splitMLInputTibble}
 \title{splitMLInputTibble()}
 \usage{
-splitMLInputTibble(ml_input_tibble, split = c(0.6, 0.2), seed = 5280)
+splitMLInputTibble(ml_input_tibble, split = c(0.6, 0.2), seed = NULL)
 }
 \arguments{
 \item{ml_input_tibble}{An ML-ready tibble generated by \code{loadMLInputTibble()}.
@@ -17,7 +17,10 @@ genome is resistant), but not both.}
 \item{split}{\link[pillar:num]{num} Vector of length 2 indicating the proportion of data to
 be designated as training and validation, respectively.}
 
-\item{seed}{\link[pillar:num]{num} For reproducible analysis}
+\item{seed}{\link[pillar:num]{num} Optional. If supplied, the split is seeded (and the
+caller's RNG state restored afterward) for standalone reproducibility. When
+\code{NULL} (the default, as used by \code{runMLPipeline()}), the split inherits the
+ambient RNG stream so it can share one seed with downstream tuning and fitting.}
 }
 \value{
 An \code{rsplit} object
diff --git a/man/tuneGrid.Rd b/man/tuneGrid.Rd
index 90fc4e9..9d985a5 100644
--- a/man/tuneGrid.Rd
+++ b/man/tuneGrid.Rd
@@ -27,9 +27,11 @@ specified.
 \examples{
 data(demo_ml_tibble)
 data_split <- splitMLInputTibble(demo_ml_tibble, split = c(1, 0), seed = 1)
-wflow <- buildWflow(buildLRModel(),
-                    buildRecipe(rsample::training(data_split)))
-grid  <- buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1))
+wflow <- buildWflow(
+  buildLRModel(),
+  buildRecipe(rsample::training(data_split))
+)
+grid <- buildTuningGrid("LR", 10^c(-3, -1), c(0, 0.5, 1))
 set.seed(1)
 tuneGrid(wflow, data_split, grid, n_fold = 2)
 }

From f7726c730f092150fbd8e0f0138437d0d9bc4778 Mon Sep 17 00:00:00 2001
From: amcim <amcim@users.noreply.github.com>
Date: Fri, 29 May 2026 18:26:36 +0000
Subject: [PATCH 6/6] Style code (GHA)

---
 R/core_ml.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/core_ml.R b/R/core_ml.R
index 21309f1..d805e9c 100644
--- a/R/core_ml.R
+++ b/R/core_ml.R
@@ -105,7 +105,7 @@ splitMLInputTibble <- function(ml_input_tibble, split = c(0.6, 0.2), seed = NULL
       strata = !!target_var
     )
   } else {
-    data_split <-rsample::initial_validation_split(
+    data_split <- rsample::initial_validation_split(
       ml_input_tibble,
       prop   = split,
       strata = !!target_var