Eventdisplay · GernotMaier · Apr 10, 2026 · Apr 6, 2026 · Apr 6, 2026 · Apr 6, 2026
diff --git a/docs/changes/56.feature.md b/docs/changes/56.feature.md
@@ -0,0 +1 @@
+Add a script to optimize the gamma/hadron cut value using the Li & Ma significance. Introduce fine binning for cut values and allow different source strengths and source spectral shapes to be taken into account.
diff --git a/environment.yml b/environment.yml
@@ -6,6 +6,7 @@ dependencies:
   - python=3.13
   - awkward
   - awkward-pandas
+  - astropy-base
   - joblib
   - matplotlib
   - numpy
@@ -21,10 +22,10 @@ dependencies:
   - tabulate
   - towncrier
   - uproot
-  - xgboost
+  - xgboost=3.1.3 # pinned as output format changed in 3.2
 
 # cheatsheet
 # create: conda env create -f environment.yml
 # activate: conda activate eventdisplay_ml
 # update (conda/mamba): conda env update -f environment.yml --prune
-# update (micromamba): micromamba update -f environment.yml
+# update (micromamba): micromamba env update -f environment.yml -n eventdisplay_ml
diff --git a/src/eventdisplay_ml/config.py b/src/eventdisplay_ml/config.py
@@ -54,7 +54,10 @@ def configure_training(analysis_type):
     parser.add_argument(
         "--max_events",
         type=int,
-        help="Maximum number of events to process across all files.",
+        help=(
+            "Maximum number of events to process per input file-list "
+            "(signal/background in classification)."
+        ),
     )
     parser.add_argument(
         "--random_state",
@@ -127,7 +130,14 @@ def configure_training(analysis_type):
     model_configs["models"] = hyper_parameters(
         analysis_type, model_configs.get("hyperparameter_config")
     )
-    model_configs["models"]["xgboost"]["hyper_parameters"]["n_jobs"] = model_configs["max_cores"]
+    for model_name, model_cfg in model_configs["models"].items():
+        hyper_params = model_cfg.get("hyper_parameters")
+        if hyper_params is None:
+            _logger.warning(f"Model '{model_name}' has no hyper_parameters; skipping updates.")
+            continue
+        hyper_params["n_jobs"] = model_configs["max_cores"]
+        if model_configs.get("random_state") is not None:
+            hyper_params["random_state"] = model_configs["random_state"]
     model_configs["targets"] = target_features(analysis_type)
 
     if analysis_type == "stereo_analysis":

diff --git a/src/eventdisplay_ml/models.py b/src/eventdisplay_ml/models.py
@@ -691,8 +691,10 @@ def train_classification(df, model_configs):
         Dictionary of model configurations.
     """
     if df[0].empty or df[1].empty:
-        _logger.warning("Skipping training due to empty data.")
-        return None
+        raise ValueError(
+            "Classification training requires non-empty signal and background data. "
+            f"signal_events={len(df[0])}, background_events={len(df[1])}."
+        )
 
     df[0]["label"] = 1
     df[1]["label"] = 0
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Add a script to optimize the gamma/hadron cut value using the Li & Ma significance. Introduce fine binning for cut values and allow different source strengths and source spectral shapes to be taken into account.