diff --git a/DataFormats/PyTorchTest/interface/Device.h b/DataFormats/PyTorchTest/interface/Device.h
index d25ba9c1d9d..5adf6a5b01f 100644
--- a/DataFormats/PyTorchTest/interface/Device.h
+++ b/DataFormats/PyTorchTest/interface/Device.h
@@ -4,17 +4,16 @@
 #include "DataFormats/Portable/interface/PortableDeviceCollection.h"
 #include "DataFormats/PyTorchTest/interface/Layout.h"
-
 namespace torchportable {
 
-template <typename TDev>
-using ParticleCollectionDevice = PortableDeviceCollection<ParticleSoA, TDev>;
+  template <typename TDev>
+  using ParticleCollectionDevice = PortableDeviceCollection<ParticleSoA, TDev>;
 
-template <typename TDev>
-using ClassificationCollectionDevice = PortableDeviceCollection<ClassificationSoA, TDev>;
+  template <typename TDev>
+  using ClassificationCollectionDevice = PortableDeviceCollection<ClassificationSoA, TDev>;
 
-template <typename TDev>
-using RegressionCollectionDevice = PortableDeviceCollection<RegressionSoA, TDev>;
+  template <typename TDev>
+  using RegressionCollectionDevice = PortableDeviceCollection<RegressionSoA, TDev>;
 
 }  // namespace torchportable
diff --git a/DataFormats/PyTorchTest/interface/Host.h b/DataFormats/PyTorchTest/interface/Host.h
index 72a96d5d223..271838576a5 100644
--- a/DataFormats/PyTorchTest/interface/Host.h
+++ b/DataFormats/PyTorchTest/interface/Host.h
@@ -6,9 +6,9 @@
 
 namespace torchportable {
 
-using ParticleCollectionHost = PortableHostCollection<ParticleSoA>;
-using ClassificationCollectionHost = PortableHostCollection<ClassificationSoA>;
-using RegressionCollectionHost = PortableHostCollection<RegressionSoA>;
+  using ParticleCollectionHost = PortableHostCollection<ParticleSoA>;
+  using ClassificationCollectionHost = PortableHostCollection<ClassificationSoA>;
+  using RegressionCollectionHost = PortableHostCollection<RegressionSoA>;
 
 }  // namespace torchportable
diff --git a/DataFormats/PyTorchTest/interface/Layout.h b/DataFormats/PyTorchTest/interface/Layout.h
index 91e35f89927..4d1252a3a89 100644
--- a/DataFormats/PyTorchTest/interface/Layout.h
+++ b/DataFormats/PyTorchTest/interface/Layout.h
@@ -7,24 +7,14 @@
 
 namespace torchportable {
 
-GENERATE_SOA_LAYOUT(ParticleLayout,
-    SOA_COLUMN(float, pt),
-    SOA_COLUMN(float, eta),
-    SOA_COLUMN(float, phi)
-)
-using ParticleSoA = ParticleLayout<>;
+  GENERATE_SOA_LAYOUT(ParticleLayout, SOA_COLUMN(float, pt), SOA_COLUMN(float, eta), SOA_COLUMN(float, phi))
+  using ParticleSoA = ParticleLayout<>;
 
+  GENERATE_SOA_LAYOUT(ClassificationLayout, SOA_COLUMN(float, c1), SOA_COLUMN(float, c2))
+  using ClassificationSoA = ClassificationLayout<>;
 
-GENERATE_SOA_LAYOUT(ClassificationLayout,
-    SOA_COLUMN(float, c1),
-    SOA_COLUMN(float, c2)
-)
-using ClassificationSoA = ClassificationLayout<>;
-
-GENERATE_SOA_LAYOUT(RegressionLayout,
-    SOA_COLUMN(float, reco_pt)
-)
-using RegressionSoA = RegressionLayout<>;
+  GENERATE_SOA_LAYOUT(RegressionLayout, SOA_COLUMN(float, reco_pt))
+  using RegressionSoA = RegressionLayout<>;
 
 }  // namespace torchportable
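For orientation (not part of the patch): with the layouts above, a host-side collection is filled through its SoA view. A minimal sketch, assuming the standard CMSSW `PortableHostCollection` interface; `fillExample` is a hypothetical helper:

```cpp
#include "DataFormats/PyTorchTest/interface/Host.h"

// Hypothetical helper: writes placeholder kinematics through the SoA view.
void fillExample(torchportable::ParticleCollectionHost& particles) {
  auto view = particles.view();
  for (int i = 0; i < view.metadata().size(); ++i) {
    view[i].pt() = 10.f;
    view[i].eta() = 0.5f;
    view[i].phi() = 1.2f;
  }
}
```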
diff --git a/DataFormats/PyTorchTest/interface/alpaka/Collections.h b/DataFormats/PyTorchTest/interface/alpaka/Collections.h
index 62d6e3c49e0..5d05794da12 100644
--- a/DataFormats/PyTorchTest/interface/alpaka/Collections.h
+++ b/DataFormats/PyTorchTest/interface/alpaka/Collections.h
@@ -12,40 +12,34 @@
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE::torchportable {
 
-/**
+  /**
  * make the names from the top-level `torchportable` namespace visible for unqualified lookup
  * inside the `ALPAKA_ACCELERATOR_NAMESPACE::torchportable` namespace
  */
-using namespace ::torchportable;
-using ::torchportable::ParticleCollectionHost;
-using ::torchportable::ClassificationCollectionHost;
-using ::torchportable::RegressionCollectionHost;
-using ::torchportable::ParticleCollectionDevice;
-using ::torchportable::ClassificationCollectionDevice;
-using ::torchportable::RegressionCollectionDevice;
-
-using ParticleCollection =
-    std::conditional_t<
-        std::is_same_v<Device, alpaka::DevCpu>,
-        ParticleCollectionHost,
-        ParticleCollectionDevice<Device>>;
-
-using ClassificationCollection =
-    std::conditional_t<
-        std::is_same_v<Device, alpaka::DevCpu>,
-        ClassificationCollectionHost,
-        ClassificationCollectionDevice<Device>>;
-
-using RegressionCollection =
-    std::conditional_t<
-        std::is_same_v<Device, alpaka::DevCpu>,
-        RegressionCollectionHost,
-        RegressionCollectionDevice<Device>>;
+  using namespace ::torchportable;
+  using ::torchportable::ClassificationCollectionDevice;
+  using ::torchportable::ClassificationCollectionHost;
+  using ::torchportable::ParticleCollectionDevice;
+  using ::torchportable::ParticleCollectionHost;
+  using ::torchportable::RegressionCollectionDevice;
+  using ::torchportable::RegressionCollectionHost;
+
+  using ParticleCollection = std::
+      conditional_t<std::is_same_v<Device, alpaka::DevCpu>, ParticleCollectionHost, ParticleCollectionDevice<Device>>;
+
+  using ClassificationCollection = std::conditional_t<std::is_same_v<Device, alpaka::DevCpu>,
+                                                      ClassificationCollectionHost,
+                                                      ClassificationCollectionDevice<Device>>;
+
+  using RegressionCollection = std::conditional_t<std::is_same_v<Device, alpaka::DevCpu>,
+                                                  RegressionCollectionHost,
+                                                  RegressionCollectionDevice<Device>>;
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::torchportable
 
 ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportable::ParticleCollection, torchportable::ParticleCollectionHost);
-ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportable::ClassificationCollection, torchportable::ClassificationCollectionHost);
+ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportable::ClassificationCollection,
+                                      torchportable::ClassificationCollectionHost);
 ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportable::RegressionCollection, torchportable::RegressionCollectionHost);
 
 #endif  // DATA_FORMATS__PYTORCH_TEST__INTERFACE__ALPAKA__COLLECTIONS_H_
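Reading aid, not part of the patch: on a CPU-only backend `Device` is `alpaka::DevCpu`, so the conditional alias collapses to the host collection — which is exactly what the `ASSERT_DEVICE_MATCHES_HOST_COLLECTION` lines enforce. A sketch, assuming the serial backend is enabled:

```cpp
#include <type_traits>

#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
static_assert(std::is_same_v<ALPAKA_ACCELERATOR_NAMESPACE::torchportable::ParticleCollection,
                             torchportable::ParticleCollectionHost>,
              "on the host backend the portable alias is the host collection");
#endif
```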
diff --git a/PhysicsTools/PyTorch/interface/AlpakaConfig.h b/PhysicsTools/PyTorch/interface/AlpakaConfig.h
index e7ad2ec3520..946a751c4ad 100644
--- a/PhysicsTools/PyTorch/interface/AlpakaConfig.h
+++ b/PhysicsTools/PyTorch/interface/AlpakaConfig.h
@@ -1,4 +1,4 @@
-// ROCm/HIP backend not yet supported, see: https://github.com/pytorch/pytorch/blob/main/aten/CMakeLists.txt#L75
+// ROCm/HIP backend not yet supported, see: https://github.com/pytorch/pytorch/blob/main/aten/CMakeLists.txt#L75
 #ifndef PHYSICS_TOOLS__PYTORCH__INTERFACE__ALPAKA_CONFIG_H_
 #define PHYSICS_TOOLS__PYTORCH__INTERFACE__ALPAKA_CONFIG_H_
@@ -9,13 +9,12 @@
 // #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED || ALPAKA_ACC_GPU_HIP_ENABLED
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
 #include <c10/cuda/CUDAStream.h>
-#endif
-
+#endif
 
 namespace cms::torch::alpaka {
 
-template <typename T>
-inline constexpr bool false_value = false;
+  template <typename T>
+  inline constexpr bool false_value = false;
 
 /**
  * @brief Specifies the device type used in the torch integration with Alpaka.
@@ -24,18 +23,18 @@ inline constexpr bool false_value = false;
  * Based on the Alpaka accelerator backend, this constant defines the corresponding
  * PyTorch device type (`c10::DeviceType`) for the system.
  */
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-constexpr c10::DeviceType kTorchDeviceType = c10::DeviceType::CUDA;
+  constexpr c10::DeviceType kTorchDeviceType = c10::DeviceType::CUDA;
 // #elif ALPAKA_ACC_GPU_HIP_ENABLED
 // constexpr c10::DeviceType kTorchDeviceType = c10::DeviceType::HIP;
 #elif ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
-constexpr c10::DeviceType kTorchDeviceType = c10::DeviceType::CPU;
+  constexpr c10::DeviceType kTorchDeviceType = c10::DeviceType::CPU;
 #elif ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
-constexpr c10::DeviceType kTorchDeviceType = c10::DeviceType::CPU;
+  constexpr c10::DeviceType kTorchDeviceType = c10::DeviceType::CPU;
 #else
 #error "Could not define the torch device type."
 #endif
 
-/**
+  /**
  * @brief Converts an Alpaka device or queue object to a PyTorch device.
  *
  * This function extracts the native handle from the Alpaka device or queue and
@@ -47,17 +46,17 @@ constexpr c10::DeviceType kTorchDeviceType = c10::DeviceType::CPU;
  * builds the corresponding PyTorch device from it.
  *
  * @param obj The Alpaka object (Device or Queue) to convert.
  * @return Corresponding PyTorch device.
  */
-template <typename T>
-inline ::torch::Device device(const T& obj) {
-  if constexpr (::alpaka::isDevice<T>)
-    return ::torch::Device(kTorchDeviceType, obj.getNativeHandle());
-  else if constexpr (::alpaka::isQueue<T>)
-    return ::torch::Device(kTorchDeviceType, ::alpaka::getDev(obj).getNativeHandle());
-  else
-    static_assert(false_value<T>, "Unsupported type passed to device()");
-}
+  template <typename T>
+  inline ::torch::Device device(const T &obj) {
+    if constexpr (::alpaka::isDevice<T>)
+      return ::torch::Device(kTorchDeviceType, obj.getNativeHandle());
+    else if constexpr (::alpaka::isQueue<T>)
+      return ::torch::Device(kTorchDeviceType, ::alpaka::getDev(obj).getNativeHandle());
+    else
+      static_assert(false_value<T>, "Unsupported type passed to device()");
+  }
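A minimal usage sketch for `device()` (not part of the patch; `bindModel` is a hypothetical helper):

```cpp
// Translate an Alpaka queue into the torch device that owns its native
// stream, then move a JIT module there.
template <typename TQueue>
void bindModel(const TQueue& queue, ::torch::jit::script::Module& model) {
  ::torch::Device dev = cms::torch::alpaka::device(queue);  // kCPU or CUDA:<ordinal>
  model.to(dev);
}
```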
 
-/**
+  /**
  * @brief Base class for managing guard scopes that ensure thread safety
  * when working with Alpaka queues and PyTorch models.
  *
@@ -66,133 +65,118 @@ inline ::torch::Device device(const T& obj) {
  * Derived classes supply the backend-specific `set_impl()`/`reset_impl()` (CRTP).
  *
  * @tparam TQueue The type of Alpaka queue.
  */
-template <typename TDerived, typename TQueue>
-class GuardScope {
- public:
-  explicit GuardScope(const TQueue &queue) : queue_(queue) { set(); }
-  ~GuardScope() { reset(); };
-
- protected:
-  const TQueue& queue_;
-
- private:
-
-  void set() {
-    /**
+  template <typename TDerived, typename TQueue>
+  class GuardScope {
+  public:
+    explicit GuardScope(const TQueue &queue) : queue_(queue) { set(); }
+    ~GuardScope() { reset(); };
+
+  protected:
+    const TQueue &queue_;
+
+  private:
+    void set() {
+      /**
      * @brief Sets the guard to disable multi-threading and control PyTorch's threading model.
      * @note Global call.
      */
-    [[maybe_unused]] static bool threading_guard = [] {
-      at::set_num_threads(1);
-      at::set_num_interop_threads(1);
-      return true;
-    } ();
-    static_cast<TDerived *>(this)->set_impl();
-  }
+      [[maybe_unused]] static bool threading_guard = [] {
+        at::set_num_threads(1);
+        at::set_num_interop_threads(1);
+        return true;
+      }();
+      static_cast<TDerived *>(this)->set_impl();
+    }
 
-  /**
+    /**
    * @brief Resets the guard state, restoring the previous configuration.
    */
-  void reset() {
-    static_cast<TDerived *>(this)->reset_impl();
-  }
-};
+    void reset() { static_cast<TDerived *>(this)->reset_impl(); }
+  };
 
-/**
+  /**
  * @brief Traits selecting the GuardScope implementation for each Alpaka queue type.
  */
-template <typename TQueue>
-struct GuardTraits;
+  template <typename TQueue>
+  struct GuardTraits;
 
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-/**
+  /**
  * @brief Guard for CUDA-based operations when using Alpaka with the CUDA backend.
  *
  * This class manages the stream switching required when running Alpaka queues on a CUDA device.
  * It ensures that the correct stream is set before operations and resets it afterward.
  */
-class CudaAsyncGuard : public GuardScope<CudaAsyncGuard, alpaka::QueueCudaRtNonBlocking> {
- public:
-  using Base = GuardScope<CudaAsyncGuard, alpaka::QueueCudaRtNonBlocking>;
-  using Base::Base;
-
-  void set_impl() {
-    prev_stream_ = c10::cuda::getCurrentCUDAStream();
-    auto dev = device(this->queue_);
-    auto stream = c10::cuda::getStreamFromExternal(this->queue_.getNativeHandle(), dev.index());
-    c10::cuda::setCurrentCUDAStream(stream);
-  }
-
-  void reset_impl() {
-    c10::cuda::setCurrentCUDAStream(prev_stream_);
-  }
-
- private:
-  c10::cuda::CUDAStream prev_stream_ = c10::cuda::getDefaultCUDAStream();
-};
-
-template <>
-struct GuardTraits<alpaka::QueueCudaRtNonBlocking> {
-  using type = CudaAsyncGuard;
-};
+  class CudaAsyncGuard : public GuardScope<CudaAsyncGuard, alpaka::QueueCudaRtNonBlocking> {
+  public:
+    using Base = GuardScope<CudaAsyncGuard, alpaka::QueueCudaRtNonBlocking>;
+    using Base::Base;
+
+    void set_impl() {
+      prev_stream_ = c10::cuda::getCurrentCUDAStream();
+      auto dev = device(this->queue_);
+      auto stream = c10::cuda::getStreamFromExternal(this->queue_.getNativeHandle(), dev.index());
+      c10::cuda::setCurrentCUDAStream(stream);
+    }
+
+    void reset_impl() { c10::cuda::setCurrentCUDAStream(prev_stream_); }
+
+  private:
+    c10::cuda::CUDAStream prev_stream_ = c10::cuda::getDefaultCUDAStream();
+  };
+
+  template <>
+  struct GuardTraits<alpaka::QueueCudaRtNonBlocking> {
+    using type = CudaAsyncGuard;
+  };
 
 // #elif ALPAKA_ACC_GPU_HIP_ENABLED
 // Similar structure can be added for HIP support if needed. torch::cuda ns is hip when using ROCm/HIP
 
 #elif ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
-/**
+  /**
  * @brief Guard for serial CPU operations using Alpaka with CPU serial backend.
  */
-class CpuSerialSyncGuard : public GuardScope<CpuSerialSyncGuard, alpaka::QueueCpuBlocking> {
- public:
-  using Base = GuardScope<CpuSerialSyncGuard, alpaka::QueueCpuBlocking>;
-  using Base::Base;
-  void set_impl() {
-    /**< nothing to be done, only threading is disabled (in base class) */
-  }
-  void reset_impl() {
-    /**< nothing to be done */
-  }
-};
-
-template <>
-struct GuardTraits<alpaka::QueueCpuBlocking> {
-  using type = CpuSerialSyncGuard;
-};
+  class CpuSerialSyncGuard : public GuardScope<CpuSerialSyncGuard, alpaka::QueueCpuBlocking> {
+  public:
+    using Base = GuardScope<CpuSerialSyncGuard, alpaka::QueueCpuBlocking>;
+    using Base::Base;
+    void set_impl() { /**< nothing to be done, only threading is disabled (in base class) */ }
+    void reset_impl() { /**< nothing to be done */ }
+  };
+
+  template <>
+  struct GuardTraits<alpaka::QueueCpuBlocking> {
+    using type = CpuSerialSyncGuard;
+  };
 
 #elif ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
-/**
+  /**
  * @brief Guard for asynchronous CPU operations using Alpaka with TBB backend.
  */
-class CpuTbbAsyncGuard : public GuardScope<CpuTbbAsyncGuard, alpaka::QueueCpuNonBlocking> {
- public:
-  using Base = GuardScope<CpuTbbAsyncGuard, alpaka::QueueCpuNonBlocking>;
-  using Base::Base;
-  void set_impl() {
-    /**< nothing to be done, only threading is disabled (in base class) */
-  }
-  void reset_impl() {
-    /**< nothing to be done */
-  }
-};
-
-template <>
-struct GuardTraits<alpaka::QueueCpuNonBlocking> {
-  using type = CpuTbbAsyncGuard;
-};
+  class CpuTbbAsyncGuard : public GuardScope<CpuTbbAsyncGuard, alpaka::QueueCpuNonBlocking> {
+  public:
+    using Base = GuardScope<CpuTbbAsyncGuard, alpaka::QueueCpuNonBlocking>;
+    using Base::Base;
+    void set_impl() { /**< nothing to be done, only threading is disabled (in base class) */ }
+    void reset_impl() { /**< nothing to be done */ }
+  };
+
+  template <>
+  struct GuardTraits<alpaka::QueueCpuNonBlocking> {
+    using type = CpuTbbAsyncGuard;
+  };
 
 #else
 #error "Torch guard for this backend is not defined."
 #endif
 
-
-/**
+  /**
  * @brief Alias for the appropriate Guard type based on the Alpaka queue.
  */
-template <typename TQueue>
-using Guard = typename GuardTraits<TQueue>::type;
-
+  template <typename TQueue>
+  using Guard = typename GuardTraits<TQueue>::type;
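Sketch of the intended RAII use of `Guard` (mirrors the producers later in this patch; `runTorchWork` is hypothetical, `Queue` stands for the accelerator-namespace queue type):

```cpp
void runTorchWork(Queue& queue) {
  cms::torch::alpaka::Guard<Queue> guard(queue);
  // Inside this scope torch submits work to the Alpaka queue's stream
  // (on CUDA) and runs single-threaded (on all backends).
}  // the guard destructor restores the previous torch stream
```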
 
-/**
+  /**
  * @brief Computes a unique hash representation for the given Alpaka queue.
  *
  * This function generates a hash string that uniquely represents the given Alpaka queue.
@@ -202,23 +186,21 @@ using Guard = typename GuardTraits<TQueue>::type;
  *
  * @param queue The Alpaka queue to generate the hash for.
  * @return A string representing the unique hash for the queue.
  */
-template <typename TQueue, typename = std::enable_if_t<::alpaka::isQueue<TQueue>>>
-inline std::string queue_hash(const TQueue &queue) {
-  std::stringstream repr;
+  template <typename TQueue, typename = std::enable_if_t<::alpaka::isQueue<TQueue>>>
+  inline std::string queue_hash(const TQueue &queue) {
+    std::stringstream repr;
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-  auto stream = c10::cuda::getStreamFromExternal(
-      queue.getNativeHandle(), device(queue).index());
-  repr << "0x" << std::hex << stream.stream();
-  return repr.str();
+    auto stream = c10::cuda::getStreamFromExternal(queue.getNativeHandle(), device(queue).index());
+    repr << "0x" << std::hex << stream.stream();
+    return repr.str();
 // #elif ALPAKA_ACC_GPU_HIP_ENABLED
 //   return "0x0";
 #endif
-  repr << "0x" << std::hex << std::hash<std::thread::id>{}(std::this_thread::get_id());
-  return repr.str();
-}
-
+    repr << "0x" << std::hex << std::hash<std::thread::id>{}(std::this_thread::get_id());
+    return repr.str();
+  }
 
-/**
+  /**
  * @brief Computes a unique hash representation for the current stream associated with the given Alpaka queue.
  *
  * This function generates a hash string representing the current stream of the given Alpaka queue,
@@ -228,20 +210,20 @@ inline std::string queue_hash(const TQueue &queue) {
  *
  * @param queue The Alpaka queue to generate the current stream hash for.
  * @return A string representing the unique hash for the current stream.
  */
-template <typename TQueue, typename = std::enable_if_t<::alpaka::isQueue<TQueue>>>
-inline std::string current_stream_hash(const TQueue &queue) {
-  std::stringstream repr;
+  template <typename TQueue, typename = std::enable_if_t<::alpaka::isQueue<TQueue>>>
+  inline std::string current_stream_hash(const TQueue &queue) {
+    std::stringstream repr;
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-  const auto dev = device(queue);
-  auto stream = c10::cuda::getCurrentCUDAStream(dev.index());
-  repr << "0x" << std::hex << stream.stream();
-  return repr.str();
+    const auto dev = device(queue);
+    auto stream = c10::cuda::getCurrentCUDAStream(dev.index());
+    repr << "0x" << std::hex << stream.stream();
+    return repr.str();
 #elif ALPAKA_ACC_GPU_HIP_ENABLED
-  return "0x0";
+    return "0x0";
 #endif
-  repr << "0x" << std::hex << std::hash<std::thread::id>{}(std::this_thread::get_id());
-  return repr.str();
-}
+    repr << "0x" << std::hex << std::hash<std::thread::id>{}(std::this_thread::get_id());
+    return repr.str();
+  }
 
 }  // namespace cms::torch::alpaka
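The two hashes are a debugging aid: once a `Guard` is installed, the queue's stream and torch's current stream should agree. A fragment (the producers below use exactly this assertion):

```cpp
cms::torch::alpaka::Guard<Queue> guard(queue);
// Both hashes name the same native stream (CUDA) or thread id (CPU backends).
assert(cms::torch::alpaka::queue_hash(queue) == cms::torch::alpaka::current_stream_hash(queue));
```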
diff --git a/PhysicsTools/PyTorch/interface/Config.h b/PhysicsTools/PyTorch/interface/Config.h
index 72230b927cc..8269cac7a54 100644
--- a/PhysicsTools/PyTorch/interface/Config.h
+++ b/PhysicsTools/PyTorch/interface/Config.h
@@ -6,29 +6,27 @@
 
 #include "FWCore/Utilities/interface/Exception.h"
 
-
 namespace cms::torch {
-
-/**
+
+  /**
  * The following `constexpr` constants are aliases for various PyTorch data types.
  *
  * Primarily used for specifying tensor types when working with PyTorch
  * tensors in the CMS environment.
  */
-constexpr auto Byte = ::torch::kByte;     /**< unsigned 8-bit integer type. */
-constexpr auto Char = ::torch::kChar;     /**< signed 8-bit integer type. */
-constexpr auto Short = ::torch::kShort;   /**< signed 16-bit integer type. */
-constexpr auto Int = ::torch::kInt;       /**< signed 32-bit integer type. */
-constexpr auto Long = ::torch::kLong;     /**< signed 64-bit integer type. */
-constexpr auto UInt16 = ::torch::kUInt16; /**< unsigned 16-bit integer type. */
-constexpr auto UInt32 = ::torch::kUInt32; /**< unsigned 32-bit integer type. */
-constexpr auto UInt64 = ::torch::kUInt64; /**< unsigned 64-bit integer type. */
-constexpr auto Half = ::torch::kHalf;     /**< 16-bit floating point type. */
-constexpr auto Float = ::torch::kFloat;   /**< 32-bit floating point type. */
-constexpr auto Double = ::torch::kDouble; /**< 64-bit floating point type. */
-
-
-/**
+  constexpr auto Byte = ::torch::kByte;     /**< unsigned 8-bit integer type. */
+  constexpr auto Char = ::torch::kChar;     /**< signed 8-bit integer type. */
+  constexpr auto Short = ::torch::kShort;   /**< signed 16-bit integer type. */
+  constexpr auto Int = ::torch::kInt;       /**< signed 32-bit integer type. */
+  constexpr auto Long = ::torch::kLong;     /**< signed 64-bit integer type. */
+  constexpr auto UInt16 = ::torch::kUInt16; /**< unsigned 16-bit integer type. */
+  constexpr auto UInt32 = ::torch::kUInt32; /**< unsigned 32-bit integer type. */
+  constexpr auto UInt64 = ::torch::kUInt64; /**< unsigned 64-bit integer type. */
+  constexpr auto Half = ::torch::kHalf;     /**< 16-bit floating point type. */
+  constexpr auto Float = ::torch::kFloat;   /**< 32-bit floating point type. */
+  constexpr auto Double = ::torch::kDouble; /**< 64-bit floating point type. */
+
+  /**
  * @brief Loads a TorchScript model.
  *
  * This function wraps `torch::jit::load` to load a TorchScript model from a specified path.
@@ -42,16 +40,16 @@ constexpr auto Double = ::torch::kDouble; /**< 64-bit floating point type. */
  *
  * @note This function is intended for model loading in CMSSW environments, providing
  * integration with the framework's exception handling and logging facilities.
  */
-inline ::torch::jit::script::Module load(const std::string &model_path) {
-  try {
-    return ::torch::jit::load(model_path);
-  } catch (const c10::Error &e) {
-    cms::Exception ex("ModelLoadingError");
-    ex.addContext("Calling cms::torch::load(const std::string&)");
-    ex.addAdditionalInfo("Error loading the model: " + std::string(e.what()));
-    throw ex;
+  inline ::torch::jit::script::Module load(const std::string &model_path) {
+    try {
+      return ::torch::jit::load(model_path);
+    } catch (const c10::Error &e) {
+      cms::Exception ex("ModelLoadingError");
+      ex.addContext("Calling cms::torch::load(const std::string&)");
+      ex.addAdditionalInfo("Error loading the model: " + std::string(e.what()));
+      throw ex;
+    }
   }
-}
 
 }  // namespace cms::torch
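Usage sketch for the wrapper (not part of the patch; the model path is a hypothetical placeholder):

```cpp
// Failures surface as a cms::Exception ("ModelLoadingError") with call
// context attached, instead of a raw c10::Error.
::torch::jit::script::Module module = cms::torch::load("/path/to/model.pt");
```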
diff --git a/PhysicsTools/PyTorch/interface/Converter.h b/PhysicsTools/PyTorch/interface/Converter.h
index 2c831d5bf7c..8770bdcc4b3 100644
--- a/PhysicsTools/PyTorch/interface/Converter.h
+++ b/PhysicsTools/PyTorch/interface/Converter.h
@@ -6,97 +6,100 @@
 #include "DataFormats/SoATemplate/interface/SoALayout.h"
 #include "PhysicsTools/PyTorch/interface/SoAWrapper.h"
 
-
 namespace cms::torch::alpaka {
 
-// Metadata to run model with input SOA and fill output SOA.
-template <typename SOA_Input, typename SOA_Output>
-class ModelMetadata {
- public:
-  SoAWrapper<SOA_Input> input;
-  SoAWrapper<SOA_Output> output;
-
-  // Used in AOT model class to correctly choose multi or single output conversion
-  // Default value true, as single value can be parsed with multi output
-  bool multi_output;
-
-  ModelMetadata(const SoAWrapper<SOA_Input>& input_, const SoAWrapper<SOA_Output>& output_, bool multi_output_=true)
-      : input(input_), output(output_), multi_output(multi_output_) {}
-};
+  // Metadata to run a model with an input SoA and fill an output SoA.
+  template <typename SOA_Input, typename SOA_Output>
+  class ModelMetadata {
+  public:
+    SoAWrapper<SOA_Input> input;
+    SoAWrapper<SOA_Output> output;
+
+    // Used in the AOT model class to choose between multi- and single-output conversion.
+    // Defaults to true, since a single output can also be handled by the multi-output path.
+    bool multi_output;
+
+    ModelMetadata(const SoAWrapper<SOA_Input>& input_, const SoAWrapper<SOA_Output>& output_, bool multi_output_ = true)
+        : input(input_), output(output_), multi_output(multi_output_) {}
+  };
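How a `ModelMetadata` is assembled in practice — a fragment mirroring the `AotRegressionProducer` further down in this patch (`batch_size`, `records`, and `out_records` are assumed to come from the SoA collections):

```cpp
cms::torch::alpaka::SoAWrapper<torchportable::ParticleSoA> in(batch_size);
in.append_block("features", records.pt(), records.eta(), records.phi());

cms::torch::alpaka::SoAWrapper<torchportable::RegressionSoA> out(batch_size);
out.append_block("preds", out_records.reco_pt());

// Single-output model: skip the multi-output conversion path.
cms::torch::alpaka::ModelMetadata<torchportable::ParticleSoA, torchportable::RegressionSoA> metadata(
    in, out, /*multi_output=*/false);
```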
 
-// Static class to wrap raw SOA pointer in tensor object without copying.
-class Converter {
- public:
-  // Calculate size and stride of data store based on InputMetadata and return list of IValue, which is parent class of torch::tensor.
-  template <typename SOA_Input, typename SOA_Output>
-  static std::vector<::torch::IValue> convert_input(const ModelMetadata<SOA_Input, SOA_Output>& metadata,
-                                                    ::torch::Device device) {
-    std::vector<::torch::IValue> tensors(metadata.input.nBlocks);
-    for (int i = 0; i < metadata.input.nBlocks; i++) {
-      assert(reinterpret_cast<uintptr_t>(metadata.input[metadata.input.order[i]].ptr) % SOA_Input::alignment == 0);
-      tensors.at(i) =
-          std::move(Converter::array_to_tensor(device, metadata.input[metadata.input.order[i]]));
-    }
-    return tensors;
-  }
-
-  // AOT specific implementation, as model expects vector of torch::Tensor not torch::IValue
-  template <typename SOA_Input, typename SOA_Output>
-  static std::vector<::torch::Tensor> convert_input_tensor(const ModelMetadata<SOA_Input, SOA_Output>& metadata,
-                                                           ::torch::Device device) {
-    std::vector<::torch::Tensor> tensors(metadata.input.nBlocks);
-    for (int i = 0; i < metadata.input.nBlocks; i++) {
-      assert(reinterpret_cast<uintptr_t>(metadata.input[metadata.input.order[i]].ptr) % SOA_Input::alignment == 0);
-      tensors.at(i) =
-          std::move(Converter::array_to_tensor(device, metadata.input[metadata.input.order[i]]));
-    }
-    return tensors;
-  }
-
-  // Calculate size and stride of data store based on OutputMetadata and return single output tensor
-  template <typename SOA_Input, typename SOA_Output>
-  static ::torch::Tensor convert_output(const ModelMetadata<SOA_Input, SOA_Output>& metadata, ::torch::Device device) {
-    assert(reinterpret_cast<uintptr_t>(metadata.output[metadata.output.order[0]].ptr) % SOA_Output::alignment == 0);
-    return Converter::array_to_tensor(device, metadata.output[metadata.output.order[0]]);
-  }
-
-  // Calculate size and stride of data store based on OutputMetadata and fill SoA with tensor values
-  template <typename SOA_Input, typename SOA_Output>
-  static void convert_output(const std::vector<::torch::IValue>& tensors,
-                             const ModelMetadata<SOA_Input, SOA_Output>& metadata,
-                             ::torch::Device device) {
-    for (int i = 0; i < metadata.output.nBlocks; i++) {
-      // Only tensors are currenlty supported for conversion
-      if(tensors.at(i).isTensor()) {
-        assert(reinterpret_cast<uintptr_t>(metadata.output[metadata.output.order[i]].ptr) % SOA_Output::alignment == 0);
-        Converter::array_to_tensor(device, metadata.output[metadata.output.order[i]]) = tensors.at(i).toTensor();
-      }
-    }
-  }
-
-  // AOT specific implementation, as return type is torch::Tensor not torch::IValue
-  template <typename SOA_Input, typename SOA_Output>
-  static void convert_output(const std::vector<::torch::Tensor>& tensors,
-                             const ModelMetadata<SOA_Input, SOA_Output>& metadata,
-                             ::torch::Device device) {
-    for (int i = 0; i < metadata.output.nBlocks; i++) {
-      assert(reinterpret_cast<uintptr_t>(metadata.output[metadata.output.order[i]].ptr) % SOA_Output::alignment == 0);
-      Converter::array_to_tensor(device, metadata.output[metadata.output.order[i]]) = tensors.at(i);
-    }
-  }
-
- private:
-  // Wrap raw pointer by torch::Tensor based on type, size and stride.
-  template <typename SOA_Layout>
-  static ::torch::Tensor array_to_tensor(::torch::Device device, const Block<SOA_Layout>& block) {
-    auto options = ::torch::TensorOptions().dtype(block.type)
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-                       .device(device)
 #endif
-                       .pinned_memory(true);
-    return ::torch::from_blob(block.ptr, block.size, block.stride, options);
-  }
-};
+  // Static class to wrap a raw SoA pointer in a tensor object without copying.
+  class Converter {
+  public:
+    // Calculate size and stride of the data store based on the input metadata and return a list of IValue,
+    // the parent class of torch::Tensor.
+    template <typename SOA_Input, typename SOA_Output>
+    static std::vector<::torch::IValue> convert_input(const ModelMetadata<SOA_Input, SOA_Output>& metadata,
+                                                      ::torch::Device device) {
+      std::vector<::torch::IValue> tensors(metadata.input.nBlocks);
+      for (int i = 0; i < metadata.input.nBlocks; i++) {
+        assert(reinterpret_cast<uintptr_t>(metadata.input[metadata.input.order[i]].ptr) % SOA_Input::alignment == 0);
+        tensors.at(i) =
+            std::move(Converter::array_to_tensor(device, metadata.input[metadata.input.order[i]]));
+      }
+      return tensors;
+    }
+
+    // AOT specific implementation, as the model expects a vector of torch::Tensor, not torch::IValue
+    template <typename SOA_Input, typename SOA_Output>
+    static std::vector<::torch::Tensor> convert_input_tensor(const ModelMetadata<SOA_Input, SOA_Output>& metadata,
+                                                             ::torch::Device device) {
+      std::vector<::torch::Tensor> tensors(metadata.input.nBlocks);
+      for (int i = 0; i < metadata.input.nBlocks; i++) {
+        assert(reinterpret_cast<uintptr_t>(metadata.input[metadata.input.order[i]].ptr) % SOA_Input::alignment == 0);
+        tensors.at(i) =
+            std::move(Converter::array_to_tensor(device, metadata.input[metadata.input.order[i]]));
+      }
+      return tensors;
+    }
+
+    // Calculate size and stride of the data store based on the output metadata and return a single output tensor
+    template <typename SOA_Input, typename SOA_Output>
+    static ::torch::Tensor convert_output(const ModelMetadata<SOA_Input, SOA_Output>& metadata,
+                                          ::torch::Device device) {
+      assert(reinterpret_cast<uintptr_t>(metadata.output[metadata.output.order[0]].ptr) % SOA_Output::alignment == 0);
+      return Converter::array_to_tensor(device, metadata.output[metadata.output.order[0]]);
+    }
+
+    // Calculate size and stride of the data store based on the output metadata and fill the SoA with tensor values
+    template <typename SOA_Input, typename SOA_Output>
+    static void convert_output(const std::vector<::torch::IValue>& tensors,
+                               const ModelMetadata<SOA_Input, SOA_Output>& metadata,
+                               ::torch::Device device) {
+      for (int i = 0; i < metadata.output.nBlocks; i++) {
+        // Only tensors are currently supported for conversion
+        if (tensors.at(i).isTensor()) {
+          assert(reinterpret_cast<uintptr_t>(metadata.output[metadata.output.order[i]].ptr) % SOA_Output::alignment ==
+                 0);
+          Converter::array_to_tensor(device, metadata.output[metadata.output.order[i]]) =
+              tensors.at(i).toTensor();
+        }
+      }
+    }
+
+    // AOT specific implementation, as the return type is torch::Tensor, not torch::IValue
+    template <typename SOA_Input, typename SOA_Output>
+    static void convert_output(const std::vector<::torch::Tensor>& tensors,
+                               const ModelMetadata<SOA_Input, SOA_Output>& metadata,
+                               ::torch::Device device) {
+      for (int i = 0; i < metadata.output.nBlocks; i++) {
+        assert(reinterpret_cast<uintptr_t>(metadata.output[metadata.output.order[i]].ptr) % SOA_Output::alignment == 0);
+        Converter::array_to_tensor(device, metadata.output[metadata.output.order[i]]) = tensors.at(i);
+      }
+    }
+
+  private:
+    // Wrap a raw pointer in a torch::Tensor based on type, size and stride.
+    template <typename SOA_Layout>
+    static ::torch::Tensor array_to_tensor(::torch::Device device, const Block<SOA_Layout>& block) {
+      auto options = ::torch::TensorOptions()
+                         .dtype(block.type)
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
+                         .device(device)
 #endif
+                         .pinned_memory(true);
+      return ::torch::from_blob(block.ptr, block.size, block.stride, options);
+    }
+  };
 
 }  // namespace cms::torch::alpaka
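A standalone illustration of the `torch::from_blob` mechanism `Converter` builds on — wrapping an existing buffer as a tensor without copying (plain libtorch, runnable outside CMSSW):

```cpp
#include <torch/torch.h>
#include <vector>

int main() {
  std::vector<float> soa(8 * 3);  // 8 elements, 3 columns, column-major (SoA)
  auto t = torch::from_blob(soa.data(),
                            {8, 3},  // logical size: rows x columns
                            {1, 8},  // stride: rows adjacent, columns 8 apart
                            torch::TensorOptions().dtype(torch::kFloat));
  t.fill_(1.f);  // writes land directly in `soa`, no copy involved
}
```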
diff --git a/PhysicsTools/PyTorch/interface/Model.h b/PhysicsTools/PyTorch/interface/Model.h
index d2b70cca59b..a1272cae87e 100644
--- a/PhysicsTools/PyTorch/interface/Model.h
+++ b/PhysicsTools/PyTorch/interface/Model.h
@@ -5,65 +5,59 @@
 #include "PhysicsTools/PyTorch/interface/AlpakaConfig.h"
 #include "PhysicsTools/PyTorch/interface/Converter.h"
 
-
 namespace cms::torch::alpaka {
 
-/**
+  /**
  * @class CompilationType
 * @brief Specifies the type of compilation used for the model.
  */
-enum class CompilationType {
-  kJustInTime, /**< JIT compilation, load and compile at runtime from TorchScript */
-  kAheadOfTime /**< AOT compilation, load precompiled shared library at runtime */
-};
+  enum class CompilationType {
+    kJustInTime, /**< JIT compilation, load and compile at runtime from TorchScript */
+    kAheadOfTime /**< AOT compilation, load precompiled shared library at runtime */
+  };
 
-/**
+  /**
  * @class Model
  * @brief Model base class.
  */
-template <CompilationType T>
-class Model;
+  template <CompilationType T>
+  class Model;
 
-/**
+  /**
  * @class Model
  * @brief AOT Model specific implementation.
  *
 * Interface for loading and running models built with AOT compilation.
  */
-template <>
-class Model<CompilationType::kAheadOfTime> {
- public:
-  explicit Model(const std::string &model_path)
-      : loader_(model_path), runner_(loader_.get_runner()) {}
+  template <>
+  class Model<CompilationType::kAheadOfTime> {
+  public:
+    explicit Model(const std::string &model_path) : loader_(model_path), runner_(loader_.get_runner()) {}
 
-  /**
+    /**
    * @brief Torch fallback for testing purposes.
    * @param inputs input tensors
    * @return output tensors
    */
-  auto forward(std::vector<::torch::Tensor> &inputs) const {
-    return runner_->run(inputs);
-  }
+    auto forward(std::vector<::torch::Tensor> &inputs) const { return runner_->run(inputs); }
 
-  /**
+    /**
    * @brief Torch portable inference with SoA buffers without explicit copies.
   * @param metadata Metadata specifying how the memory blob is organized and accessed.
    */
-  template <typename SOA_Input, typename SOA_Output>
-  void forward(const ModelMetadata<SOA_Input, SOA_Output> &metadata) const {
-    std::vector<::torch::Tensor> inputs = Converter::convert_input_tensor(metadata, device_);
-
-    if (metadata.multi_output) {
-      auto out = runner_->run(inputs);
-      Converter::convert_output(out, metadata, device_);
-    }
-    else {
-      Converter::convert_output(metadata, device_) = runner_->run(inputs)[0];
-    }
-
-  }
+    template <typename SOA_Input, typename SOA_Output>
+    void forward(const ModelMetadata<SOA_Input, SOA_Output> &metadata) const {
+      std::vector<::torch::Tensor> inputs = Converter::convert_input_tensor(metadata, device_);
+
+      if (metadata.multi_output) {
+        auto out = runner_->run(inputs);
+        Converter::convert_output(out, metadata, device_);
+      } else {
+        Converter::convert_output(metadata, device_) = runner_->run(inputs)[0];
+      }
+    }
 
-  /**
+    /**
  * @brief Changes the model's `device_` metadata to the specified device.
   *
  * Utility function that updates the model's `device_` metadata, using Alpaka-aware
@@ -81,48 +75,46 @@ class Model {
   * @note If the device is already set to the specified device, the function does nothing.
   * @throws A static assertion failure at compile-time if an unsupported type is passed.
   */
-  template <typename T>
-  void to(const T &obj) const {
-    ::torch::Device device = ::torch::kCPU;
-    if constexpr (::alpaka::isDevice<T> || ::alpaka::isQueue<T>) {
-      device = cms::torch::alpaka::device(obj);
-    } else if constexpr (std::is_same_v<T, ::torch::Device>) {
-      device = obj;
-    } else {
-      static_assert(false_value<T>, "Unsupported type passed -> to(const T&)");
-    }
-
-    if (device == device_)
-      return;
-    device_ = device;
-  }
+    template <typename T>
+    void to(const T &obj) const {
+      ::torch::Device device = ::torch::kCPU;
+      if constexpr (::alpaka::isDevice<T> || ::alpaka::isQueue<T>) {
+        device = cms::torch::alpaka::device(obj);
+      } else if constexpr (std::is_same_v<T, ::torch::Device>) {
+        device = obj;
+      } else {
+        static_assert(false_value<T>, "Unsupported type passed -> to(const T&)");
+      }
+
+      if (device == device_)
+        return;
+      device_ = device;
+    }
 
-  /**
+    /**
  * @brief Accessor for the device currently bound to the model.
  * @return Current device bound to the model.
   */
-  ::torch::Device device() const { return device_; }
-
- private:
-  mutable ::torch::Device device_ = ::torch::kCPU; /**< Device metadata of the model */
-  mutable ::torch::inductor::AOTIModelPackageLoader loader_; /**< AOT model package loader */
-  mutable ::torch::inductor::AOTIModelContainerRunner* runner_ = nullptr; /**< AOT model container runner */
-};
+    ::torch::Device device() const { return device_; }
+
+  private:
+    mutable ::torch::Device device_ = ::torch::kCPU;           /**< Device metadata of the model */
+    mutable ::torch::inductor::AOTIModelPackageLoader loader_; /**< AOT model package loader */
+    mutable ::torch::inductor::AOTIModelContainerRunner *runner_ = nullptr; /**< AOT model container runner */
+  };
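Sketch of the AOT path end to end (the package path is hypothetical; `queue` and `metadata` are assumed to exist as sketched earlier):

```cpp
using AotModel = cms::torch::alpaka::Model<cms::torch::alpaka::CompilationType::kAheadOfTime>;

AotModel model("/path/to/model.pt2");  // AOT-compiled package
model.to(queue);                       // bind to the Alpaka queue's device
model.forward(metadata);               // zero-copy SoA in, SoA out
```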
 
-/**
+  /**
  * @class Model
  * @brief JIT Model specific implementation.
  *
 * Interface for loading and running models built with JIT compilation.
  */
-template <>
-class Model<CompilationType::kJustInTime> {
- public:
-  Model(const std::string &model_path)
-      : model_(std::move(cms::torch::load(model_path))) {}
+  template <>
+  class Model<CompilationType::kJustInTime> {
+  public:
+    Model(const std::string &model_path) : model_(std::move(cms::torch::load(model_path))) {}
 
-  /**
+    /**
    * @brief Moves the model to a specified device.
    *
  * Utility function that updates the internal device of the model, using Alpaka-aware
@@ -138,54 +130,52 @@ class Model {
   * @note If the device is already set to the specified device, the function does nothing.
   * @throws A static assertion failure at compile-time if an unsupported type is passed.
   */
-  template <typename T>
-  void to(const T &obj) const {
-    ::torch::Device device = ::torch::kCPU;
-    if constexpr (::alpaka::isDevice<T> || ::alpaka::isQueue<T>) {
-      device = cms::torch::alpaka::device(obj);
-    } else if constexpr (std::is_same_v<T, ::torch::Device>) {
-      device = obj;
-    } else {
-      static_assert(false_value<T>, "Unsupported type passed -> to(const T&)");
-    }
-
-    if (device == device_)
-      return;
-
-    device_ = device;
-    model_.to(device_, true);
-  }
+    template <typename T>
+    void to(const T &obj) const {
+      ::torch::Device device = ::torch::kCPU;
+      if constexpr (::alpaka::isDevice<T> || ::alpaka::isQueue<T>) {
+        device = cms::torch::alpaka::device(obj);
+      } else if constexpr (std::is_same_v<T, ::torch::Device>) {
+        device = obj;
+      } else {
+        static_assert(false_value<T>, "Unsupported type passed -> to(const T&)");
+      }
+
+      if (device == device_)
+        return;
+
+      device_ = device;
+      model_.to(device_, true);
+    }
 
-  /**
+    /**
    * @brief Torch fallback for testing purposes.
    * @param inputs input tensors
    * @return output tensors
    */
-  auto forward(std::vector<::torch::IValue> &inputs) const {
-    return model_.forward(inputs);
-  }
+    auto forward(std::vector<::torch::IValue> &inputs) const { return model_.forward(inputs); }
 
-  /**
+    /**
    * @brief Torch portable inference with SoA buffers without explicit copies.
   * @param metadata Metadata specifying how the memory blob is organized and accessed.
    */
-  template <typename SOA_Input, typename SOA_Output>
-  void forward(const ModelMetadata<SOA_Input, SOA_Output> &metadata) const {
-    auto input_tensor = Converter::convert_input(metadata, device_);
-    // TODO: think about support for multi-output models (without temporary mem copy)
-    Converter::convert_output(metadata, device_) = model_.forward(input_tensor).toTensor();
-  };
+    template <typename SOA_Input, typename SOA_Output>
+    void forward(const ModelMetadata<SOA_Input, SOA_Output> &metadata) const {
+      auto input_tensor = Converter::convert_input(metadata, device_);
+      // TODO: think about support for multi-output models (without temporary mem copy)
+      Converter::convert_output(metadata, device_) = model_.forward(input_tensor).toTensor();
+    };
 
-  /**
- * @brief Accessor for the device currently bound to the model.
- * @return Current device bound to the model.
-  */
-  ::torch::Device device() const { return device_; }
-
- private:
-  mutable ::torch::jit::script::Module model_; /**< JIT model */
-  mutable ::torch::Device device_ = ::torch::kCPU; /**< Device bound to the model */
-};
+    /**
+   * @brief Accessor for the device currently bound to the model.
+   * @return Current device bound to the model.
+   */
+    ::torch::Device device() const { return device_; }
+
+  private:
+    mutable ::torch::jit::script::Module model_;     /**< JIT model */
+    mutable ::torch::Device device_ = ::torch::kCPU; /**< Device bound to the model */
+  };
 
 }  // namespace cms::torch::alpaka
diff --git a/PhysicsTools/PyTorch/interface/Nvtx.h b/PhysicsTools/PyTorch/interface/Nvtx.h
index bca01b62441..8c2e6880c5a 100644
--- a/PhysicsTools/PyTorch/interface/Nvtx.h
+++ b/PhysicsTools/PyTorch/interface/Nvtx.h
@@ -1,7 +1,8 @@
 #ifndef PHYSICS_TOOLS__PYTORCH__INTERFACE__NVTX_H_
 #define PHYSICS_TOOLS__PYTORCH__INTERFACE__NVTX_H_
 
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) || defined(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED)
+#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) || \
+    defined(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED)
 #include <nvtx3/nvToolsExt.h>
 #endif
@@ -15,28 +16,30 @@ class NvtxScopedRange {
  public:
   NvtxScopedRange(const char* msg) {
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) || defined(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED)
+#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) || \
+    defined(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED)
     id_ = nvtxRangeStartA(msg);
 #endif
-  }
-
+  }
+
   void end() {
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) || defined(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED)
+#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) || \
+    defined(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED)
     if (active_) {
       active_ = false;
       nvtxRangeEnd(id_);
     }
 #endif
   }
-
+
   ~NvtxScopedRange() { end(); }
-
+
  private:
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) || defined(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED)
+#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) || \
+    defined(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED)
   nvtxRangeId_t id_;
   bool active_ = true;
 #endif
 };
 
-
 #endif  // PHYSICS_TOOLS__PYTORCH__INTERFACE__NVTX_H_
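Usage sketch for `NvtxScopedRange` (range names are illustrative): `end()` closes a range early; the destructor covers early returns.

```cpp
{
  NvtxScopedRange produce_range("MyModule::produce");
  NvtxScopedRange kernel_range("MyModule::kernel");
  // ... launch kernels ...
  kernel_range.end();  // close the inner range explicitly
}  // produce_range closed by the destructor
```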
diff --git a/PhysicsTools/PyTorch/interface/SoAWrapper.h b/PhysicsTools/PyTorch/interface/SoAWrapper.h
index 720d91f774f..62ea79e3684 100644
--- a/PhysicsTools/PyTorch/interface/SoAWrapper.h
+++ b/PhysicsTools/PyTorch/interface/SoAWrapper.h
@@ -9,192 +9,199 @@
 #include "DataFormats/SoATemplate/interface/SoALayout.h"
 
-
 using namespace cms::soa;
 
 namespace cms::torch::alpaka {
 
-template <typename T, typename... Others>
-concept SameTypes = (std::same_as<T, Others> && ...);
-
-// Wrapper struct to merge info about scalar columns and multidimensional eigen columns
-struct Columns {
-  std::vector<int> columns;
-
-  // Constructor for scalar columns
-  Columns(int columns_) { columns.push_back(columns_); }
-
-  // Constructor for multidimensional eigen columns
-  Columns(const std::vector<int>& columns_) : columns(columns_) {}
-  Columns(std::vector<int>&& columns_) : columns(std::move(columns_)) {}
-
-  size_t size() const { return columns.size(); }
-  int operator[](int i) const { return columns[i]; }
-  void push(int i) { columns.push_back(i); }
-};
-
-// Block of SoA Columns with same type and element size.
-// Calculates size and stride and stores torch type.
-template <typename SOA_Layout>
-struct Block {
-  std::vector<int64_t> stride;
-  std::vector<int64_t> size;
-
-  void* ptr;
-  ::torch::ScalarType type;
-  size_t bytes;
-  bool is_scalar = false;
-
-  Block() : ptr(nullptr) {}
-  // Constructor for columns and eigen columns
-  Block(int nElements, void* ptr_, const Columns& columns_, ::torch::ScalarType type_, size_t bytes_)
-      : ptr(ptr_), type(type_), bytes(bytes_) {
-    stride = std::move(create_stride(nElements, columns_, bytes_));
-    size = std::move(create_size(nElements, columns_));
-  };
-
-  // Constructor for scalar columns
-  Block(int nElements, void* ptr_, ::torch::ScalarType type_, size_t bytes_) : ptr(ptr_), type(type_), bytes(bytes_) {
-    stride = std::move(create_stride(nElements, 1, bytes_, true));
-    size = std::move(create_size(nElements, 1));
-  };
-
-  static int get_elems_per_column(int nElements, size_t bytes) {
-    int per_bunch = SOA_Layout::alignment / bytes;
-    int bunches = std::ceil(1.0 * nElements / per_bunch);
-    return bunches * per_bunch;
-  }
-
- private:
-  static std::vector<int64_t> create_size(int nElements, const Columns& columns) {
-    std::vector<int64_t> size(columns.size() + 1);
-    size[0] = nElements;
-    std::copy(columns.columns.begin(), columns.columns.end(), size.begin() + 1);
-
-    return size;
-  }
-
-  static std::vector<int64_t> create_stride(int nElements,
-                                            const Columns& columns,
-                                            size_t bytes,
-                                            bool is_scalar = false) {
-    int N = columns.size() + 1;
-    std::vector<int64_t> stride(N);
-
-    int per_bunch = SOA_Layout::alignment / bytes;
-    int bunches = std::ceil(1.0 * nElements / per_bunch);
-
-    if (!is_scalar)
-      stride[0] = 1;
-    else {
-      // Jump no element per row, to fill with scalar value
-      stride[0] = 0;
-      bunches = 1;
-    }
-    stride[std::min(2, N - 1)] = bunches * per_bunch;
-
-    // eigen are stored in column major, but still for every column.
-    if (N > 2) {
-      for (int i = 3; i < N; i++) {
-        stride[i] = stride[i - 1] * columns[i - 2];
-      }
-      stride[1] = stride[N - 1] * columns[N - 2];
-    }
-    return stride;
-  }
-};
-
-// Metadata for SOA split into multiple blocks.
-// An order for the resulting tensors can be defined.
-template <typename SOA_Layout>
-struct SoAWrapper {
- private:
-  std::map<std::string, Block<SOA_Layout>> blocks;
-
-  template <typename T>
-  inline static ::torch::ScalarType get_type() {
-    return ::torch::CppTypeToScalarType<T>();
-  }
-
-  inline static std::vector<int> standard_order(int size) {
-    std::vector<int> order(size);
-    for (int i = 0; i < size; i++) {
-      order[i] = i;
-    }
-    return order;
-  }
-
-  template <typename T, typename... Others>
-  bool check_location(int elements, T* column, T* other_column, Others... others) {
-    return check_location(elements, other_column, others...) && (column + elements) == other_column;
-  }
-
-  template <typename T>
-  bool check_location(int elements, T* column, T* other_column) {
-    return (column + elements) == other_column;
-  }
-
-  template <typename T>
-  bool check_location(int elements, T* column) {
-    return true;
-  }
-
- public:
-  // Order of resulting tensor list
-  std::vector<std::string> order;
-  int nElements;
-  int nBlocks;
-
-  SoAWrapper(int nElements_) : nElements(nElements_), nBlocks(0) {}
-
-  // TODO: Check columns are contiguous
-  template <typename T, typename... Others>
-  requires (SameTypes<T, Others...> && T::columnType == SoAColumnType::eigen)
-  void append_block(const std::string& name, T column, Others... others) {
-    const auto [ptr, stride] = column.tupleOrPointer();
-
-    int elems = Block<SOA_Layout>::get_elems_per_column(nElements, sizeof(typename T::ScalarType));
-    assert(check_location(elems * T::ValueType::RowsAtCompileTime * T::ValueType::ColsAtCompileTime, ptr, std::get<0>(others.tupleOrPointer())...));
-
-    Columns col({sizeof...(others) + 1, T::ValueType::RowsAtCompileTime});
-    if (T::ValueType::ColsAtCompileTime > 1)
-      col.push(T::ValueType::ColsAtCompileTime);
-
-    blocks.try_emplace(name, nElements, ptr, col, get_type<typename T::ScalarType>(), sizeof(typename T::ScalarType));
-    order.push_back(name);
-    nBlocks += 1;
-  }
-
-  // TODO: Check columns are contiguous
-  // Append a block based on a typed pointer and a column object.
-  // Can be normal column or eigen column.
-  template <typename T, typename... Others>
-  requires (SameTypes<T, Others...> && T::columnType == SoAColumnType::column)
-  void append_block(const std::string& name, T column, Others... others) {
-    int elems = Block<SOA_Layout>::get_elems_per_column(nElements, sizeof(typename T::ScalarType));
-    assert(check_location(elems, column.tupleOrPointer(), others.tupleOrPointer()...));
-
-    blocks.try_emplace(name, nElements, column.tupleOrPointer(), sizeof...(others) + 1, get_type<typename T::ScalarType>(), sizeof(typename T::ScalarType));
-    order.push_back(name);
-    nBlocks += 1;
-  }
-
-  template <typename T, SoAColumnType col_type>
-  requires (std::is_arithmetic_v<T> && col_type == SoAColumnType::scalar)
-  void append_block(const std::string& name, SoAParametersImpl<col_type, T> column) {
-    blocks.try_emplace(name, nElements, column.tupleOrPointer(), get_type<T>(), sizeof(T));
-    order.push_back(name);
-    nBlocks += 1;
-  }
-
-  // The order is defined by the order append_block is called.
-  // It can be changed by passing a vector of the block names afterwards.
-  // All blocks have to be mentioned.
-  void change_order(const std::vector<std::string>& new_order) { order = new_order; }
-  void change_order(std::vector<std::string>&& new_order) { order = std::move(new_order); }
-
-  inline Block<SOA_Layout> operator[](const std::string& key) const { return blocks.at(key); }
-};
+  template <typename T, typename... Others>
+  concept SameTypes = (std::same_as<T, Others> && ...);
+
+  // Wrapper struct to merge info about scalar columns and multidimensional eigen columns
+  struct Columns {
+    std::vector<int> columns;
+
+    // Constructor for scalar columns
+    Columns(int columns_) { columns.push_back(columns_); }
+
+    // Constructor for multidimensional eigen columns
+    Columns(const std::vector<int>& columns_) : columns(columns_) {}
+    Columns(std::vector<int>&& columns_) : columns(std::move(columns_)) {}
+
+    size_t size() const { return columns.size(); }
+    int operator[](int i) const { return columns[i]; }
+    void push(int i) { columns.push_back(i); }
+  };
+
+  // Block of SoA columns with the same type and element size.
+  // Calculates size and stride and stores the torch type.
+  template <typename SOA_Layout>
+  struct Block {
+    std::vector<int64_t> stride;
+    std::vector<int64_t> size;
+
+    void* ptr;
+    ::torch::ScalarType type;
+    size_t bytes;
+    bool is_scalar = false;
+
+    Block() : ptr(nullptr) {}
+    // Constructor for columns and eigen columns
+    Block(int nElements, void* ptr_, const Columns& columns_, ::torch::ScalarType type_, size_t bytes_)
+        : ptr(ptr_), type(type_), bytes(bytes_) {
+      stride = std::move(create_stride(nElements, columns_, bytes_));
+      size = std::move(create_size(nElements, columns_));
+    };
+
+    // Constructor for scalar columns
+    Block(int nElements, void* ptr_, ::torch::ScalarType type_, size_t bytes_) : ptr(ptr_), type(type_), bytes(bytes_) {
+      stride = std::move(create_stride(nElements, 1, bytes_, true));
+      size = std::move(create_size(nElements, 1));
+    };
+
+    static int get_elems_per_column(int nElements, size_t bytes) {
+      int per_bunch = SOA_Layout::alignment / bytes;
+      int bunches = std::ceil(1.0 * nElements / per_bunch);
+      return bunches * per_bunch;
+    }
+
+  private:
+    static std::vector<int64_t> create_size(int nElements, const Columns& columns) {
+      std::vector<int64_t> size(columns.size() + 1);
+      size[0] = nElements;
+      std::copy(columns.columns.begin(), columns.columns.end(), size.begin() + 1);
+
+      return size;
+    }
+
+    static std::vector<int64_t> create_stride(int nElements,
+                                              const Columns& columns,
+                                              size_t bytes,
+                                              bool is_scalar = false) {
+      int N = columns.size() + 1;
+      std::vector<int64_t> stride(N);
+
+      int per_bunch = SOA_Layout::alignment / bytes;
+      int bunches = std::ceil(1.0 * nElements / per_bunch);
+
+      if (!is_scalar)
+        stride[0] = 1;
+      else {
+        // Jump no element per row, to fill with the scalar value
+        stride[0] = 0;
+        bunches = 1;
+      }
+      stride[std::min(2, N - 1)] = bunches * per_bunch;
+
+      // Eigen data is stored column-major, but still per column.
+      if (N > 2) {
+        for (int i = 3; i < N; i++) {
+          stride[i] = stride[i - 1] * columns[i - 2];
+        }
+        stride[1] = stride[N - 1] * columns[N - 2];
+      }
+      return stride;
+    }
+  };
+
+  // Metadata for an SoA split into multiple blocks.
+  // An order for the resulting tensors can be defined.
+  template <typename SOA_Layout>
+  struct SoAWrapper {
+  private:
+    std::map<std::string, Block<SOA_Layout>> blocks;
+
+    template <typename T>
+    inline static ::torch::ScalarType get_type() {
+      return ::torch::CppTypeToScalarType<T>();
+    }
+
+    inline static std::vector<int> standard_order(int size) {
+      std::vector<int> order(size);
+      for (int i = 0; i < size; i++) {
+        order[i] = i;
+      }
+      return order;
+    }
+
+    template <typename T, typename... Others>
+    bool check_location(int elements, T* column, T* other_column, Others... others) {
+      return check_location(elements, other_column, others...) && (column + elements) == other_column;
+    }
+
+    template <typename T>
+    bool check_location(int elements, T* column, T* other_column) {
+      return (column + elements) == other_column;
+    }
+
+    template <typename T>
+    bool check_location(int elements, T* column) {
+      return true;
+    }
+
+  public:
+    // Order of the resulting tensor list
+    std::vector<std::string> order;
+    int nElements;
+    int nBlocks;
+
+    SoAWrapper(int nElements_) : nElements(nElements_), nBlocks(0) {}
+
+    // TODO: Check columns are contiguous
+    template <typename T, typename... Others>
+      requires(SameTypes<T, Others...> && T::columnType == SoAColumnType::eigen)
+    void append_block(const std::string& name, T column, Others... others) {
+      const auto [ptr, stride] = column.tupleOrPointer();
+
+      int elems = Block<SOA_Layout>::get_elems_per_column(nElements, sizeof(typename T::ScalarType));
+      assert(check_location(elems * T::ValueType::RowsAtCompileTime * T::ValueType::ColsAtCompileTime,
+                            ptr,
+                            std::get<0>(others.tupleOrPointer())...));
+
+      Columns col({sizeof...(others) + 1, T::ValueType::RowsAtCompileTime});
+      if (T::ValueType::ColsAtCompileTime > 1)
+        col.push(T::ValueType::ColsAtCompileTime);
+
+      blocks.try_emplace(name, nElements, ptr, col, get_type<typename T::ScalarType>(), sizeof(typename T::ScalarType));
+      order.push_back(name);
+      nBlocks += 1;
+    }
+
+    // TODO: Check columns are contiguous
+    // Append a block based on a typed pointer and a column object.
+    // Can be a normal column or an eigen column.
+    template <typename T, typename... Others>
+      requires(SameTypes<T, Others...> &&
+               T::columnType == SoAColumnType::column)
+    void append_block(const std::string& name, T column, Others... others) {
+      int elems = Block<SOA_Layout>::get_elems_per_column(nElements, sizeof(typename T::ScalarType));
+      assert(check_location(elems, column.tupleOrPointer(), others.tupleOrPointer()...));
+
+      blocks.try_emplace(name,
+                         nElements,
+                         column.tupleOrPointer(),
+                         sizeof...(others) + 1,
+                         get_type<typename T::ScalarType>(),
+                         sizeof(typename T::ScalarType));
+      order.push_back(name);
+      nBlocks += 1;
+    }
+
+    template <typename T, SoAColumnType col_type>
+      requires(std::is_arithmetic_v<T> && col_type == SoAColumnType::scalar)
+    void append_block(const std::string& name, SoAParametersImpl<col_type, T> column) {
+      blocks.try_emplace(name, nElements, column.tupleOrPointer(), get_type<T>(), sizeof(T));
+      order.push_back(name);
+      nBlocks += 1;
+    }
+
+    // The order is defined by the order in which append_block is called.
+    // It can be changed by passing a vector of the block names afterwards.
+    // All blocks have to be mentioned.
+    void change_order(const std::vector<std::string>& new_order) { order = new_order; }
+    void change_order(std::vector<std::string>&& new_order) { order = std::move(new_order); }
+
+    inline Block<SOA_Layout> operator[](const std::string& key) const { return blocks.at(key); }
+  };
 
 }  // namespace cms::torch::alpaka
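A worked example of the size/stride arithmetic in `Block` (numbers only; assumes a 128-byte SoA alignment, which is a typical but not guaranteed value):

```cpp
// float columns: per_bunch = 128 / sizeof(float) = 32 elements per bunch.
// nElements = 100  ->  bunches = ceil(100 / 32) = 4  ->  128 padded rows.
// A 3-column block over 100 elements therefore gets
//   size   = {100, 3}
//   stride = {1, 128}   // consecutive rows adjacent, columns 128 rows apart
// which is exactly the {size, stride} pair handed to torch::from_blob.
```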
diff --git a/PhysicsTools/PyTorch/plugins/alpaka/AotRegressionProducer.cc b/PhysicsTools/PyTorch/plugins/alpaka/AotRegressionProducer.cc
index 140733d93f2..c1c89e4c015 100644
--- a/PhysicsTools/PyTorch/plugins/alpaka/AotRegressionProducer.cc
+++ b/PhysicsTools/PyTorch/plugins/alpaka/AotRegressionProducer.cc
@@ -19,12 +19,11 @@
 #include "PhysicsTools/PyTorch/interface/Nvtx.h"
 #include "PhysicsTools/PyTorch/plugins/alpaka/Kernels.h"
 
-
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
-using AotModel = cms::torch::alpaka::Model<cms::torch::alpaka::CompilationType::kAheadOfTime>;
+  using AotModel = cms::torch::alpaka::Model<cms::torch::alpaka::CompilationType::kAheadOfTime>;
 
-/**
+  /**
  * @class AotRegressionProducer
  * @brief EDProducer that runs a regression model on input particles with Alpaka backend.
  *
@@ -33,103 +32,106 @@
  */
-class AotRegressionProducer : public stream::EDProducer<edm::GlobalCache<AotModel>> {
- public:
-  AotRegressionProducer(const edm::ParameterSet &params, const AotModel *cache);
-
-  static std::unique_ptr<AotModel> initializeGlobalCache(const edm::ParameterSet &params);
-  static void globalEndJob(const AotModel *cache);
-
-  void produce(device::Event &event, const device::EventSetup &event_setup) override;
-  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
- private:
-  const device::EDGetToken<torchportable::ParticleCollection> inputs_token_; /**< Token to get input data. */
-  const device::EDPutToken<torchportable::RegressionCollection> outputs_token_; /**< Token to store output data. */
-  std::unique_ptr<Kernels> kernels_ = nullptr; /**< Kernel utilities for post-inference validation. */
-};
+  class AotRegressionProducer : public stream::EDProducer<edm::GlobalCache<AotModel>> {
+  public:
+    AotRegressionProducer(const edm::ParameterSet &params, const AotModel *cache);
+
+    static std::unique_ptr<AotModel> initializeGlobalCache(const edm::ParameterSet &params);
+    static void globalEndJob(const AotModel *cache);
+
+    void produce(device::Event &event, const device::EventSetup &event_setup) override;
+    static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);
+
+  private:
+    const device::EDGetToken<torchportable::ParticleCollection> inputs_token_;    /**< Token to get input data. */
+    const device::EDPutToken<torchportable::RegressionCollection> outputs_token_; /**< Token to store output data. */
+    std::unique_ptr<Kernels> kernels_ = nullptr; /**< Kernel utilities for post-inference validation. */
+  };
 
-AotRegressionProducer::AotRegressionProducer(edm::ParameterSet const& params, const AotModel *cache)
-    : EDProducer<edm::GlobalCache<AotModel>>(params),
-      inputs_token_{consumes(params.getParameter<edm::InputTag>("inputs"))},
-      outputs_token_{produces()},
-      kernels_{std::make_unique<Kernels>()} {}
+  AotRegressionProducer::AotRegressionProducer(edm::ParameterSet const &params, const AotModel *cache)
+      : EDProducer<edm::GlobalCache<AotModel>>(params),
+        inputs_token_{consumes(params.getParameter<edm::InputTag>("inputs"))},
+        outputs_token_{produces()},
+        kernels_{std::make_unique<Kernels>()} {}
 
-/**
+  /**
  * @brief Initializes the global cache by loading the model.
  * @param params Configuration parameters.
  * @return A unique pointer to the loaded model.
  */
-std::unique_ptr<AotModel> AotRegressionProducer::initializeGlobalCache(const edm::ParameterSet &param) {
-  auto model_path = param.getParameter<edm::FileInPath>("modelPath").fullPath();
-  return std::make_unique<AotModel>(model_path);
-}
+  std::unique_ptr<AotModel> AotRegressionProducer::initializeGlobalCache(const edm::ParameterSet &param) {
+    auto model_path = param.getParameter<edm::FileInPath>("modelPath").fullPath();
+    return std::make_unique<AotModel>(model_path);
+  }
 
-/**
+  /**
  * @brief Called at the end of the job to clean up the global cache.
 * @param cache Pointer to the global model cache.
  */
-void AotRegressionProducer::globalEndJob(const AotModel *cache) {}
+  void AotRegressionProducer::globalEndJob(const AotModel *cache) {}
 
-/**
+  /**
  * @brief Performs inference on the event data.
  * @param event The event to process.
  * @param event_setup Event setup information.
  */
-void AotRegressionProducer::produce(device::Event &event, const device::EventSetup &event_setup) {
-  auto t1 = std::chrono::high_resolution_clock::now();
-  NvtxScopedRange produce_range("Regression::produce");
-
-  // guard torch internal operations to not conflict with cmssw fw scheme
-  cms::torch::alpaka::Guard<Queue> guard(event.queue());
-  // sanity check for debug
-  assert(cms::torch::alpaka::queue_hash(event.queue()) == cms::torch::alpaka::current_stream_hash(event.queue()));
-
-  // get data
-  // TODO: const_cast should not be done by user
-  //       in principle should not be done by anyone
-  // @see: torch::from_blob(void*)
-  auto& inputs = const_cast<torchportable::ParticleCollection&>(event.get(inputs_token_));;
-  const size_t batch_size = inputs.const_view().metadata().size();
-  auto outputs = torchportable::RegressionCollection(batch_size, event.queue());
-
-  // metadata for automatic tensor conversion
-  auto input_records = inputs.view().records();
-  auto output_records = outputs.view().records();
-  cms::torch::alpaka::SoAWrapper<torchportable::ParticleSoA> inputs_metadata(batch_size);
-  inputs_metadata.append_block("features", input_records.pt(), input_records.eta(), input_records.phi());
-  cms::torch::alpaka::SoAWrapper<torchportable::RegressionSoA> outputs_metadata(batch_size);
-  outputs_metadata.append_block("preds", output_records.reco_pt());
-  cms::torch::alpaka::ModelMetadata<torchportable::ParticleSoA, torchportable::RegressionSoA> metadata(inputs_metadata, outputs_metadata);
-
-  // inference
-  NvtxScopedRange move_to_device("Regression::move_to_device");
-  if (cms::torch::alpaka::device(event.queue()) != globalCache()->device())
-    globalCache()->to(event.queue());
-  assert(cms::torch::alpaka::device(event.queue()) == globalCache()->device());
-  move_to_device.end();
-  NvtxScopedRange infer_range("Regression::inference");
-  globalCache()->forward(metadata);
-  infer_range.end();
-
-  // assert output match expected
-  kernels_->AssertRegression(event.queue(), outputs);
-  event.emplace(outputs_token_, std::move(outputs));
-  auto t2 = std::chrono::high_resolution_clock::now();
-  std::cout << "(Regression) OK - " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << " us" << std::endl;
-  produce_range.end();
-}
+  void AotRegressionProducer::produce(device::Event &event, const device::EventSetup &event_setup) {
+    auto t1 = std::chrono::high_resolution_clock::now();
+    NvtxScopedRange produce_range("Regression::produce");
+
+    // guard torch internal operations to not conflict with the cmssw fw scheme
+    cms::torch::alpaka::Guard<Queue> guard(event.queue());
+    // sanity check for debug
+    assert(cms::torch::alpaka::queue_hash(event.queue()) == cms::torch::alpaka::current_stream_hash(event.queue()));
+
+    // get data
+    // TODO: const_cast should not be done by the user
+    //       in principle should not be done by anyone
+    // @see: torch::from_blob(void*)
+    auto &inputs = const_cast<torchportable::ParticleCollection &>(event.get(inputs_token_));
+    const size_t batch_size = inputs.const_view().metadata().size();
+    auto outputs = torchportable::RegressionCollection(batch_size, event.queue());
+
+    // metadata for automatic tensor conversion
+    auto input_records = inputs.view().records();
+    auto output_records = outputs.view().records();
+    cms::torch::alpaka::SoAWrapper<torchportable::ParticleSoA> inputs_metadata(batch_size);
+    inputs_metadata.append_block("features", input_records.pt(), input_records.eta(), input_records.phi());
+    cms::torch::alpaka::SoAWrapper<torchportable::RegressionSoA> outputs_metadata(batch_size);
+    outputs_metadata.append_block("preds", output_records.reco_pt());
+    cms::torch::alpaka::ModelMetadata<torchportable::ParticleSoA, torchportable::RegressionSoA> metadata(
+        inputs_metadata, outputs_metadata);
+
+    // inference
+    NvtxScopedRange move_to_device("Regression::move_to_device");
+    if (cms::torch::alpaka::device(event.queue()) != globalCache()->device())
+      globalCache()->to(event.queue());
+    assert(cms::torch::alpaka::device(event.queue()) == globalCache()->device());
+    move_to_device.end();
+    NvtxScopedRange infer_range("Regression::inference");
+    globalCache()->forward(metadata);
+    infer_range.end();
+
+    // assert outputs match expectations
+    kernels_->AssertRegression(event.queue(), outputs);
+    event.emplace(outputs_token_, std::move(outputs));
+    auto t2 = std::chrono::high_resolution_clock::now();
+    std::cout << "(Regression) OK - " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << " us"
+              << std::endl;
+    produce_range.end();
+  }
 
-/**
+  /**
  * @brief Fills the parameter descriptions for this module.
  * @param descriptions Configuration descriptions object to fill.
  */
-void AotRegressionProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-  desc.add<edm::InputTag>("inputs");
-  desc.add<edm::FileInPath>("modelPath");
-  descriptions.addWithDefaultLabel(desc);
-}
+  void AotRegressionProducer::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
+    edm::ParameterSetDescription desc;
+    desc.add<edm::InputTag>("inputs");
+    desc.add<edm::FileInPath>("modelPath");
+    descriptions.addWithDefaultLabel(desc);
+  }
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/PhysicsTools/PyTorch/plugins/alpaka/CombinatoricsProducer.cc b/PhysicsTools/PyTorch/plugins/alpaka/CombinatoricsProducer.cc
index d3a209584c5..bfc2ed620f6 100644
--- a/PhysicsTools/PyTorch/plugins/alpaka/CombinatoricsProducer.cc
+++ b/PhysicsTools/PyTorch/plugins/alpaka/CombinatoricsProducer.cc
@@ -17,72 +17,72 @@
 #include "PhysicsTools/PyTorch/interface/Nvtx.h"
 #include "PhysicsTools/PyTorch/plugins/alpaka/Kernels.h"

-
 namespace ALPAKA_ACCELERATOR_NAMESPACE {

-/**
+  /**
   * @class CombinatoricsProducer
   * @brief A dummy Alpaka EDProducer that fills a particle collection.
   *
   * This producer simulates combinatorics logic by filling a new ParticleCollection with
   * placeholder values via a kernel call. Intended primarily for testing and validation.
   */
-class CombinatoricsProducer : public stream::EDProducer<> {
- public:
-  CombinatoricsProducer(const edm::ParameterSet &params);
+  class CombinatoricsProducer : public stream::EDProducer<> {
+  public:
+    CombinatoricsProducer(const edm::ParameterSet &params);

-  void produce(device::Event &event, const device::EventSetup &event_setup) override;
-  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+    void produce(device::Event &event, const device::EventSetup &event_setup) override;
+    static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);

- private:
-  const device::EDGetToken<torchportable::ParticleCollection> inputs_token_;  /**< Token to get input data. */
-  const device::EDPutToken<torchportable::ParticleCollection> outputs_token_; /**< Token to store output data. */
-  std::unique_ptr<Kernels> kernels_ = nullptr;                                /**< Kernel helper object. */
-};
+  private:
+    const device::EDGetToken<torchportable::ParticleCollection> inputs_token_;  /**< Token to get input data. */
+    const device::EDPutToken<torchportable::ParticleCollection> outputs_token_; /**< Token to store output data. */
+    std::unique_ptr<Kernels> kernels_ = nullptr;                                /**< Kernel helper object. */
+  };

-CombinatoricsProducer::CombinatoricsProducer(edm::ParameterSet const& params)
-    : EDProducer<>(params),
-      inputs_token_{consumes(params.getParameter<edm::InputTag>("inputs"))},
-      outputs_token_{produces()},
-      kernels_(std::make_unique<Kernels>()) {}
+  CombinatoricsProducer::CombinatoricsProducer(edm::ParameterSet const &params)
+      : EDProducer<>(params),
+        inputs_token_{consumes(params.getParameter<edm::InputTag>("inputs"))},
+        outputs_token_{produces()},
+        kernels_(std::make_unique<Kernels>()) {}

-/**
+  /**
   * @brief Processes the event and fills output with mock data using a kernel.
   * @param event The current event.
   * @param event_setup Setup information for the event.
   */
-void CombinatoricsProducer::produce(device::Event &event, const device::EventSetup &event_setup) {
-  auto t1 = std::chrono::high_resolution_clock::now();
-  // debug stream usage in concurrently scheduled modules
-  NvtxScopedRange produce_range("Combinatorics::produce");
-
-  // get data
-  const auto& inputs = event.get(inputs_token_);
-  const size_t batch_size = inputs.const_view().metadata().size();
-  auto outputs = torchportable::ParticleCollection(batch_size, event.queue());
-
-  // dummy kernel emulation
-  NvtxScopedRange kernel_range("Combinatorics::kernel");
-  kernels_->FillParticleCollection(event.queue(), outputs, 0.32f);
-  kernel_range.end();
-
-  // assert output matches expected
-  kernels_->AssertCombinatorics(event.queue(), outputs, 0.32f);
-  event.emplace(outputs_token_, std::move(outputs));
-  auto t2 = std::chrono::high_resolution_clock::now();
-  std::cout << "(Combinatorics) OK - " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << " us" << std::endl;
-  produce_range.end();
-}
+  void CombinatoricsProducer::produce(device::Event &event, const device::EventSetup &event_setup) {
+    auto t1 = std::chrono::high_resolution_clock::now();
+    // debug stream usage in concurrently scheduled modules
+    NvtxScopedRange produce_range("Combinatorics::produce");
+
+    // get data
+    const auto &inputs = event.get(inputs_token_);
+    const size_t batch_size = inputs.const_view().metadata().size();
+    auto outputs = torchportable::ParticleCollection(batch_size, event.queue());
+
+    // dummy kernel emulation
+    NvtxScopedRange kernel_range("Combinatorics::kernel");
+    kernels_->FillParticleCollection(event.queue(), outputs, 0.32f);
+    kernel_range.end();
+
+    // assert output matches expected
+    kernels_->AssertCombinatorics(event.queue(), outputs, 0.32f);
+    event.emplace(outputs_token_, std::move(outputs));
+    auto t2 = std::chrono::high_resolution_clock::now();
+    std::cout << "(Combinatorics) OK - " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count()
+              << " us" << std::endl;
+    produce_range.end();
+  }

-/**
+  /**
   * @brief Defines configuration parameters for the module.
   * @param descriptions Object to be populated with parameter descriptions.
   */
-void CombinatoricsProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-  desc.add<edm::InputTag>("inputs");
-  descriptions.addWithDefaultLabel(desc);
-}
+  void CombinatoricsProducer::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
+    edm::ParameterSetDescription desc;
+    desc.add<edm::InputTag>("inputs");
+    descriptions.addWithDefaultLabel(desc);
+  }

 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
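NvtxScopedRange brackets a code region with NVTX markers so that the concurrent, per-stream scheduling of these modules is visible in profilers such as Nsight Systems. A plausible sketch of the helper, assuming it wraps the plain NVTX push/pop API (an illustration, not the contents of Nvtx.h):

  #include <nvtx3/nvToolsExt.h>

  class ScopedRangeSketch {
  public:
    explicit ScopedRangeSketch(const char* label) { nvtxRangePushA(label); }
    void end() {  // close early, e.g. right after a kernel launch
      if (open_) {
        nvtxRangePop();
        open_ = false;
      }
    }
    ~ScopedRangeSketch() { end(); }  // otherwise closed at scope exit

  private:
    bool open_ = true;
  };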
diff --git a/PhysicsTools/PyTorch/plugins/alpaka/DataProducer.cc b/PhysicsTools/PyTorch/plugins/alpaka/DataProducer.cc
index ea2cb42063d..f623d61f910 100644
--- a/PhysicsTools/PyTorch/plugins/alpaka/DataProducer.cc
+++ b/PhysicsTools/PyTorch/plugins/alpaka/DataProducer.cc
@@ -9,57 +9,55 @@
 #include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"

-
 namespace ALPAKA_ACCELERATOR_NAMESPACE {

-/**
+  /**
   * @class DataProducer
   * @brief A minimal Alpaka EDProducer that generates a dummy ParticleCollection.
   *
   * This module produces a particle collection of configurable batch size,
   * zero-initialized, for testing or as placeholder data in processing chains.
   */
-class DataProducer : public stream::EDProducer<> {
- public:
-  DataProducer(const edm::ParameterSet &params);
+  class DataProducer : public stream::EDProducer<> {
+  public:
+    DataProducer(const edm::ParameterSet &params);

-  void produce(device::Event &event, const device::EventSetup &event_setup) override;
-  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+    void produce(device::Event &event, const device::EventSetup &event_setup) override;
+    static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);

- private:
-  const device::EDPutToken<torchportable::ParticleCollection> sic_put_token_; /**< Token to store output data. */
-  const uint32_t batch_size_;                                                 /**< Size of the batch to be produced. */
-};
+  private:
+    const device::EDPutToken<torchportable::ParticleCollection> sic_put_token_; /**< Token to store output data. */
+    const uint32_t batch_size_;                                                 /**< Size of the batch to be produced. */
+  };

-DataProducer::DataProducer(edm::ParameterSet const& params)
-    : EDProducer<>(params),
-      sic_put_token_{produces()},
-      batch_size_(params.getParameter<uint32_t>("batchSize")) {}
+  DataProducer::DataProducer(edm::ParameterSet const &params)
+      : EDProducer<>(params), sic_put_token_{produces()}, batch_size_(params.getParameter<uint32_t>("batchSize")) {}

-/**
+  /**
   * @brief Produces a ParticleCollection of fixed size with zero-initialized data.
   * @param event The current Alpaka event.
   * @param event_setup Event setup context (not used).
   */
-void DataProducer::produce(device::Event &event, const device::EventSetup &event_setup) {
-  auto t1 = std::chrono::high_resolution_clock::now();
-  // create dummy data
-  auto collection = torchportable::ParticleCollection(batch_size_, event.queue());
-  collection.zeroInitialise(event.queue());
-  event.emplace(sic_put_token_, std::move(collection));
-  auto t2 = std::chrono::high_resolution_clock::now();
-  std::cout << "(Data) OK - " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << " us" << std::endl;
-}
+  void DataProducer::produce(device::Event &event, const device::EventSetup &event_setup) {
+    auto t1 = std::chrono::high_resolution_clock::now();
+    // create dummy data
+    auto collection = torchportable::ParticleCollection(batch_size_, event.queue());
+    collection.zeroInitialise(event.queue());
+    event.emplace(sic_put_token_, std::move(collection));
+    auto t2 = std::chrono::high_resolution_clock::now();
+    std::cout << "(Data) OK - " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << " us"
+              << std::endl;
+  }

-/**
+  /**
   * @brief Describes the allowed configuration parameters for this module.
   * @param descriptions Configuration description object to populate.
   */
-void DataProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-  desc.add<uint32_t>("batchSize");
-  descriptions.addWithDefaultLabel(desc);
-}
+  void DataProducer::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
+    edm::ParameterSetDescription desc;
+    desc.add<uint32_t>("batchSize");
+    descriptions.addWithDefaultLabel(desc);
+  }

 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
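The zeroInitialise() call above amounts to a stream-ordered clear of the collection's device buffer: it returns immediately, and correctness relies on all later work being enqueued on the same queue. A sketch under that assumption (not the actual PortableCollection implementation):

  // Zero a portable collection asynchronously; the memset is ordered on
  // `queue` before any operation submitted to it afterwards.
  template <typename TCollection>
  void zero_initialise(Queue& queue, TCollection& collection) {
    alpaka::memset(queue, collection.buffer(), 0);
  }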
diff --git a/PhysicsTools/PyTorch/plugins/alpaka/JitClassificationProducer.cc b/PhysicsTools/PyTorch/plugins/alpaka/JitClassificationProducer.cc
index 90d791308ac..58eaf883c80 100644
--- a/PhysicsTools/PyTorch/plugins/alpaka/JitClassificationProducer.cc
+++ b/PhysicsTools/PyTorch/plugins/alpaka/JitClassificationProducer.cc
@@ -19,12 +19,11 @@
 #include "PhysicsTools/PyTorch/interface/Nvtx.h"
 #include "PhysicsTools/PyTorch/plugins/alpaka/Kernels.h"

-
 namespace ALPAKA_ACCELERATOR_NAMESPACE {

-using JitModel = cms::torch::alpaka::Model;
+  using JitModel = cms::torch::alpaka::Model;

-/**
+  /**
   * @class JitClassificationProducer
   * @brief EDProducer that runs a classification model on input particles with Alpaka backend.
   *
@@ -33,103 +32,106 @@ using JitModel = cms::torch::alpaka::Model
-class JitClassificationProducer : public stream::EDProducer<edm::GlobalCache<JitModel>> {
- public:
-  JitClassificationProducer(const edm::ParameterSet &params, const JitModel *cache);
+  class JitClassificationProducer : public stream::EDProducer<edm::GlobalCache<JitModel>> {
+  public:
+    JitClassificationProducer(const edm::ParameterSet &params, const JitModel *cache);

-  static std::unique_ptr<JitModel> initializeGlobalCache(const edm::ParameterSet &params);
-  static void globalEndJob(const JitModel *cache);
+    static std::unique_ptr<JitModel> initializeGlobalCache(const edm::ParameterSet &params);
+    static void globalEndJob(const JitModel *cache);

-  void produce(device::Event &event, const device::EventSetup &event_setup) override;
-  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+    void produce(device::Event &event, const device::EventSetup &event_setup) override;
+    static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);

- private:
-  const device::EDGetToken<torchportable::ParticleCollection> inputs_token_;        /**< Token to get input data. */
-  const device::EDPutToken<torchportable::ClassificationCollection> outputs_token_; /**< Token to store output data. */
-  std::unique_ptr<Kernels> kernels_ = nullptr; /**< Kernel utilities for post-inference validation. */
-};
+  private:
+    const device::EDGetToken<torchportable::ParticleCollection> inputs_token_;        /**< Token to get input data. */
+    const device::EDPutToken<torchportable::ClassificationCollection> outputs_token_; /**< Token to store output data. */
+    std::unique_ptr<Kernels> kernels_ = nullptr; /**< Kernel utilities for post-inference validation. */
+  };

-JitClassificationProducer::JitClassificationProducer(edm::ParameterSet const& params, const JitModel *cache)
-    : EDProducer<edm::GlobalCache<JitModel>>(params),
-      inputs_token_{consumes(params.getParameter<edm::InputTag>("inputs"))},
-      outputs_token_{produces()},
-      kernels_{std::make_unique<Kernels>()} {}
+  JitClassificationProducer::JitClassificationProducer(edm::ParameterSet const &params, const JitModel *cache)
+      : EDProducer<edm::GlobalCache<JitModel>>(params),
+        inputs_token_{consumes(params.getParameter<edm::InputTag>("inputs"))},
+        outputs_token_{produces()},
+        kernels_{std::make_unique<Kernels>()} {}

-/**
+  /**
   * @brief Initializes the global model cache by loading a TorchScript JIT model.
   * @param params Configuration parameters.
   * @return A unique pointer to the model.
   */
-std::unique_ptr<JitModel> JitClassificationProducer::initializeGlobalCache(const edm::ParameterSet &param) {
-  auto model_path = param.getParameter<edm::FileInPath>("modelPath").fullPath();
-  return std::make_unique<JitModel>(model_path);
-}
+  std::unique_ptr<JitModel> JitClassificationProducer::initializeGlobalCache(const edm::ParameterSet &param) {
+    auto model_path = param.getParameter<edm::FileInPath>("modelPath").fullPath();
+    return std::make_unique<JitModel>(model_path);
+  }

-/**
+  /**
   * @brief Called once at global job end. No-op here.
   * @param cache The global model cache instance.
   */
-void JitClassificationProducer::globalEndJob(const JitModel *cache) {}
+  void JitClassificationProducer::globalEndJob(const JitModel *cache) {}
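The const_cast flagged by the TODO in the produce() body below (and in the AOT producer above) is forced by the zero-copy tensor conversion: wrapping existing memory ultimately goes through torch::from_blob, which takes a non-const void* even for read-only use. A minimal sketch for a single SoA column; the pointer, shape, and CUDA device here are illustrative:

  #include <torch/torch.h>

  // Wrap a device-resident column without copying. from_blob does not take
  // ownership, so the SoA buffer must outlive the returned tensor.
  torch::Tensor wrap_column(const float* pt, int64_t batch_size) {
    auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA);
    return torch::from_blob(const_cast<float*>(pt), {batch_size, 1}, options);
  }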
-/**
+  /**
   * @brief Executes model inference on the particle collection.
   * @param event The current Alpaka event.
   * @param event_setup Event setup context (unused).
   */
-void JitClassificationProducer::produce(device::Event &event, const device::EventSetup &event_setup) {
-  auto t1 = std::chrono::high_resolution_clock::now();
-  NvtxScopedRange produce_range("Classifier::produce");
-
-  // guard torch internal operations to not conflict with fw execution scheme
-  cms::torch::alpaka::Guard guard(event.queue());
-  // sanity check
-  assert(cms::torch::alpaka::queue_hash(event.queue()) == cms::torch::alpaka::current_stream_hash(event.queue()));
-
-  // get data
-  // TODO: const_cast should not be done by user
-  //       in principle should not be done by anyone
-  // @see: torch::from_blob(void*)
-  auto& inputs = const_cast<torchportable::ParticleCollection&>(event.get(inputs_token_));;
-  const size_t batch_size = inputs.const_view().metadata().size();
-  auto outputs = torchportable::ClassificationCollection(batch_size, event.queue());
-
-  // metadata for automatic tensor conversion
-  auto input_records = inputs.view().records();
-  auto output_records = outputs.view().records();
-  cms::torch::alpaka::SoAWrapper inputs_metadata(batch_size);
-  inputs_metadata.append_block("features", input_records.pt(), input_records.eta(), input_records.phi());
-  cms::torch::alpaka::SoAWrapper outputs_metadata(batch_size);
-  outputs_metadata.append_block("preds", output_records.c1(), output_records.c2());
-  cms::torch::alpaka::ModelMetadata metadata(inputs_metadata, outputs_metadata);
-
-  // inference
-  NvtxScopedRange move_to_device("Classifier::move_to_device");
-  if (cms::torch::alpaka::device(event.queue()) != globalCache()->device())
-    globalCache()->to(event.queue());
-  assert(cms::torch::alpaka::device(event.queue()) == globalCache()->device());
-  move_to_device.end();
-  NvtxScopedRange infer_range("Classifier::inference");
-  globalCache()->forward(metadata);
-  infer_range.end();
-
-  // assert output matches expected
-  kernels_->AssertClassification(event.queue(), outputs);
-  event.emplace(outputs_token_, std::move(outputs));
-  auto t2 = std::chrono::high_resolution_clock::now();
-  std::cout << "(Classification) OK - " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << " us" << std::endl;
-  produce_range.end();
-}
-
-/**
+  void JitClassificationProducer::produce(device::Event &event, const device::EventSetup &event_setup) {
+    auto t1 = std::chrono::high_resolution_clock::now();
+    NvtxScopedRange produce_range("Classifier::produce");
+
+    // guard torch internal operations to not conflict with fw execution scheme
+    cms::torch::alpaka::Guard guard(event.queue());
+    // sanity check
+    assert(cms::torch::alpaka::queue_hash(event.queue()) == cms::torch::alpaka::current_stream_hash(event.queue()));
+
+    // get data
+    // TODO: const_cast should not be done by user
+    //       in principle should not be done by anyone
+    // @see: torch::from_blob(void*)
+    auto &inputs = const_cast<torchportable::ParticleCollection &>(event.get(inputs_token_));
+    const size_t batch_size = inputs.const_view().metadata().size();
+    auto outputs = torchportable::ClassificationCollection(batch_size, event.queue());
+
+    // metadata for automatic tensor conversion
+    auto input_records = inputs.view().records();
+    auto output_records = outputs.view().records();
+    cms::torch::alpaka::SoAWrapper inputs_metadata(batch_size);
+    inputs_metadata.append_block("features", input_records.pt(), input_records.eta(), input_records.phi());
+    cms::torch::alpaka::SoAWrapper outputs_metadata(batch_size);
+    outputs_metadata.append_block("preds", output_records.c1(), output_records.c2());
+    cms::torch::alpaka::ModelMetadata metadata(inputs_metadata, outputs_metadata);
+
+    // inference
+    NvtxScopedRange move_to_device("Classifier::move_to_device");
+    if (cms::torch::alpaka::device(event.queue()) != globalCache()->device())
+      globalCache()->to(event.queue());
+    assert(cms::torch::alpaka::device(event.queue()) == globalCache()->device());
+    move_to_device.end();
+    NvtxScopedRange infer_range("Classifier::inference");
+    globalCache()->forward(metadata);
+    infer_range.end();
+
+    // assert output matches expected
+    kernels_->AssertClassification(event.queue(), outputs);
+    event.emplace(outputs_token_, std::move(outputs));
+    auto t2 = std::chrono::high_resolution_clock::now();
+    std::cout << "(Classification) OK - " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count()
+              << " us" << std::endl;
+    produce_range.end();
+  }
+
+  /**
   * @brief Describes module configuration parameters.
   * @param descriptions Configuration description object.
   */
-void JitClassificationProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-  desc.add<edm::InputTag>("inputs");
-  desc.add<edm::FileInPath>("modelPath");
-  descriptions.addWithDefaultLabel(desc);
-}
+  void JitClassificationProducer::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
+    edm::ParameterSetDescription desc;
+    desc.add<edm::InputTag>("inputs");
+    desc.add<edm::FileInPath>("modelPath");
+    descriptions.addWithDefaultLabel(desc);
+  }

 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/PhysicsTools/PyTorch/plugins/alpaka/Kernels.dev.cc b/PhysicsTools/PyTorch/plugins/alpaka/Kernels.dev.cc
index 476812a11be..f4bf0cc35ea 100644
--- a/PhysicsTools/PyTorch/plugins/alpaka/Kernels.dev.cc
+++ b/PhysicsTools/PyTorch/plugins/alpaka/Kernels.dev.cc
@@ -6,25 +6,23 @@
 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
 #include "PhysicsTools/PyTorch/plugins/alpaka/Kernels.h"

-
 namespace ALPAKA_ACCELERATOR_NAMESPACE {

-using namespace cms::alpakatools;
-
+  using namespace cms::alpakatools;

-class FillParticleCollectionKernel {
-public:
-  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
-  ALPAKA_FN_ACC void operator()(const TAcc &acc, torchportable::ParticleCollection::View data, float value) const {
-    for (auto tid : uniform_elements(acc, data.metadata().size())) {
-      data.pt()[tid] = value;
-      data.phi()[tid] = value;
-      data.eta()[tid] = value;
+  class FillParticleCollectionKernel {
+  public:
+    template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
+    ALPAKA_FN_ACC void operator()(const TAcc &acc, torchportable::ParticleCollection::View data, float value) const {
+      for (auto tid : uniform_elements(acc, data.metadata().size())) {
+        data.pt()[tid] = value;
+        data.phi()[tid] = value;
+        data.eta()[tid] = value;
+      }
     }
-  }
-};
+  };

-/**
+  /**
   * @brief Fill all values in a particle collection with a specified constant.
   *
   * For debugging and unit testing.
@@ -33,26 +31,26 @@ public:
   * @param data Particle collection to be modified.
   * @param value Constant value to fill the collection with.
*/ -void Kernels::FillParticleCollection(Queue &queue, torchportable::ParticleCollection &data, float value) { - uint32_t threads_per_block = 512; - uint32_t blocks_per_grid = divide_up_by(data.view().metadata().size(), threads_per_block); - auto grid = make_workdiv(blocks_per_grid, threads_per_block); - alpaka::exec(queue, grid, FillParticleCollectionKernel{}, data.view(), value); -} + void Kernels::FillParticleCollection(Queue &queue, torchportable::ParticleCollection &data, float value) { + uint32_t threads_per_block = 512; + uint32_t blocks_per_grid = divide_up_by(data.view().metadata().size(), threads_per_block); + auto grid = make_workdiv(blocks_per_grid, threads_per_block); + alpaka::exec(queue, grid, FillParticleCollectionKernel{}, data.view(), value); + } -class AssertCombinatoricsKernel { + class AssertCombinatoricsKernel { public: - template >> - ALPAKA_FN_ACC void operator()(const TAcc &acc, torchportable::ParticleCollection::View data, float value) const { - for (auto tid : uniform_elements(acc, data.metadata().size())) { - ALPAKA_ASSERT_ACC(data.pt()[tid] == value); - ALPAKA_ASSERT_ACC(data.phi()[tid] == value); - ALPAKA_ASSERT_ACC(data.eta()[tid] == value); - } - } - }; - -/** + template >> + ALPAKA_FN_ACC void operator()(const TAcc &acc, torchportable::ParticleCollection::View data, float value) const { + for (auto tid : uniform_elements(acc, data.metadata().size())) { + ALPAKA_ASSERT_ACC(data.pt()[tid] == value); + ALPAKA_ASSERT_ACC(data.phi()[tid] == value); + ALPAKA_ASSERT_ACC(data.eta()[tid] == value); + } + } + }; + + /** * @brief Assert that the particle collection obeys certain combinatoric relationships. * * Used in test scenarios to verify data layout or transformation logic. @@ -61,25 +59,25 @@ class AssertCombinatoricsKernel { * @param data Particle collection to check. * @param value Reference value for validation. */ - void Kernels::AssertCombinatorics(Queue &queue, torchportable::ParticleCollection &data, float value) { - uint32_t threads_per_block = 512; - uint32_t blocks_per_grid = divide_up_by(data.view().metadata().size(), threads_per_block); - auto grid = make_workdiv(blocks_per_grid, threads_per_block); - alpaka::exec(queue, grid, AssertCombinatoricsKernel{}, data.view(), value); - } + void Kernels::AssertCombinatorics(Queue &queue, torchportable::ParticleCollection &data, float value) { + uint32_t threads_per_block = 512; + uint32_t blocks_per_grid = divide_up_by(data.view().metadata().size(), threads_per_block); + auto grid = make_workdiv(blocks_per_grid, threads_per_block); + alpaka::exec(queue, grid, AssertCombinatoricsKernel{}, data.view(), value); + } -class AssertClassificationKernel { - public: - template >> - ALPAKA_FN_ACC void operator()(const TAcc &acc, torchportable::ClassificationCollection::View data) const { - for (auto tid : uniform_elements(acc, data.metadata().size())) { - ALPAKA_ASSERT_ACC(data.c1()[tid] == 0.5f); - ALPAKA_ASSERT_ACC(data.c2()[tid] == 0.5f); + class AssertClassificationKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc &acc, torchportable::ClassificationCollection::View data) const { + for (auto tid : uniform_elements(acc, data.metadata().size())) { + ALPAKA_ASSERT_ACC(data.c1()[tid] == 0.5f); + ALPAKA_ASSERT_ACC(data.c2()[tid] == 0.5f); + } } - } -}; + }; -/** + /** * @brief Validate classification model outputs. * * Checks whether the classification outputs match expected format or values. @@ -88,24 +86,24 @@ class AssertClassificationKernel { * @param queue Alpaka execution queue. 
* @param data Classification output collection. */ -void Kernels::AssertClassification(Queue &queue, torchportable::ClassificationCollection &data) { - uint32_t threads_per_block = 512; - uint32_t blocks_per_grid = divide_up_by(data.view().metadata().size(), threads_per_block); - auto grid = make_workdiv(blocks_per_grid, threads_per_block); - alpaka::exec(queue, grid, AssertClassificationKernel{}, data.view()); -} + void Kernels::AssertClassification(Queue &queue, torchportable::ClassificationCollection &data) { + uint32_t threads_per_block = 512; + uint32_t blocks_per_grid = divide_up_by(data.view().metadata().size(), threads_per_block); + auto grid = make_workdiv(blocks_per_grid, threads_per_block); + alpaka::exec(queue, grid, AssertClassificationKernel{}, data.view()); + } -class AssertRegressionKernel { - public: - template >> - ALPAKA_FN_ACC void operator()(const TAcc &acc, torchportable::RegressionCollection::View data) const { - for (auto tid : uniform_elements(acc, data.metadata().size())) { - ALPAKA_ASSERT_ACC(data.reco_pt()[tid] == 0.5f); + class AssertRegressionKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc &acc, torchportable::RegressionCollection::View data) const { + for (auto tid : uniform_elements(acc, data.metadata().size())) { + ALPAKA_ASSERT_ACC(data.reco_pt()[tid] == 0.5f); + } } - } -}; + }; -/** + /** * @brief Validate regression model outputs. * * Similar to classification checks, this is used for asserting output correctness in regression tasks. @@ -113,11 +111,11 @@ class AssertRegressionKernel { * @param queue Alpaka execution queue. * @param data Regression output collection. */ -void Kernels::AssertRegression(Queue &queue, torchportable::RegressionCollection &data) { - uint32_t threads_per_block = 512; - uint32_t blocks_per_grid = divide_up_by(data.view().metadata().size(), threads_per_block); - auto grid = make_workdiv(blocks_per_grid, threads_per_block); - alpaka::exec(queue, grid, AssertRegressionKernel{}, data.view()); -} + void Kernels::AssertRegression(Queue &queue, torchportable::RegressionCollection &data) { + uint32_t threads_per_block = 512; + uint32_t blocks_per_grid = divide_up_by(data.view().metadata().size(), threads_per_block); + auto grid = make_workdiv(blocks_per_grid, threads_per_block); + alpaka::exec(queue, grid, AssertRegressionKernel{}, data.view()); + } } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/plugins/alpaka/Kernels.h b/PhysicsTools/PyTorch/plugins/alpaka/Kernels.h index 8a62a509aa2..58e6dfd1651 100644 --- a/PhysicsTools/PyTorch/plugins/alpaka/Kernels.h +++ b/PhysicsTools/PyTorch/plugins/alpaka/Kernels.h @@ -8,20 +8,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { -/** + /** * @class Kernels * @brief Utility class containing helper functions to run simple Alpaka kernels for testing or validation. * * This class provides simple device-side functionality for modifying and verifying * collections of structured SoA data, such as particles, classification outputs, and regressions. 
*/ -class Kernels { - public: - void FillParticleCollection(Queue &queue, torchportable::ParticleCollection &data, float value); - void AssertCombinatorics(Queue &queue, torchportable::ParticleCollection &data, float value); - void AssertClassification(Queue &queue, torchportable::ClassificationCollection &data); - void AssertRegression(Queue &queue, torchportable::RegressionCollection &data); -}; + class Kernels { + public: + void FillParticleCollection(Queue &queue, torchportable::ParticleCollection &data, float value); + void AssertCombinatorics(Queue &queue, torchportable::ParticleCollection &data, float value); + void AssertClassification(Queue &queue, torchportable::ClassificationCollection &data); + void AssertRegression(Queue &queue, torchportable::RegressionCollection &data); + }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/test/alpaka/testModelAOT.cc b/PhysicsTools/PyTorch/test/alpaka/testModelAOT.cc index 03a27610783..83fe9bc86c6 100644 --- a/PhysicsTools/PyTorch/test/alpaka/testModelAOT.cc +++ b/PhysicsTools/PyTorch/test/alpaka/testModelAOT.cc @@ -6,64 +6,65 @@ #include "PhysicsTools/PyTorch/interface/Model.h" #include "PhysicsTools/PyTorch/test/testUtilities.h" - namespace ALPAKA_ACCELERATOR_NAMESPACE { -using namespace ::cms::torch::alpaka; + using namespace ::cms::torch::alpaka; -class TestModelAOT : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(TestModelAOT); - CPPUNIT_TEST(test); - CPPUNIT_TEST_SUITE_END(); + class TestModelAOT : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(TestModelAOT); + CPPUNIT_TEST(test); + CPPUNIT_TEST_SUITE_END(); - public: - void test(); - std::string shared_lib(); -}; + public: + void test(); + std::string shared_lib(); + }; -CPPUNIT_TEST_SUITE_REGISTRATION(TestModelAOT); + CPPUNIT_TEST_SUITE_REGISTRATION(TestModelAOT); -std::string TestModelAOT::shared_lib() { + std::string TestModelAOT::shared_lib() { #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - return get_path("/src/PhysicsTools/PyTorch/models/aot_regression_model_cuda_el9_amd64_gcc12.pt2"); + return get_path("/src/PhysicsTools/PyTorch/models/aot_regression_model_cuda_el9_amd64_gcc12.pt2"); #elif ALPAKA_ACC_GPU_HIP_ENABLED - std::cerr << "FAILED: ROCm backend not yet supported, see: https://github.com/pytorch/pytorch/blob/main/aten/CMakeLists.txt#L75" << std::endl; - return ""; + std::cerr << "FAILED: ROCm backend not yet supported, see: " + "https://github.com/pytorch/pytorch/blob/main/aten/CMakeLists.txt#L75" + << std::endl; + return ""; #elif ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED - return get_path("/src/PhysicsTools/PyTorch/models/aot_regression_model_cpu_el9_amd64_gcc12.pt2"); + return get_path("/src/PhysicsTools/PyTorch/models/aot_regression_model_cpu_el9_amd64_gcc12.pt2"); #else - std::cerr << "FAILED: Unable to detect backend type." << std::endl; - return ""; + std::cerr << "FAILED: Unable to detect backend type." 
<< std::endl; + return ""; #endif -} + } + + void TestModelAOT::test() { + // alpaka setup + Platform platform; + std::vector devices = ::alpaka::getDevs(platform); + CPPUNIT_ASSERT(!devices.empty()); + const auto& device = devices[0]; + Queue queue{device}; -void TestModelAOT::test() { - // alpaka setup - Platform platform; - std::vector devices = ::alpaka::getDevs(platform); - CPPUNIT_ASSERT(!devices.empty()); - const auto& device = devices[0]; - Queue queue{device}; + const std::size_t batch_size = 2 << 10; + std::vector<::torch::IValue> inputs; + inputs.push_back(torch::ones({batch_size, 3}, cms::torch::alpaka::device(queue))); - const std::size_t batch_size = 2 << 10; - std::vector<::torch::IValue> inputs; - inputs.push_back(torch::ones({batch_size, 3}, cms::torch::alpaka::device(queue))); + std::vector inputs_tensor; + for (const auto& val : inputs) + inputs_tensor.push_back(val.toTensor()); - std::vector inputs_tensor; - for (const auto& val : inputs) - inputs_tensor.push_back(val.toTensor()); - - auto lib_path = shared_lib(); - CPPUNIT_ASSERT_MESSAGE("FAILED: Architecture compiled shared library missing.", !lib_path.empty()); + auto lib_path = shared_lib(); + CPPUNIT_ASSERT_MESSAGE("FAILED: Architecture compiled shared library missing.", !lib_path.empty()); - Model aot_model(lib_path); - aot_model.to(queue); - std::cout << "Device: " << aot_model.device() << std::endl; - CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == aot_model.device()); - auto outputs = aot_model.forward(inputs_tensor); - for (const auto& val : outputs) { - CPPUNIT_ASSERT(::torch::allclose(val, ::torch::full_like(val, 0.5f))); + Model aot_model(lib_path); + aot_model.to(queue); + std::cout << "Device: " << aot_model.device() << std::endl; + CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == aot_model.device()); + auto outputs = aot_model.forward(inputs_tensor); + for (const auto& val : outputs) { + CPPUNIT_ASSERT(::torch::allclose(val, ::torch::full_like(val, 0.5f))); + } } -} } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/test/alpaka/testModelAlpakaNamespace.cc b/PhysicsTools/PyTorch/test/alpaka/testModelAlpakaNamespace.cc index 1ec8b98a1bc..7c4dd6755c8 100644 --- a/PhysicsTools/PyTorch/test/alpaka/testModelAlpakaNamespace.cc +++ b/PhysicsTools/PyTorch/test/alpaka/testModelAlpakaNamespace.cc @@ -6,39 +6,38 @@ #include "PhysicsTools/PyTorch/interface/AlpakaConfig.h" #include "PhysicsTools/PyTorch/test/testUtilities.h" - namespace ALPAKA_ACCELERATOR_NAMESPACE { -using namespace ::cms::torch::alpaka; - -class TestModelAlpakaNamespace : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(TestModelAlpakaNamespace); - CPPUNIT_TEST(test); - CPPUNIT_TEST_SUITE_END(); - -public: - void test(); - const int64_t batch_size_ = 2 << 10; -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(TestModelAlpakaNamespace); - -void TestModelAlpakaNamespace::test() { - // alpaka setup - Platform platform; - std::vector devices = ::alpaka::getDevs(platform); - CPPUNIT_ASSERT(!devices.empty()); - const auto& alpaka_device = devices[0]; - Queue queue{alpaka_device}; - - auto device = cms::torch::alpaka::device(queue); - std::cout << "Device: " << device << std::endl; - auto inputs = ::torch::ones({batch_size_, 3}, device=device); - - ClassifierModel model; - model.to(device); - auto outputs = model.forward(inputs); - CPPUNIT_ASSERT(::torch::allclose(outputs, ::torch::full_like(outputs, 0.5f))); -} + using namespace ::cms::torch::alpaka; + + class TestModelAlpakaNamespace : public CppUnit::TestFixture { + 
CPPUNIT_TEST_SUITE(TestModelAlpakaNamespace); + CPPUNIT_TEST(test); + CPPUNIT_TEST_SUITE_END(); + + public: + void test(); + const int64_t batch_size_ = 2 << 10; + }; + + CPPUNIT_TEST_SUITE_REGISTRATION(TestModelAlpakaNamespace); + + void TestModelAlpakaNamespace::test() { + // alpaka setup + Platform platform; + std::vector devices = ::alpaka::getDevs(platform); + CPPUNIT_ASSERT(!devices.empty()); + const auto& alpaka_device = devices[0]; + Queue queue{alpaka_device}; + + auto device = cms::torch::alpaka::device(queue); + std::cout << "Device: " << device << std::endl; + auto inputs = ::torch::ones({batch_size_, 3}, device = device); + + ClassifierModel model; + model.to(device); + auto outputs = model.forward(inputs); + CPPUNIT_ASSERT(::torch::allclose(outputs, ::torch::full_like(outputs, 0.5f))); + } } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/test/alpaka/testModelJIT.cc b/PhysicsTools/PyTorch/test/alpaka/testModelJIT.cc index d2168838c52..5d616f2933d 100644 --- a/PhysicsTools/PyTorch/test/alpaka/testModelJIT.cc +++ b/PhysicsTools/PyTorch/test/alpaka/testModelJIT.cc @@ -6,69 +6,68 @@ #include "PhysicsTools/PyTorch/interface/Model.h" #include "PhysicsTools/PyTorch/test/testUtilities.h" - namespace ALPAKA_ACCELERATOR_NAMESPACE { -using namespace ::cms::torch::alpaka; - -class TestModelJIT : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(TestModelJIT); - CPPUNIT_TEST(test); - CPPUNIT_TEST(testMultiBranch); - CPPUNIT_TEST_SUITE_END(); - - public: - void test(); - void testMultiBranch(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(TestModelJIT); - -void TestModelJIT::test() { - // alpaka setup - Platform platform; - std::vector devices = ::alpaka::getDevs(platform); - CPPUNIT_ASSERT(!devices.empty()); - const auto& device = devices[0]; - Queue queue{device}; - - const std::size_t batch_size = 2 << 10; - std::vector<::torch::IValue> inputs; - inputs.push_back(::torch::ones({batch_size, 3}, cms::torch::alpaka::device(queue))); - - auto m_path = get_path("/src/PhysicsTools/PyTorch/models/jit_classification_model.pt"); - Model jit_model(m_path); - jit_model.to(queue); - std::cout << "Device: " << jit_model.device() << std::endl; - CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == jit_model.device()); - auto outputs = jit_model.forward(inputs); - CPPUNIT_ASSERT(::torch::allclose(outputs.toTensor(), ::torch::full_like(outputs.toTensor(), 0.5f))); -} - -void TestModelJIT::testMultiBranch() { - // alpaka setup - Platform platform; - std::vector devices = ::alpaka::getDevs(platform); - CPPUNIT_ASSERT(!devices.empty()); - const auto& device = devices[0]; - Queue queue{device}; - - const std::size_t batch_size = 2 << 10; - std::vector<::torch::IValue> inputs; - inputs.push_back(::torch::ones({batch_size, 5}, cms::torch::alpaka::device(queue))); - - auto m_path = get_path("/src/PhysicsTools/PyTorch/models/jit_multi_branch_model.pt"); - Model jit_model(m_path); - jit_model.to(queue); - std::cout << "Device: " << jit_model.device() << std::endl; - CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == jit_model.device()); - auto outputs = jit_model.forward(inputs).toTuple(); - - auto class_probs = outputs->elements()[0].toTensor(); - auto reg_value = outputs->elements()[1].toTensor(); - - CPPUNIT_ASSERT(::torch::allclose(class_probs, ::torch::full_like(class_probs, 0.2f))); - CPPUNIT_ASSERT(::torch::allclose(reg_value, ::torch::full_like(reg_value, 15.7286f))); -} + using namespace ::cms::torch::alpaka; + + class TestModelJIT : public CppUnit::TestFixture { + 
CPPUNIT_TEST_SUITE(TestModelJIT); + CPPUNIT_TEST(test); + CPPUNIT_TEST(testMultiBranch); + CPPUNIT_TEST_SUITE_END(); + + public: + void test(); + void testMultiBranch(); + }; + + CPPUNIT_TEST_SUITE_REGISTRATION(TestModelJIT); + + void TestModelJIT::test() { + // alpaka setup + Platform platform; + std::vector devices = ::alpaka::getDevs(platform); + CPPUNIT_ASSERT(!devices.empty()); + const auto& device = devices[0]; + Queue queue{device}; + + const std::size_t batch_size = 2 << 10; + std::vector<::torch::IValue> inputs; + inputs.push_back(::torch::ones({batch_size, 3}, cms::torch::alpaka::device(queue))); + + auto m_path = get_path("/src/PhysicsTools/PyTorch/models/jit_classification_model.pt"); + Model jit_model(m_path); + jit_model.to(queue); + std::cout << "Device: " << jit_model.device() << std::endl; + CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == jit_model.device()); + auto outputs = jit_model.forward(inputs); + CPPUNIT_ASSERT(::torch::allclose(outputs.toTensor(), ::torch::full_like(outputs.toTensor(), 0.5f))); + } + + void TestModelJIT::testMultiBranch() { + // alpaka setup + Platform platform; + std::vector devices = ::alpaka::getDevs(platform); + CPPUNIT_ASSERT(!devices.empty()); + const auto& device = devices[0]; + Queue queue{device}; + + const std::size_t batch_size = 2 << 10; + std::vector<::torch::IValue> inputs; + inputs.push_back(::torch::ones({batch_size, 5}, cms::torch::alpaka::device(queue))); + + auto m_path = get_path("/src/PhysicsTools/PyTorch/models/jit_multi_branch_model.pt"); + Model jit_model(m_path); + jit_model.to(queue); + std::cout << "Device: " << jit_model.device() << std::endl; + CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == jit_model.device()); + auto outputs = jit_model.forward(inputs).toTuple(); + + auto class_probs = outputs->elements()[0].toTensor(); + auto reg_value = outputs->elements()[1].toTensor(); + + CPPUNIT_ASSERT(::torch::allclose(class_probs, ::torch::full_like(class_probs, 0.2f))); + CPPUNIT_ASSERT(::torch::allclose(reg_value, ::torch::full_like(reg_value, 15.7286f))); + } } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/test/alpaka/testPortableInferenceAOT.dev.cc b/PhysicsTools/PyTorch/test/alpaka/testPortableInferenceAOT.dev.cc index a7d9a07d52d..7c407fc25d9 100644 --- a/PhysicsTools/PyTorch/test/alpaka/testPortableInferenceAOT.dev.cc +++ b/PhysicsTools/PyTorch/test/alpaka/testPortableInferenceAOT.dev.cc @@ -3,7 +3,6 @@ #include #include - #include "DataFormats/Portable/interface/PortableCollection.h" #include "DataFormats/Portable/interface/PortableHostCollection.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" @@ -15,102 +14,94 @@ #include "PhysicsTools/PyTorch/interface/SoAWrapper.h" #include "PhysicsTools/PyTorch/test/testUtilities.h" - namespace ALPAKA_ACCELERATOR_NAMESPACE { -GENERATE_SOA_LAYOUT(SoAInputsTemplate, - SOA_COLUMN(float, x), - SOA_COLUMN(float, y), - SOA_COLUMN(float, z) -) + GENERATE_SOA_LAYOUT(SoAInputsTemplate, SOA_COLUMN(float, x), SOA_COLUMN(float, y), SOA_COLUMN(float, z)) -GENERATE_SOA_LAYOUT(SoAOutputsTemplate, - SOA_COLUMN(float, prob) -) + GENERATE_SOA_LAYOUT(SoAOutputsTemplate, SOA_COLUMN(float, prob)) -using SoAInputs = SoAInputsTemplate<>; -using SoAOutputs = SoAOutputsTemplate<>; + using SoAInputs = SoAInputsTemplate<>; + using SoAOutputs = SoAOutputsTemplate<>; + class TestPortableInferenceAOT : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(TestPortableInferenceAOT); + CPPUNIT_TEST(test); + CPPUNIT_TEST_SUITE_END(); -class TestPortableInferenceAOT : 
public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(TestPortableInferenceAOT); - CPPUNIT_TEST(test); - CPPUNIT_TEST_SUITE_END(); + public: + void test(); + std::string shared_lib(); + }; - public: - void test(); - std::string shared_lib(); -}; + CPPUNIT_TEST_SUITE_REGISTRATION(TestPortableInferenceAOT); -CPPUNIT_TEST_SUITE_REGISTRATION(TestPortableInferenceAOT); - -std::string TestPortableInferenceAOT::shared_lib() { + std::string TestPortableInferenceAOT::shared_lib() { #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - return get_path("/src/PhysicsTools/PyTorch/models/aot_regression_model_cuda_el9_amd64_gcc12.pt2"); + return get_path("/src/PhysicsTools/PyTorch/models/aot_regression_model_cuda_el9_amd64_gcc12.pt2"); #elif ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED - return get_path("/src/PhysicsTools/PyTorch/models/aot_regression_model_cpu_el9_amd64_gcc12.pt2"); + return get_path("/src/PhysicsTools/PyTorch/models/aot_regression_model_cpu_el9_amd64_gcc12.pt2"); #else - std::cerr << "FAILED: Unable to detect backend type." << std::endl; - return ""; + std::cerr << "FAILED: Unable to detect backend type." << std::endl; + return ""; #endif -} - -void TestPortableInferenceAOT::test() { - // alpaka setup - Platform platform; - std::vector alpaka_devices = alpaka::getDevs(platform); - const auto& alpaka_host = alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); - CPPUNIT_ASSERT(alpaka_devices.size()); - const auto& alpaka_device = alpaka_devices[0]; - Queue queue{alpaka_device}; - - const std::size_t batch_size = 2 << 10; - - // host structs - PortableHostCollection inputs_host(batch_size, cms::alpakatools::host()); - PortableHostCollection outputs_host(batch_size, cms::alpakatools::host()); - // device structs - PortableCollection inputs_device(batch_size, alpaka_device); - PortableCollection outputs_device(batch_size, alpaka_device); - - // prepare inputs - for (size_t i = 0; i < batch_size; i++) { - inputs_host.view().x()[i] = 0.0f; - inputs_host.view().y()[i] = 0.0f; - inputs_host.view().z()[i] = 0.0f; } - alpaka::memcpy(queue, inputs_device.buffer(), inputs_host.buffer()); - alpaka::wait(queue); - - { - // guard scope - cms::torch::alpaka::Guard guard(queue); - - // instantiate model - auto lib_path = shared_lib(); - CPPUNIT_ASSERT_MESSAGE("FAILED: Architecture compiled shared library is missing.", !lib_path.empty()); - auto model = cms::torch::alpaka::Model(lib_path); - model.to(queue); - CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == model.device()); - std::cout << "Device: " << model.device() << std::endl; - - // metadata for automatic tensor conversion - auto input_records = inputs_device.view().records(); - auto output_records = outputs_device.view().records(); - cms::torch::alpaka::SoAWrapper inputs_metadata(batch_size); - inputs_metadata.append_block("features", input_records.x(), input_records.y(), input_records.z()); - cms::torch::alpaka::SoAWrapper outputs_metadata(batch_size); - outputs_metadata.append_block("prob", output_records.prob()); - cms::torch::alpaka::ModelMetadata metadata(inputs_metadata, outputs_metadata); - // inference - model.forward(metadata); - // check outputs - alpaka::memcpy(queue, outputs_host.buffer(), outputs_device.buffer()); - alpaka::wait(queue); + + void TestPortableInferenceAOT::test() { + // alpaka setup + Platform platform; + std::vector alpaka_devices = alpaka::getDevs(platform); + const auto& alpaka_host = alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); + CPPUNIT_ASSERT(alpaka_devices.size()); + const auto& 
alpaka_device = alpaka_devices[0]; + Queue queue{alpaka_device}; + + const std::size_t batch_size = 2 << 10; + + // host structs + PortableHostCollection inputs_host(batch_size, cms::alpakatools::host()); + PortableHostCollection outputs_host(batch_size, cms::alpakatools::host()); + // device structs + PortableCollection inputs_device(batch_size, alpaka_device); + PortableCollection outputs_device(batch_size, alpaka_device); + + // prepare inputs for (size_t i = 0; i < batch_size; i++) { - CPPUNIT_ASSERT(outputs_host.const_view().prob()[i] == 0.5f); + inputs_host.view().x()[i] = 0.0f; + inputs_host.view().y()[i] = 0.0f; + inputs_host.view().z()[i] = 0.0f; + } + alpaka::memcpy(queue, inputs_device.buffer(), inputs_host.buffer()); + alpaka::wait(queue); + + { + // guard scope + cms::torch::alpaka::Guard guard(queue); + + // instantiate model + auto lib_path = shared_lib(); + CPPUNIT_ASSERT_MESSAGE("FAILED: Architecture compiled shared library is missing.", !lib_path.empty()); + auto model = cms::torch::alpaka::Model(lib_path); + model.to(queue); + CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == model.device()); + std::cout << "Device: " << model.device() << std::endl; + + // metadata for automatic tensor conversion + auto input_records = inputs_device.view().records(); + auto output_records = outputs_device.view().records(); + cms::torch::alpaka::SoAWrapper inputs_metadata(batch_size); + inputs_metadata.append_block("features", input_records.x(), input_records.y(), input_records.z()); + cms::torch::alpaka::SoAWrapper outputs_metadata(batch_size); + outputs_metadata.append_block("prob", output_records.prob()); + cms::torch::alpaka::ModelMetadata metadata(inputs_metadata, outputs_metadata); + // inference + model.forward(metadata); + // check outputs + alpaka::memcpy(queue, outputs_host.buffer(), outputs_device.buffer()); + alpaka::wait(queue); + for (size_t i = 0; i < batch_size; i++) { + CPPUNIT_ASSERT(outputs_host.const_view().prob()[i] == 0.5f); + } } } -} } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/test/alpaka/testPortableInferenceJIT.cc b/PhysicsTools/PyTorch/test/alpaka/testPortableInferenceJIT.cc index 3abe82769a3..b56350bbdc9 100644 --- a/PhysicsTools/PyTorch/test/alpaka/testPortableInferenceJIT.cc +++ b/PhysicsTools/PyTorch/test/alpaka/testPortableInferenceJIT.cc @@ -3,7 +3,6 @@ #include #include - #include "DataFormats/Portable/interface/PortableCollection.h" #include "DataFormats/Portable/interface/PortableHostCollection.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" @@ -15,91 +14,83 @@ #include "PhysicsTools/PyTorch/interface/SoAWrapper.h" #include "PhysicsTools/PyTorch/test/testUtilities.h" - namespace ALPAKA_ACCELERATOR_NAMESPACE { -GENERATE_SOA_LAYOUT(SoAInputsTemplate, - SOA_COLUMN(float, x), - SOA_COLUMN(float, y), - SOA_COLUMN(float, z) -) - -GENERATE_SOA_LAYOUT(SoAOutputsTemplate, - SOA_COLUMN(float, m), - SOA_COLUMN(float, n) -) - -using SoAInputs = SoAInputsTemplate<>; -using SoAOutputs = SoAOutputsTemplate<>; - -class TestPortableInferenceJIT : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(TestPortableInferenceJIT); - CPPUNIT_TEST(test); - CPPUNIT_TEST_SUITE_END(); - - public: - void test(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(TestPortableInferenceJIT); - -void TestPortableInferenceJIT::test() { - // alpaka setup - Platform platform; - std::vector alpaka_devices = alpaka::getDevs(platform); - const auto& alpaka_host = alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); - CPPUNIT_ASSERT(alpaka_devices.size()); 
- const auto& alpaka_device = alpaka_devices[0]; - Queue queue{alpaka_device}; - - const std::size_t batch_size = 32; - - // host structs - PortableHostCollection inputs_host(batch_size, cms::alpakatools::host()); - PortableHostCollection outputs_host(batch_size, cms::alpakatools::host()); - // device structs - PortableCollection inputs_device(batch_size, alpaka_device); - PortableCollection outputs_device(batch_size, alpaka_device); - - // prepare inputs - for (size_t i = 0; i < batch_size; i++) { - inputs_host.view().x()[i] = 0.0f; - inputs_host.view().y()[i] = 0.0f; - inputs_host.view().z()[i] = 0.0f; - } - alpaka::memcpy(queue, inputs_device.buffer(), inputs_host.buffer()); - alpaka::wait(queue); - - { - // guard scope - cms::torch::alpaka::Guard guard(queue); - - // instantiate model - auto m_path = get_path("/src/PhysicsTools/PyTorch/models/jit_classification_model.pt"); - auto model = cms::torch::alpaka::Model(m_path); - model.to(queue); - CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == model.device()); - std::cout << "Device: " << model.device() << std::endl; - - // metadata for automatic tensor conversion - auto input_records = inputs_device.view().records(); - auto output_records = outputs_device.view().records(); - cms::torch::alpaka::SoAWrapper inputs_metadata(batch_size); - inputs_metadata.append_block("features", input_records.x(), input_records.y(), input_records.z()); - cms::torch::alpaka::SoAWrapper outputs_metadata(batch_size); - outputs_metadata.append_block("preds", output_records.m(), output_records.n()); - cms::torch::alpaka::ModelMetadata metadata(inputs_metadata, outputs_metadata); - // inference - model.forward(metadata); - // check outputs - alpaka::memcpy(queue, outputs_host.buffer(), outputs_device.buffer()); - alpaka::wait(queue); + GENERATE_SOA_LAYOUT(SoAInputsTemplate, SOA_COLUMN(float, x), SOA_COLUMN(float, y), SOA_COLUMN(float, z)) + + GENERATE_SOA_LAYOUT(SoAOutputsTemplate, SOA_COLUMN(float, m), SOA_COLUMN(float, n)) + + using SoAInputs = SoAInputsTemplate<>; + using SoAOutputs = SoAOutputsTemplate<>; + + class TestPortableInferenceJIT : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(TestPortableInferenceJIT); + CPPUNIT_TEST(test); + CPPUNIT_TEST_SUITE_END(); + + public: + void test(); + }; + CPPUNIT_TEST_SUITE_REGISTRATION(TestPortableInferenceJIT); + + void TestPortableInferenceJIT::test() { + // alpaka setup + Platform platform; + std::vector alpaka_devices = alpaka::getDevs(platform); + const auto& alpaka_host = alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); + CPPUNIT_ASSERT(alpaka_devices.size()); + const auto& alpaka_device = alpaka_devices[0]; + Queue queue{alpaka_device}; + + const std::size_t batch_size = 32; + + // host structs + PortableHostCollection inputs_host(batch_size, cms::alpakatools::host()); + PortableHostCollection outputs_host(batch_size, cms::alpakatools::host()); + // device structs + PortableCollection inputs_device(batch_size, alpaka_device); + PortableCollection outputs_device(batch_size, alpaka_device); + + // prepare inputs for (size_t i = 0; i < batch_size; i++) { - CPPUNIT_ASSERT(outputs_host.const_view().m()[i] == 0.5f); - CPPUNIT_ASSERT(outputs_host.const_view().n()[i] == 0.5f); + inputs_host.view().x()[i] = 0.0f; + inputs_host.view().y()[i] = 0.0f; + inputs_host.view().z()[i] = 0.0f; + } + alpaka::memcpy(queue, inputs_device.buffer(), inputs_host.buffer()); + alpaka::wait(queue); + + { + // guard scope + cms::torch::alpaka::Guard guard(queue); + + // instantiate model + auto m_path = 
get_path("/src/PhysicsTools/PyTorch/models/jit_classification_model.pt"); + auto model = cms::torch::alpaka::Model(m_path); + model.to(queue); + CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == model.device()); + std::cout << "Device: " << model.device() << std::endl; + + // metadata for automatic tensor conversion + auto input_records = inputs_device.view().records(); + auto output_records = outputs_device.view().records(); + cms::torch::alpaka::SoAWrapper inputs_metadata(batch_size); + inputs_metadata.append_block("features", input_records.x(), input_records.y(), input_records.z()); + cms::torch::alpaka::SoAWrapper outputs_metadata(batch_size); + outputs_metadata.append_block("preds", output_records.m(), output_records.n()); + cms::torch::alpaka::ModelMetadata metadata(inputs_metadata, outputs_metadata); + // inference + model.forward(metadata); + // check outputs + alpaka::memcpy(queue, outputs_host.buffer(), outputs_device.buffer()); + alpaka::wait(queue); + + for (size_t i = 0; i < batch_size; i++) { + CPPUNIT_ASSERT(outputs_host.const_view().m()[i] == 0.5f); + CPPUNIT_ASSERT(outputs_host.const_view().n()[i] == 0.5f); + } } } -} } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/test/alpaka/testSOADataTypes.dev.cc b/PhysicsTools/PyTorch/test/alpaka/testSOADataTypes.dev.cc index a99891fcc0f..dfe6dde6ffd 100644 --- a/PhysicsTools/PyTorch/test/alpaka/testSOADataTypes.dev.cc +++ b/PhysicsTools/PyTorch/test/alpaka/testSOADataTypes.dev.cc @@ -23,153 +23,151 @@ #include "PhysicsTools/PyTorch/interface/AlpakaConfig.h" #include "PhysicsTools/PyTorch/interface/Converter.h" +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace ::cms::torch::alpaka; + + class TestSOADataTypes : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(TestSOADataTypes); + CPPUNIT_TEST(testInterfaceVerbose); + CPPUNIT_TEST(testMultiOutput); + CPPUNIT_TEST(testSingleElement); + CPPUNIT_TEST(testNoElement); + CPPUNIT_TEST(testEmptyMetadata); + CPPUNIT_TEST_SUITE_END(); + + public: + void testInterfaceVerbose(); + void testIncorrectMetadata(); + void testMultiOutput(); + void testSingleElement(); + void testNoElement(); + void testEmptyMetadata(); + }; -namespace ALPAKA_ACCELERATOR_NAMESPACE { - -using namespace ::cms::torch::alpaka; - -class TestSOADataTypes : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(TestSOADataTypes); - CPPUNIT_TEST(testInterfaceVerbose); - CPPUNIT_TEST(testMultiOutput); - CPPUNIT_TEST(testSingleElement); - CPPUNIT_TEST(testNoElement); - CPPUNIT_TEST(testEmptyMetadata); - CPPUNIT_TEST_SUITE_END(); - - public: - void testInterfaceVerbose(); - void testIncorrectMetadata(); - void testMultiOutput(); - void testSingleElement(); - void testNoElement(); - void testEmptyMetadata(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(TestSOADataTypes); - -GENERATE_SOA_LAYOUT(SoATemplate, - SOA_EIGEN_COLUMN(Eigen::Vector3d, a), - SOA_EIGEN_COLUMN(Eigen::Vector3d, b), - - SOA_EIGEN_COLUMN(Eigen::Matrix2f, c), - - SOA_COLUMN(double, x), - SOA_COLUMN(double, y), - SOA_COLUMN(double, z), - - SOA_SCALAR(float, type), - SOA_SCALAR(int, someNumber), - - SOA_COLUMN(double, v), - SOA_COLUMN(double, w)); - -using SoA = SoATemplate<>; -using SoAView = SoA::View; -using SoAMetaRecords = SoA::View::Metarecords; - -constexpr auto tol = 1.0e-5; - -class FillKernel { - public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, PortableCollection::View view) const { - if (cms::alpakatools::once_per_grid(acc)) { - view.type() = 4; - view.someNumber() = 5; - } + 
CPPUNIT_TEST_SUITE_REGISTRATION(TestSOADataTypes); - for (int32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { - view[i].a()(0) = 1 + i; - view[i].a()(1) = 2 + i; - view[i].a()(2) = 3 + i; + GENERATE_SOA_LAYOUT(SoATemplate, + SOA_EIGEN_COLUMN(Eigen::Vector3d, a), + SOA_EIGEN_COLUMN(Eigen::Vector3d, b), - view[i].b()(0) = 4 + i; - view[i].b()(1) = 5 + i; - view[i].b()(2) = 6 + i; + SOA_EIGEN_COLUMN(Eigen::Matrix2f, c), - view[i].c()(0, 0) = 4 + i; - view[i].c()(0, 1) = 6 + i; - view[i].c()(1, 0) = 8 + i; - view[i].c()(1, 1) = 10 + i; + SOA_COLUMN(double, x), + SOA_COLUMN(double, y), + SOA_COLUMN(double, z), - view.x()[i] = 12 + i; - view.y()[i] = 1 + 2.5 * i; - view.z()[i] = 36 * i; - } - } -}; - -class InputVerifyKernel { - public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, PortableCollection::View view) const { - if (cms::alpakatools::once_per_grid(acc)) { - ALPAKA_ASSERT_ACC(view.type() == 4); - ALPAKA_ASSERT_ACC(view.someNumber() == 5); + SOA_SCALAR(float, type), + SOA_SCALAR(int, someNumber), + + SOA_COLUMN(double, v), + SOA_COLUMN(double, w)); + + using SoA = SoATemplate<>; + using SoAView = SoA::View; + using SoAMetaRecords = SoA::View::Metarecords; + + constexpr auto tol = 1.0e-5; + + class FillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, PortableCollection::View view) const { + if (cms::alpakatools::once_per_grid(acc)) { + view.type() = 4; + view.someNumber() = 5; + } + + for (int32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { + view[i].a()(0) = 1 + i; + view[i].a()(1) = 2 + i; + view[i].a()(2) = 3 + i; + + view[i].b()(0) = 4 + i; + view[i].b()(1) = 5 + i; + view[i].b()(2) = 6 + i; + + view[i].c()(0, 0) = 4 + i; + view[i].c()(0, 1) = 6 + i; + view[i].c()(1, 0) = 8 + i; + view[i].c()(1, 1) = 10 + i; + + view.x()[i] = 12 + i; + view.y()[i] = 1 + 2.5 * i; + view.z()[i] = 36 * i; + } } + }; - for (uint32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { - ALPAKA_ASSERT_ACC(view[i].a()(0) == 1 + i); - ALPAKA_ASSERT_ACC(view[i].a()(1) == 2 + i); - ALPAKA_ASSERT_ACC(view[i].a()(2) == 3 + i); + class InputVerifyKernel { + public: + ALPAKA_FN_ACC void operator()(Acc1D const& acc, PortableCollection::View view) const { + if (cms::alpakatools::once_per_grid(acc)) { + ALPAKA_ASSERT_ACC(view.type() == 4); + ALPAKA_ASSERT_ACC(view.someNumber() == 5); + } + + for (uint32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { + ALPAKA_ASSERT_ACC(view[i].a()(0) == 1 + i); + ALPAKA_ASSERT_ACC(view[i].a()(1) == 2 + i); + ALPAKA_ASSERT_ACC(view[i].a()(2) == 3 + i); - ALPAKA_ASSERT_ACC(view[i].b()(0) == 4 + i); - ALPAKA_ASSERT_ACC(view[i].b()(1) == 5 + i); - ALPAKA_ASSERT_ACC(view[i].b()(2) == 6 + i); + ALPAKA_ASSERT_ACC(view[i].b()(0) == 4 + i); + ALPAKA_ASSERT_ACC(view[i].b()(1) == 5 + i); + ALPAKA_ASSERT_ACC(view[i].b()(2) == 6 + i); - ALPAKA_ASSERT_ACC(view[i].c()(0, 0) == 4 + i); - ALPAKA_ASSERT_ACC(view[i].c()(0, 1) == 6 + i); - ALPAKA_ASSERT_ACC(view[i].c()(1, 0) == 8 + i); - ALPAKA_ASSERT_ACC(view[i].c()(1, 1) == 10 + i); + ALPAKA_ASSERT_ACC(view[i].c()(0, 0) == 4 + i); + ALPAKA_ASSERT_ACC(view[i].c()(0, 1) == 6 + i); + ALPAKA_ASSERT_ACC(view[i].c()(1, 0) == 8 + i); + ALPAKA_ASSERT_ACC(view[i].c()(1, 1) == 10 + i); - ALPAKA_ASSERT_ACC(view.x()[i] == 12 + i); - ALPAKA_ASSERT_ACC(view.y()[i] == 1 + 2.5 * i); - ALPAKA_ASSERT_ACC(view.z()[i] == 36 * i); + ALPAKA_ASSERT_ACC(view.x()[i] == 12 + i); + ALPAKA_ASSERT_ACC(view.y()[i] == 1 + 2.5 * i); + 
ALPAKA_ASSERT_ACC(view.z()[i] == 36 * i); + } } - } -}; - -class TestVerifyKernel { - public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - PortableCollection::View view, - torch::PackedTensorAccessor64 tensor_vector, - torch::PackedTensorAccessor64 tensor_matrix, - torch::PackedTensorAccessor64 tensor_column, - torch::PackedTensorAccessor64 tensor_scalar) const { - for (uint32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { - ALPAKA_ASSERT_ACC(view[i].a()(0) - tensor_vector[i][0][0] < tol); - ALPAKA_ASSERT_ACC(view[i].a()(1) - tensor_vector[i][0][1] < tol); - ALPAKA_ASSERT_ACC(view[i].a()(2) - tensor_vector[i][0][2] < tol); - ALPAKA_ASSERT_ACC(view[i].a()(0) - tensor_vector[i][0][0] > -tol); - ALPAKA_ASSERT_ACC(view[i].a()(1) - tensor_vector[i][0][1] > -tol); - ALPAKA_ASSERT_ACC(view[i].a()(2) - tensor_vector[i][0][2] > -tol); - - ALPAKA_ASSERT_ACC(view[i].b()(0) - tensor_vector[i][1][0] < tol); - ALPAKA_ASSERT_ACC(view[i].b()(1) - tensor_vector[i][1][1] < tol); - ALPAKA_ASSERT_ACC(view[i].b()(2) - tensor_vector[i][1][2] < tol); - ALPAKA_ASSERT_ACC(view[i].b()(0) - tensor_vector[i][1][0] > -tol); - ALPAKA_ASSERT_ACC(view[i].b()(1) - tensor_vector[i][1][1] > -tol); - ALPAKA_ASSERT_ACC(view[i].b()(2) - tensor_vector[i][1][2] > -tol); - - ALPAKA_ASSERT_ACC(view[i].c()(0, 0) - tensor_matrix[i][0][0][0] < tol); - ALPAKA_ASSERT_ACC(view[i].c()(0, 0) - tensor_matrix[i][0][0][0] > -tol); - ALPAKA_ASSERT_ACC(view[i].c()(0, 1) - tensor_matrix[i][0][0][1] < tol); - ALPAKA_ASSERT_ACC(view[i].c()(0, 1) - tensor_matrix[i][0][0][1] > -tol); - ALPAKA_ASSERT_ACC(view[i].c()(1, 0) - tensor_matrix[i][0][1][0] < tol); - ALPAKA_ASSERT_ACC(view[i].c()(1, 0) - tensor_matrix[i][0][1][0] > -tol); - ALPAKA_ASSERT_ACC(view[i].c()(1, 1) - tensor_matrix[i][0][1][1] < tol); - ALPAKA_ASSERT_ACC(view[i].c()(1, 1) - tensor_matrix[i][0][1][1] > -tol); - - ALPAKA_ASSERT_ACC(view.x()[i] - tensor_column[i][0] < tol); - ALPAKA_ASSERT_ACC(view.x()[i] - tensor_column[i][0] > -tol); - - ALPAKA_ASSERT_ACC(view.y()[i] - tensor_column[i][1] < tol); - ALPAKA_ASSERT_ACC(view.y()[i] - tensor_column[i][1] > -tol); - - ALPAKA_ASSERT_ACC(view.z()[i] - tensor_column[i][2] < tol); - ALPAKA_ASSERT_ACC(view.z()[i] - tensor_column[i][2] > -tol); + }; + + class TestVerifyKernel { + public: + ALPAKA_FN_ACC void operator()(Acc1D const& acc, + PortableCollection::View view, + torch::PackedTensorAccessor64 tensor_vector, + torch::PackedTensorAccessor64 tensor_matrix, + torch::PackedTensorAccessor64 tensor_column, + torch::PackedTensorAccessor64 tensor_scalar) const { + for (uint32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { + ALPAKA_ASSERT_ACC(view[i].a()(0) - tensor_vector[i][0][0] < tol); + ALPAKA_ASSERT_ACC(view[i].a()(1) - tensor_vector[i][0][1] < tol); + ALPAKA_ASSERT_ACC(view[i].a()(2) - tensor_vector[i][0][2] < tol); + ALPAKA_ASSERT_ACC(view[i].a()(0) - tensor_vector[i][0][0] > -tol); + ALPAKA_ASSERT_ACC(view[i].a()(1) - tensor_vector[i][0][1] > -tol); + ALPAKA_ASSERT_ACC(view[i].a()(2) - tensor_vector[i][0][2] > -tol); + + ALPAKA_ASSERT_ACC(view[i].b()(0) - tensor_vector[i][1][0] < tol); + ALPAKA_ASSERT_ACC(view[i].b()(1) - tensor_vector[i][1][1] < tol); + ALPAKA_ASSERT_ACC(view[i].b()(2) - tensor_vector[i][1][2] < tol); + ALPAKA_ASSERT_ACC(view[i].b()(0) - tensor_vector[i][1][0] > -tol); + ALPAKA_ASSERT_ACC(view[i].b()(1) - tensor_vector[i][1][1] > -tol); + ALPAKA_ASSERT_ACC(view[i].b()(2) - tensor_vector[i][1][2] > -tol); + + ALPAKA_ASSERT_ACC(view[i].c()(0, 0) - 
tensor_matrix[i][0][0][0] < tol); + ALPAKA_ASSERT_ACC(view[i].c()(0, 0) - tensor_matrix[i][0][0][0] > -tol); + ALPAKA_ASSERT_ACC(view[i].c()(0, 1) - tensor_matrix[i][0][0][1] < tol); + ALPAKA_ASSERT_ACC(view[i].c()(0, 1) - tensor_matrix[i][0][0][1] > -tol); + ALPAKA_ASSERT_ACC(view[i].c()(1, 0) - tensor_matrix[i][0][1][0] < tol); + ALPAKA_ASSERT_ACC(view[i].c()(1, 0) - tensor_matrix[i][0][1][0] > -tol); + ALPAKA_ASSERT_ACC(view[i].c()(1, 1) - tensor_matrix[i][0][1][1] < tol); + ALPAKA_ASSERT_ACC(view[i].c()(1, 1) - tensor_matrix[i][0][1][1] > -tol); + + ALPAKA_ASSERT_ACC(view.x()[i] - tensor_column[i][0] < tol); + ALPAKA_ASSERT_ACC(view.x()[i] - tensor_column[i][0] > -tol); + + ALPAKA_ASSERT_ACC(view.y()[i] - tensor_column[i][1] < tol); + ALPAKA_ASSERT_ACC(view.y()[i] - tensor_column[i][1] > -tol); + + ALPAKA_ASSERT_ACC(view.z()[i] - tensor_column[i][2] < tol); + ALPAKA_ASSERT_ACC(view.z()[i] - tensor_column[i][2] > -tol); ALPAKA_ASSERT_ACC(view.type() - tensor_scalar[i][0] < tol); ALPAKA_ASSERT_ACC(view.type() - tensor_scalar[i][0] > -tol); @@ -178,230 +176,224 @@ class TestVerifyKernel { }; class TestOutputVerifyKernel { - public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - PortableCollection::View view) const { - for (uint32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { - ALPAKA_ASSERT_ACC(view.x()[i] - view.v()[i] < tol); - ALPAKA_ASSERT_ACC(view.x()[i] - view.v()[i] > -tol); - - ALPAKA_ASSERT_ACC(view.y()[i] - view.w()[i] < tol); - ALPAKA_ASSERT_ACC(view.y()[i] - view.w()[i] > -tol); - } + public: + ALPAKA_FN_ACC void operator()(Acc1D const& acc, PortableCollection::View view) const { + for (uint32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { + ALPAKA_ASSERT_ACC(view.x()[i] - view.v()[i] < tol); + ALPAKA_ASSERT_ACC(view.x()[i] - view.v()[i] > -tol); + + ALPAKA_ASSERT_ACC(view.y()[i] - view.w()[i] < tol); + ALPAKA_ASSERT_ACC(view.y()[i] - view.w()[i] > -tol); } - }; - -void fill(Queue& queue, PortableCollection& collection) { - uint32_t items = 64; - uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); - auto workDiv = cms::alpakatools::make_workdiv(groups, items); - alpaka::exec(queue, workDiv, FillKernel{}, collection.view()); - alpaka::exec(queue, workDiv, InputVerifyKernel{}, collection.view()); -} - -void check(Queue& queue, PortableCollection& collection, std::vector tensors) { - uint32_t items = 64; - uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); - auto workDiv = cms::alpakatools::make_workdiv(groups, items); - alpaka::exec(queue, - workDiv, - TestVerifyKernel{}, - collection.view(), - tensors[3].toTensor().packed_accessor64(), - tensors[2].toTensor().packed_accessor64(), - tensors[0].toTensor().packed_accessor64(), - tensors[1].toTensor().packed_accessor64()); -} - -void check_not_ordered(Queue& queue, PortableCollection& collection, std::vector tensors) { - uint32_t items = 64; - uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); - auto workDiv = cms::alpakatools::make_workdiv(groups, items); - alpaka::exec(queue, - workDiv, - TestVerifyKernel{}, - collection.view(), - tensors[0].toTensor().packed_accessor64(), - tensors[1].toTensor().packed_accessor64(), - tensors[2].toTensor().packed_accessor64(), - tensors[3].toTensor().packed_accessor64()); -} - -void check_output(Queue& queue, PortableCollection& collection) { - uint32_t items = 64; - uint32_t groups = 
cms::alpakatools::divide_up_by(collection->metadata().size(), items); - auto workDiv = cms::alpakatools::make_workdiv(groups, items); - alpaka::exec(queue, - workDiv, - TestOutputVerifyKernel{}, - collection.view()); -} - -void TestSOADataTypes::testInterfaceVerbose() { - Platform platform; - std::vector alpakaDevices = alpaka::getDevs(platform); - const auto& alpakaHost = alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); - CPPUNIT_ASSERT(alpakaDevices.size()); - const auto& alpakaDevice = alpakaDevices[0]; - Queue queue{alpakaDevice}; - torch::Device torchDevice(kTorchDeviceType); - - // Large batch size, so multiple bunches needed - const std::size_t batch_size = 325; - - // Create and fill needed portable collections - PortableCollection deviceCollection(batch_size, queue); - fill(queue, deviceCollection); - SoAMetaRecords records = deviceCollection.view().records(); - - SoAWrapper input(batch_size); - input.append_block("vector", records.a(), records.b()); - input.append_block("matrix", records.c()); - input.append_block("column", records.x(), records.y(), records.z()); - input.append_block("scalar", records.type()); - input.change_order({"column", "scalar", "matrix", "vector"}); - - SoAWrapper output(batch_size); - output.append_block("result", records.v()); - ModelMetadata metadata(input, output); - - alpaka::wait(queue); - std::vector tensors = Converter::convert_input(metadata, torchDevice); - - // Check if tensor list built correctly - check(queue, deviceCollection, tensors); -}; - -void TestSOADataTypes::testMultiOutput() { - Platform platform; - std::vector alpakaDevices = alpaka::getDevs(platform); - const auto& alpakaHost = alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); - CPPUNIT_ASSERT(alpakaDevices.size()); - const auto& alpakaDevice = alpakaDevices[0]; - Queue queue{alpakaDevice}; - torch::Device torchDevice(kTorchDeviceType); - - // Large batch size, so multiple bunches needed - const std::size_t batch_size = 325; - - // Create and fill needed portable collections - PortableCollection deviceCollection(batch_size, queue); - fill(queue, deviceCollection); - - auto records = deviceCollection.view().records(); - SoAWrapper input(batch_size); - input.append_block("x", records.x()); - input.append_block("y", records.y()); - - SoAWrapper output(batch_size); - output.append_block("v", records.v()); - output.append_block("w", records.w()); - ModelMetadata metadata(input, output); - - alpaka::wait(queue); - std::vector tensors = Converter::convert_input(metadata, torchDevice); - Converter::convert_output(tensors, metadata, torchDevice); - - // Check if tensor list built correctly - check_output(queue, deviceCollection); -}; - -void TestSOADataTypes::testSingleElement() { - Platform platform; - std::vector alpakaDevices = alpaka::getDevs(platform); - CPPUNIT_ASSERT(alpakaDevices.size()); - const auto& alpakaDevice = alpakaDevices[0]; - Queue queue(alpakaDevice); - torch::Device torchDevice(kTorchDeviceType); - - - // Create and fill portable collections - const std::size_t batch_size = 1; - PortableCollection deviceCollection(batch_size, queue); - fill(queue, deviceCollection); - SoAMetaRecords records = deviceCollection.view().records(); - - // Run Converter for single tensor - SoAWrapper input(batch_size); - input.append_block("vector", records.a(), records.b()); - input.append_block("matrix", records.c()); - input.append_block("column", records.x(), records.y(), records.z()); - input.append_block("scalar", records.type()); - input.change_order({"column", "scalar", 
"matrix", "vector"}); - - SoAWrapper output(batch_size); - output.append_block("result", records.v()); - ModelMetadata metadata(input, output); - - alpaka::wait(queue); - std::vector tensors = - Converter::convert_input(metadata, torchDevice); - - // Check if tensor list built correctly - check(queue, deviceCollection, tensors); -}; - -void TestSOADataTypes::testNoElement() { - Platform platform; - std::vector alpakaDevices = alpaka::getDevs(platform); - CPPUNIT_ASSERT(alpakaDevices.size()); - const auto& alpakaDevice = alpakaDevices[0]; - Queue queue(alpakaDevice); - torch::Device torchDevice(kTorchDeviceType); - - //Create empty portable collection - const std::size_t batch_size = 0; - PortableCollection deviceCollection(batch_size, queue); - SoAMetaRecords records = deviceCollection.view().records(); - - // Run Converter - SoAWrapper input(batch_size); - input.append_block("vector", records.a(), records.b()); - input.append_block("matrix", records.c()); - input.append_block("column", records.x(), records.y(), records.z()); - input.append_block("scalar", records.type()); - input.change_order({"column", "scalar", "matrix", "vector"}); - - SoAWrapper output(batch_size); - output.append_block("result", records.v()); - ModelMetadata metadata(input, output); - - alpaka::wait(queue); - std::vector tensors = Converter::convert_input(metadata, torchDevice); - - // Check if tensor list has empty tensors - CPPUNIT_ASSERT(tensors[0].toTensor().size(0) == 0); - CPPUNIT_ASSERT(tensors[1].toTensor().size(0) == 0); - CPPUNIT_ASSERT(tensors[2].toTensor().size(0) == 0); - CPPUNIT_ASSERT(tensors[3].toTensor().size(0) == 0); -}; - -void TestSOADataTypes::testEmptyMetadata() { - // alpaka setup - Platform platform; - std::vector alpakaDevices = alpaka::getDevs(platform); - CPPUNIT_ASSERT(alpakaDevices.size()); - const auto& alpakaDevice = alpakaDevices[0]; - Queue queue(alpakaDevice); - torch::Device torchDevice(kTorchDeviceType); - - - // Create and fill portable collections - const std::size_t batch_size = 12; - PortableCollection deviceCollection(batch_size, queue); - fill(queue, deviceCollection); - - // Run Converter for empty metadata - SoAWrapper input(batch_size); - SoAWrapper output(batch_size); - ModelMetadata metadata(input, output); - - alpaka::wait(queue); - std::vector tensors = - Converter::convert_input(metadata, torchDevice); - - // Check if tensor list is empty - CPPUNIT_ASSERT(tensors.size() == 0); -}; + } + }; + + void fill(Queue& queue, PortableCollection& collection) { + uint32_t items = 64; + uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); + auto workDiv = cms::alpakatools::make_workdiv(groups, items); + alpaka::exec(queue, workDiv, FillKernel{}, collection.view()); + alpaka::exec(queue, workDiv, InputVerifyKernel{}, collection.view()); + } + + void check(Queue& queue, PortableCollection& collection, std::vector tensors) { + uint32_t items = 64; + uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); + auto workDiv = cms::alpakatools::make_workdiv(groups, items); + alpaka::exec(queue, + workDiv, + TestVerifyKernel{}, + collection.view(), + tensors[3].toTensor().packed_accessor64(), + tensors[2].toTensor().packed_accessor64(), + tensors[0].toTensor().packed_accessor64(), + tensors[1].toTensor().packed_accessor64()); + } + + void check_not_ordered(Queue& queue, + PortableCollection& collection, + std::vector tensors) { + uint32_t items = 64; + uint32_t groups = 
cms::alpakatools::divide_up_by(collection->metadata().size(), items); + auto workDiv = cms::alpakatools::make_workdiv(groups, items); + alpaka::exec(queue, + workDiv, + TestVerifyKernel{}, + collection.view(), + tensors[0].toTensor().packed_accessor64(), + tensors[1].toTensor().packed_accessor64(), + tensors[2].toTensor().packed_accessor64(), + tensors[3].toTensor().packed_accessor64()); + } + + void check_output(Queue& queue, PortableCollection& collection) { + uint32_t items = 64; + uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); + auto workDiv = cms::alpakatools::make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestOutputVerifyKernel{}, collection.view()); + } + + void TestSOADataTypes::testInterfaceVerbose() { + Platform platform; + std::vector alpakaDevices = alpaka::getDevs(platform); + const auto& alpakaHost = alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); + CPPUNIT_ASSERT(alpakaDevices.size()); + const auto& alpakaDevice = alpakaDevices[0]; + Queue queue{alpakaDevice}; + torch::Device torchDevice(kTorchDeviceType); + + // Large batch size, so multiple bunches needed + const std::size_t batch_size = 325; + + // Create and fill needed portable collections + PortableCollection deviceCollection(batch_size, queue); + fill(queue, deviceCollection); + SoAMetaRecords records = deviceCollection.view().records(); + + SoAWrapper input(batch_size); + input.append_block("vector", records.a(), records.b()); + input.append_block("matrix", records.c()); + input.append_block("column", records.x(), records.y(), records.z()); + input.append_block("scalar", records.type()); + input.change_order({"column", "scalar", "matrix", "vector"}); + + SoAWrapper output(batch_size); + output.append_block("result", records.v()); + ModelMetadata metadata(input, output); + + alpaka::wait(queue); + std::vector tensors = Converter::convert_input(metadata, torchDevice); + + // Check if tensor list built correctly + check(queue, deviceCollection, tensors); + }; + + void TestSOADataTypes::testMultiOutput() { + Platform platform; + std::vector alpakaDevices = alpaka::getDevs(platform); + const auto& alpakaHost = alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); + CPPUNIT_ASSERT(alpakaDevices.size()); + const auto& alpakaDevice = alpakaDevices[0]; + Queue queue{alpakaDevice}; + torch::Device torchDevice(kTorchDeviceType); + + // Large batch size, so multiple bunches needed + const std::size_t batch_size = 325; + + // Create and fill needed portable collections + PortableCollection deviceCollection(batch_size, queue); + fill(queue, deviceCollection); + + auto records = deviceCollection.view().records(); + SoAWrapper input(batch_size); + input.append_block("x", records.x()); + input.append_block("y", records.y()); + + SoAWrapper output(batch_size); + output.append_block("v", records.v()); + output.append_block("w", records.w()); + ModelMetadata metadata(input, output); + + alpaka::wait(queue); + std::vector tensors = Converter::convert_input(metadata, torchDevice); + Converter::convert_output(tensors, metadata, torchDevice); + + // Check if tensor list built correctly + check_output(queue, deviceCollection); + }; + + void TestSOADataTypes::testSingleElement() { + Platform platform; + std::vector alpakaDevices = alpaka::getDevs(platform); + CPPUNIT_ASSERT(alpakaDevices.size()); + const auto& alpakaDevice = alpakaDevices[0]; + Queue queue(alpakaDevice); + torch::Device torchDevice(kTorchDeviceType); + + // Create and fill portable collections + const std::size_t 
batch_size = 1; + PortableCollection deviceCollection(batch_size, queue); + fill(queue, deviceCollection); + SoAMetaRecords records = deviceCollection.view().records(); + + // Run Converter for single tensor + SoAWrapper input(batch_size); + input.append_block("vector", records.a(), records.b()); + input.append_block("matrix", records.c()); + input.append_block("column", records.x(), records.y(), records.z()); + input.append_block("scalar", records.type()); + input.change_order({"column", "scalar", "matrix", "vector"}); + + SoAWrapper output(batch_size); + output.append_block("result", records.v()); + ModelMetadata metadata(input, output); + + alpaka::wait(queue); + std::vector tensors = Converter::convert_input(metadata, torchDevice); + + // Check if tensor list built correctly + check(queue, deviceCollection, tensors); + }; + + void TestSOADataTypes::testNoElement() { + Platform platform; + std::vector alpakaDevices = alpaka::getDevs(platform); + CPPUNIT_ASSERT(alpakaDevices.size()); + const auto& alpakaDevice = alpakaDevices[0]; + Queue queue(alpakaDevice); + torch::Device torchDevice(kTorchDeviceType); + + //Create empty portable collection + const std::size_t batch_size = 0; + PortableCollection deviceCollection(batch_size, queue); + SoAMetaRecords records = deviceCollection.view().records(); + + // Run Converter + SoAWrapper input(batch_size); + input.append_block("vector", records.a(), records.b()); + input.append_block("matrix", records.c()); + input.append_block("column", records.x(), records.y(), records.z()); + input.append_block("scalar", records.type()); + input.change_order({"column", "scalar", "matrix", "vector"}); + + SoAWrapper output(batch_size); + output.append_block("result", records.v()); + ModelMetadata metadata(input, output); + + alpaka::wait(queue); + std::vector tensors = Converter::convert_input(metadata, torchDevice); + + // Check if tensor list has empty tensors + CPPUNIT_ASSERT(tensors[0].toTensor().size(0) == 0); + CPPUNIT_ASSERT(tensors[1].toTensor().size(0) == 0); + CPPUNIT_ASSERT(tensors[2].toTensor().size(0) == 0); + CPPUNIT_ASSERT(tensors[3].toTensor().size(0) == 0); + }; + + void TestSOADataTypes::testEmptyMetadata() { + // alpaka setup + Platform platform; + std::vector alpakaDevices = alpaka::getDevs(platform); + CPPUNIT_ASSERT(alpakaDevices.size()); + const auto& alpakaDevice = alpakaDevices[0]; + Queue queue(alpakaDevice); + torch::Device torchDevice(kTorchDeviceType); + + // Create and fill portable collections + const std::size_t batch_size = 12; + PortableCollection deviceCollection(batch_size, queue); + fill(queue, deviceCollection); + + // Run Converter for empty metadata + SoAWrapper input(batch_size); + SoAWrapper output(batch_size); + ModelMetadata metadata(input, output); + + alpaka::wait(queue); + std::vector tensors = Converter::convert_input(metadata, torchDevice); + + // Check if tensor list is empty + CPPUNIT_ASSERT(tensors.size() == 0); + }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/test/alpaka/testSOAtoTorch.dev.cc b/PhysicsTools/PyTorch/test/alpaka/testSOAtoTorch.dev.cc index 90f5c2c583a..3566a7ac309 100644 --- a/PhysicsTools/PyTorch/test/alpaka/testSOAtoTorch.dev.cc +++ b/PhysicsTools/PyTorch/test/alpaka/testSOAtoTorch.dev.cc @@ -17,128 +17,129 @@ #include "PhysicsTools/PyTorch/interface/Converter.h" #include "PhysicsTools/PyTorch/test/testBase.h" - namespace ALPAKA_ACCELERATOR_NAMESPACE { -// Input SOA -GENERATE_SOA_LAYOUT(SoAPositionTemplate, SOA_COLUMN(float, x), SOA_COLUMN(float, y), 
SOA_COLUMN(float, z)) - -using SoAPosition = SoAPositionTemplate<>; -using SoAPositionView = SoAPosition::View; -using SoAPositionConstView = SoAPosition::ConstView; + // Input SOA + GENERATE_SOA_LAYOUT(SoAPositionTemplate, SOA_COLUMN(float, x), SOA_COLUMN(float, y), SOA_COLUMN(float, z)) -// Output SOA -GENERATE_SOA_LAYOUT(SoAResultTemplate, SOA_COLUMN(float, x), SOA_COLUMN(float, y)) + using SoAPosition = SoAPositionTemplate<>; + using SoAPositionView = SoAPosition::View; + using SoAPositionConstView = SoAPosition::ConstView; -using SoAResult = SoAResultTemplate<>; -using SoAResultView = SoAResult::View; + // Output SOA + GENERATE_SOA_LAYOUT(SoAResultTemplate, SOA_COLUMN(float, x), SOA_COLUMN(float, y)) -class testSOAToTorch : public testBasePyTorch { - CPPUNIT_TEST_SUITE(testSOAToTorch); - CPPUNIT_TEST(test); - CPPUNIT_TEST_SUITE_END(); + using SoAResult = SoAResultTemplate<>; + using SoAResultView = SoAResult::View; -public: - std::string pyScript() const override; - void test() override; -}; + class testSOAToTorch : public testBasePyTorch { + CPPUNIT_TEST_SUITE(testSOAToTorch); + CPPUNIT_TEST(test); + CPPUNIT_TEST_SUITE_END(); -CPPUNIT_TEST_SUITE_REGISTRATION(testSOAToTorch); + public: + std::string pyScript() const override; + void test() override; + }; -std::string testSOAToTorch::pyScript() const { return "create_linear_dnn.py"; } + CPPUNIT_TEST_SUITE_REGISTRATION(testSOAToTorch); -// Build Tensor, run model and fill output pointer with result -template -void run(torch::Device device, torch::jit::Module& model, const cms::torch::alpaka::ModelMetadata& metadata) { - std::vector input_tensor = cms::torch::alpaka::Converter::convert_input(metadata, device); - cms::torch::alpaka::Converter::convert_output(metadata, device) = model.forward(input_tensor).toTensor(); -} + std::string testSOAToTorch::pyScript() const { return "create_linear_dnn.py"; } -class FillKernel { -public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, PortableCollection::View view) const { - float input[4][3] = {{1, 2, 1}, {2, 4, 3}, {3, 4, 1}, {2, 3, 2}}; + // Build Tensor, run model and fill output pointer with result + template + void run(torch::Device device, + torch::jit::Module& model, + const cms::torch::alpaka::ModelMetadata& metadata) { + std::vector input_tensor = cms::torch::alpaka::Converter::convert_input(metadata, device); + cms::torch::alpaka::Converter::convert_output(metadata, device) = model.forward(input_tensor).toTensor(); + } - for (int32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { - view.x()[i] = input[i][0]; - view.y()[i] = input[i][1]; - view.z()[i] = input[i][2]; + class FillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, PortableCollection::View view) const { + float input[4][3] = {{1, 2, 1}, {2, 4, 3}, {3, 4, 1}, {2, 3, 2}}; + + for (int32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { + view.x()[i] = input[i][0]; + view.y()[i] = input[i][1]; + view.z()[i] = input[i][2]; + } } - } -}; - -class TestVerifyKernel { -public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, PortableCollection::View view) const { - float result_check[4][2] = {{2.3, -0.5}, {6.6, 3.0}, {2.5, -4.9}, {4.4, 1.3}}; - for (uint32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { - ALPAKA_ASSERT_ACC(view.x()[i] - result_check[i][0] < 1.0e-05); - ALPAKA_ASSERT_ACC(view.x()[i] - result_check[i][0] > -1.0e-05); - ALPAKA_ASSERT_ACC(view.y()[i] - result_check[i][1] < 1.0e-05); - 
ALPAKA_ASSERT_ACC(view.y()[i] - result_check[i][1] > -1.0e-05); + }; + + class TestVerifyKernel { + public: + ALPAKA_FN_ACC void operator()(Acc1D const& acc, PortableCollection::View view) const { + float result_check[4][2] = {{2.3, -0.5}, {6.6, 3.0}, {2.5, -4.9}, {4.4, 1.3}}; + for (uint32_t i : cms::alpakatools::uniform_elements(acc, view.metadata().size())) { + ALPAKA_ASSERT_ACC(view.x()[i] - result_check[i][0] < 1.0e-05); + ALPAKA_ASSERT_ACC(view.x()[i] - result_check[i][0] > -1.0e-05); + ALPAKA_ASSERT_ACC(view.y()[i] - result_check[i][1] < 1.0e-05); + ALPAKA_ASSERT_ACC(view.y()[i] - result_check[i][1] > -1.0e-05); + } } + }; + + void fill(Queue& queue, PortableCollection& collection) { + uint32_t items = 64; + uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); + auto workDiv = cms::alpakatools::make_workdiv(groups, items); + alpaka::exec(queue, workDiv, FillKernel{}, collection.view()); } -}; - -void fill(Queue& queue, PortableCollection& collection) { - uint32_t items = 64; - uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); - auto workDiv = cms::alpakatools::make_workdiv(groups, items); - alpaka::exec(queue, workDiv, FillKernel{}, collection.view()); -} - -void check(Queue& queue, PortableCollection& collection) { - uint32_t items = 64; - uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); - auto workDiv = cms::alpakatools::make_workdiv(groups, items); - alpaka::exec(queue, workDiv, TestVerifyKernel{}, collection.view()); -} - -void testSOAToTorch::test() { - Platform platform; - std::vector alpakaDevices = ::alpaka::getDevs(platform); - const auto& alpakaHost = ::alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); - CPPUNIT_ASSERT(alpakaDevices.size()); - const auto& alpakaDevice = alpakaDevices[0]; - Queue queue{alpakaDevice}; - torch::Device torchDevice(cms::torch::alpaka::kTorchDeviceType); - - // Number of elements - const std::size_t batch_size = 4; - - // Create and fill needed portable collections - PortableCollection positionCollection(batch_size, alpakaDevice); - PortableCollection resultCollection(batch_size, alpakaDevice); - fill(queue, positionCollection); - alpaka::wait(queue); - - torch::jit::script::Module model; - try { - // Deserialize the ScriptModule from a file using torch::jit::load(). 
- std::string model_path = dataPath_ + "/linear_dnn.pt"; - model = torch::jit::load(model_path); - model.to(torchDevice); - - } catch (const c10::Error& e) { - std::cerr << "error loading the model\n" << e.what() << std::endl; + + void check(Queue& queue, PortableCollection& collection) { + uint32_t items = 64; + uint32_t groups = cms::alpakatools::divide_up_by(collection->metadata().size(), items); + auto workDiv = cms::alpakatools::make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, collection.view()); } - // Create SoA Metadata - cms::torch::alpaka::SoAWrapper input(batch_size); - auto posview = positionCollection.view().records(); - input.append_block("main", posview.x(), posview.y(), posview.z()); + void testSOAToTorch::test() { + Platform platform; + std::vector alpakaDevices = ::alpaka::getDevs(platform); + const auto& alpakaHost = ::alpaka::getDevByIdx(alpaka_common::PlatformHost(), 0u); + CPPUNIT_ASSERT(alpakaDevices.size()); + const auto& alpakaDevice = alpakaDevices[0]; + Queue queue{alpakaDevice}; + torch::Device torchDevice(cms::torch::alpaka::kTorchDeviceType); + + // Number of elements + const std::size_t batch_size = 4; + + // Create and fill needed portable collections + PortableCollection positionCollection(batch_size, alpakaDevice); + PortableCollection resultCollection(batch_size, alpakaDevice); + fill(queue, positionCollection); + alpaka::wait(queue); + + torch::jit::script::Module model; + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). + std::string model_path = dataPath_ + "/linear_dnn.pt"; + model = torch::jit::load(model_path); + model.to(torchDevice); + + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n" << e.what() << std::endl; + } + + // Create SoA Metadata + cms::torch::alpaka::SoAWrapper input(batch_size); + auto posview = positionCollection.view().records(); + input.append_block("main", posview.x(), posview.y(), posview.z()); - cms::torch::alpaka::SoAWrapper output(batch_size); - auto view = resultCollection.view().records(); - output.append_block("result", view.x(), view.y()); - cms::torch::alpaka::ModelMetadata metadata(input, output); + cms::torch::alpaka::SoAWrapper output(batch_size); + auto view = resultCollection.view().records(); + output.append_block("result", view.x(), view.y()); + cms::torch::alpaka::ModelMetadata metadata(input, output); - // Call function to build tensor and run model - run(torchDevice, model, metadata); - alpaka::wait(queue); + // Call function to build tensor and run model + run(torchDevice, model, metadata); + alpaka::wait(queue); - check(queue, resultCollection); -} + check(queue, resultCollection); + } -} // namespace ALPAKA_ACCELERATOR_NAMESPACE::torch_alpaka \ No newline at end of file +} // namespace ALPAKA_ACCELERATOR_NAMESPACE \ No newline at end of file diff --git a/PhysicsTools/PyTorch/test/alpaka/testTorchDeviceMatchAlpaka.cc b/PhysicsTools/PyTorch/test/alpaka/testTorchDeviceMatchAlpaka.cc index 100e0c007bc..f966926d046 100644 --- a/PhysicsTools/PyTorch/test/alpaka/testTorchDeviceMatchAlpaka.cc +++ b/PhysicsTools/PyTorch/test/alpaka/testTorchDeviceMatchAlpaka.cc @@ -4,31 +4,30 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "PhysicsTools/PyTorch/interface/AlpakaConfig.h" - namespace ALPAKA_ACCELERATOR_NAMESPACE { -class TestTorchDeviceMatchAlpaka : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(TestTorchDeviceMatchAlpaka); - CPPUNIT_TEST(test); - CPPUNIT_TEST_SUITE_END(); + class 
TestTorchDeviceMatchAlpaka : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(TestTorchDeviceMatchAlpaka); + CPPUNIT_TEST(test); + CPPUNIT_TEST_SUITE_END(); - public: - void test(); -}; + public: + void test(); + }; -CPPUNIT_TEST_SUITE_REGISTRATION(TestTorchDeviceMatchAlpaka); + CPPUNIT_TEST_SUITE_REGISTRATION(TestTorchDeviceMatchAlpaka); -void TestTorchDeviceMatchAlpaka::test() { - // alpaka setup - Platform platform; - const auto& devices = alpaka::getDevs(platform); + void TestTorchDeviceMatchAlpaka::test() { + // alpaka setup + Platform platform; + const auto& devices = alpaka::getDevs(platform); - std::cout << "Devices:" << std::endl; - for (auto& device : devices) { - std::cout << "- " << alpaka::getName(device) << std::endl; - auto torch_device = cms::torch::alpaka::device(device); - CPPUNIT_ASSERT(torch_device.type() == cms::torch::alpaka::kTorchDeviceType); + std::cout << "Devices:" << std::endl; + for (auto& device : devices) { + std::cout << "- " << alpaka::getName(device) << std::endl; + auto torch_device = cms::torch::alpaka::device(device); + CPPUNIT_ASSERT(torch_device.type() == cms::torch::alpaka::kTorchDeviceType); + } } -} } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/test/alpaka/testTorchExecutionControlGuard.cc b/PhysicsTools/PyTorch/test/alpaka/testTorchExecutionControlGuard.cc index 858f5bc2570..8efae73552f 100644 --- a/PhysicsTools/PyTorch/test/alpaka/testTorchExecutionControlGuard.cc +++ b/PhysicsTools/PyTorch/test/alpaka/testTorchExecutionControlGuard.cc @@ -11,117 +11,109 @@ #include "PhysicsTools/PyTorch/interface/Nvtx.h" #include "PhysicsTools/PyTorch/test/testUtilities.h" - namespace ALPAKA_ACCELERATOR_NAMESPACE { -GENERATE_SOA_LAYOUT(SoAInputsTemplate, - SOA_COLUMN(float, x), - SOA_COLUMN(float, y), - SOA_COLUMN(float, z) -) - -GENERATE_SOA_LAYOUT(SoAOutputsTemplate, - SOA_COLUMN(float, m), - SOA_COLUMN(float, n) -) - -using SoAInputs = SoAInputsTemplate<>; -using SoAOutputs = SoAOutputsTemplate<>; - -class testTorchExecutionControlGuard : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(testTorchExecutionControlGuard); - CPPUNIT_TEST(test); - CPPUNIT_TEST_SUITE_END(); - - public: - void test(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(testTorchExecutionControlGuard); - -void testTorchExecutionControlGuard::test() { - if (prctl(PR_SET_NAME, "test::Main", 0, 0, 0)) - printf("Warning: Could not set thread name: %s\n", strerror(errno)); - - // alpaka setup - Platform platform; - const auto& devices = alpaka::getDevs(platform); - assert(!devices.empty()); - const auto& device = devices[0]; - - uint32_t batch_size = 32; - - // host structs - PortableHostCollection inputs_host(batch_size, cms::alpakatools::host()); - // prepare inputs - for (size_t i = 0; i < batch_size; i++) { - inputs_host.view().x()[i] = 0.0f; - inputs_host.view().y()[i] = 0.0f; - inputs_host.view().z()[i] = 0.0f; - } - - size_t num_threads = 8; - std::vector threads; - for (size_t t = 1; t <= num_threads; ++t) { - threads.emplace_back([&, t] { - std::cout << "T " << t << " Starting" << std::endl; - Queue queue{device}; - cms::torch::alpaka::Guard guard(queue); - - char threadName[15]; - snprintf(threadName, 15, "test::%ld", t); - if (prctl(PR_SET_NAME, threadName, 0, 0, 0)) - printf("Warning: Could not set thread name: %s\n", strerror(errno)); - - for (size_t i = 0; i < 10; i++) { - NvtxScopedRange malloc_range((std::string("Malloc thread ") + std::to_string(t)).c_str()); - // host structs - PortableHostCollection outputs_host(batch_size, 
cms::alpakatools::host()); - // device structs - std::cout << "T" << t << " I" << i << std::endl; - PortableCollection inputs_device(batch_size, queue); - PortableCollection outputs_device(batch_size, queue); - alpaka::memcpy(queue, inputs_device.buffer(), inputs_host.buffer()); - malloc_range.end(); - - NvtxScopedRange minit_range((std::string("Model instantiation thread ") + std::to_string(t)).c_str()); - auto m_path = get_path("/src/PhysicsTools/PyTorch/models/jit_classification_model.pt"); - auto jit_model = cms::torch::alpaka::Model(m_path); - jit_model.to(queue); - CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == jit_model.device()); - minit_range.end(); - - NvtxScopedRange meta_range((std::string("Metarecords thread ") + std::to_string(t)).c_str()); - auto input_records = inputs_device.view().records(); - auto output_records = outputs_device.view().records(); - cms::torch::alpaka::SoAWrapper inputs_metadata(batch_size); - inputs_metadata.append_block("features", input_records.x(), input_records.y(), input_records.z()); - cms::torch::alpaka::SoAWrapper outputs_metadata(batch_size); - outputs_metadata.append_block("preds", output_records.m(), output_records.n()); - cms::torch::alpaka::ModelMetadata metadata(inputs_metadata, outputs_metadata); - meta_range.end(); - // inference - NvtxScopedRange infer_range((std::string("Inference thread ") + std::to_string(t)).c_str()); - jit_model.forward(metadata); - infer_range.end(); - // check outputs - NvtxScopedRange assert_range((std::string("Assert thread ") + std::to_string(t)).c_str()); - alpaka::memcpy(queue, outputs_host.buffer(), outputs_device.buffer()); - alpaka::wait(queue); - - for (size_t i = 0; i < batch_size; i++) { - CPPUNIT_ASSERT(outputs_host.const_view().m()[i] == 0.5f); - CPPUNIT_ASSERT(outputs_host.const_view().n()[i] == 0.5f); + GENERATE_SOA_LAYOUT(SoAInputsTemplate, SOA_COLUMN(float, x), SOA_COLUMN(float, y), SOA_COLUMN(float, z)) + + GENERATE_SOA_LAYOUT(SoAOutputsTemplate, SOA_COLUMN(float, m), SOA_COLUMN(float, n)) + + using SoAInputs = SoAInputsTemplate<>; + using SoAOutputs = SoAOutputsTemplate<>; + + class testTorchExecutionControlGuard : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(testTorchExecutionControlGuard); + CPPUNIT_TEST(test); + CPPUNIT_TEST_SUITE_END(); + + public: + void test(); + }; + + CPPUNIT_TEST_SUITE_REGISTRATION(testTorchExecutionControlGuard); + + void testTorchExecutionControlGuard::test() { + if (prctl(PR_SET_NAME, "test::Main", 0, 0, 0)) + printf("Warning: Could not set thread name: %s\n", strerror(errno)); + + // alpaka setup + Platform platform; + const auto& devices = alpaka::getDevs(platform); + assert(!devices.empty()); + const auto& device = devices[0]; + + uint32_t batch_size = 32; + + // host structs + PortableHostCollection inputs_host(batch_size, cms::alpakatools::host()); + // prepare inputs + for (size_t i = 0; i < batch_size; i++) { + inputs_host.view().x()[i] = 0.0f; + inputs_host.view().y()[i] = 0.0f; + inputs_host.view().z()[i] = 0.0f; + } + + size_t num_threads = 8; + std::vector threads; + for (size_t t = 1; t <= num_threads; ++t) { + threads.emplace_back([&, t] { + std::cout << "T " << t << " Starting" << std::endl; + Queue queue{device}; + cms::torch::alpaka::Guard guard(queue); + + char threadName[15]; + snprintf(threadName, 15, "test::%ld", t); + if (prctl(PR_SET_NAME, threadName, 0, 0, 0)) + printf("Warning: Could not set thread name: %s\n", strerror(errno)); + + for (size_t i = 0; i < 10; i++) { + NvtxScopedRange malloc_range((std::string("Malloc thread ") + 
std::to_string(t)).c_str()); + // host structs + PortableHostCollection outputs_host(batch_size, cms::alpakatools::host()); + // device structs + std::cout << "T" << t << " I" << i << std::endl; + PortableCollection inputs_device(batch_size, queue); + PortableCollection outputs_device(batch_size, queue); + alpaka::memcpy(queue, inputs_device.buffer(), inputs_host.buffer()); + malloc_range.end(); + + NvtxScopedRange minit_range((std::string("Model instantiation thread ") + std::to_string(t)).c_str()); + auto m_path = get_path("/src/PhysicsTools/PyTorch/models/jit_classification_model.pt"); + auto jit_model = cms::torch::alpaka::Model(m_path); + jit_model.to(queue); + CPPUNIT_ASSERT(cms::torch::alpaka::device(queue) == jit_model.device()); + minit_range.end(); + + NvtxScopedRange meta_range((std::string("Metarecords thread ") + std::to_string(t)).c_str()); + auto input_records = inputs_device.view().records(); + auto output_records = outputs_device.view().records(); + cms::torch::alpaka::SoAWrapper inputs_metadata(batch_size); + inputs_metadata.append_block("features", input_records.x(), input_records.y(), input_records.z()); + cms::torch::alpaka::SoAWrapper outputs_metadata(batch_size); + outputs_metadata.append_block("preds", output_records.m(), output_records.n()); + cms::torch::alpaka::ModelMetadata metadata(inputs_metadata, outputs_metadata); + meta_range.end(); + // inference + NvtxScopedRange infer_range((std::string("Inference thread ") + std::to_string(t)).c_str()); + jit_model.forward(metadata); + infer_range.end(); + // check outputs + NvtxScopedRange assert_range((std::string("Assert thread ") + std::to_string(t)).c_str()); + alpaka::memcpy(queue, outputs_host.buffer(), outputs_device.buffer()); + alpaka::wait(queue); + + for (size_t i = 0; i < batch_size; i++) { + CPPUNIT_ASSERT(outputs_host.const_view().m()[i] == 0.5f); + CPPUNIT_ASSERT(outputs_host.const_view().n()[i] == 0.5f); + } + assert_range.end(); } - assert_range.end(); - } - - std::cout << "T " << t << " OK." << std::endl; - }); - } - for (auto& t : threads) - t.join(); -} + std::cout << "T " << t << " OK." 
<< std::endl; + }); + } + + for (auto& t : threads) + t.join(); + } } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/PhysicsTools/PyTorch/test/testModel.cc b/PhysicsTools/PyTorch/test/testModel.cc index 55c30141d4f..bca55acdeab 100644 --- a/PhysicsTools/PyTorch/test/testModel.cc +++ b/PhysicsTools/PyTorch/test/testModel.cc @@ -4,7 +4,6 @@ #include #include "PhysicsTools/PyTorch/test/testUtilities.h" - class TestModel : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(TestModel); CPPUNIT_TEST(testClassificationCpu); @@ -30,8 +29,8 @@ CPPUNIT_TEST_SUITE_REGISTRATION(TestModel); void TestModel::testClassificationCpu() { auto device = torch::Device(torch::kCPU, 0); - auto inputs = torch::ones({batch_size_, 3}, device=device); - + auto inputs = torch::ones({batch_size_, 3}, device = device); + ClassifierModel model; auto outputs = model.forward(inputs); CPPUNIT_ASSERT(torch::allclose(outputs, torch::full_like(outputs, 0.5f))); @@ -39,7 +38,7 @@ void TestModel::testClassificationCpu() { void TestModel::testClassificationCuda() { auto device = torch::Device(torch::kCUDA, 0); - auto inputs = torch::ones({batch_size_, 3}, device=device); + auto inputs = torch::ones({batch_size_, 3}, device = device); ClassifierModel model; model.to(device); @@ -49,8 +48,8 @@ void TestModel::testClassificationCuda() { void TestModel::testRegressionCpu() { auto device = torch::Device(torch::kCPU, 0); - auto inputs = torch::ones({batch_size_, 3}, device=device); - + auto inputs = torch::ones({batch_size_, 3}, device = device); + RegressionModel model; auto outputs = model.forward(inputs); CPPUNIT_ASSERT(torch::allclose(outputs, torch::full_like(outputs, 0.5f))); @@ -58,7 +57,7 @@ void TestModel::testRegressionCpu() { void TestModel::testRegressionCuda() { auto device = torch::Device(torch::kCUDA, 0); - auto inputs = torch::ones({batch_size_, 3}, device=device); + auto inputs = torch::ones({batch_size_, 3}, device = device); RegressionModel model; model.to(device); @@ -68,8 +67,8 @@ void TestModel::testRegressionCuda() { void TestModel::testMultiTaskModelCpu() { auto device = torch::Device(torch::kCPU, 0); - auto inputs = torch::ones({batch_size_, 5}, device=device); - + auto inputs = torch::ones({batch_size_, 5}, device = device); + MultiTaskModel model; auto [class_probs, reg_output] = model.forward(inputs); CPPUNIT_ASSERT(torch::allclose(class_probs, torch::full_like(class_probs, 0.2f))); @@ -78,7 +77,7 @@ void TestModel::testMultiTaskModelCpu() { void TestModel::testMultiTaskModelCuda() { auto device = torch::Device(torch::kCUDA, 0); - auto inputs = torch::ones({batch_size_, 5}, device=device); + auto inputs = torch::ones({batch_size_, 5}, device = device); MultiTaskModel model; model.to(device); diff --git a/PhysicsTools/PyTorch/test/testTensorStride.cu b/PhysicsTools/PyTorch/test/testTensorStride.cu index 32943ffca6e..85837dfdc16 100644 --- a/PhysicsTools/PyTorch/test/testTensorStride.cu +++ b/PhysicsTools/PyTorch/test/testTensorStride.cu @@ -12,7 +12,6 @@ #include "PhysicsTools/PyTorch/test/testUtilities.h" - class TestTensorStride : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(TestTensorStride); CPPUNIT_TEST(test); @@ -24,17 +23,16 @@ public: CPPUNIT_TEST_SUITE_REGISTRATION(TestTensorStride); - template torch::Tensor array_to_tensor(torch::Device device, T* arr, const long int* size) { long int arr_size[N]; long int arr_stride[N]; - std::copy(size, size+N, arr_size); - std::copy(size, size+N, arr_stride); + std::copy(size, size + N, arr_size); + std::copy(size, size + N, arr_stride); 
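+  // shift the sizes right by one, set the first stride to 1 and scale the last:
+  // this yields the column-major strides {1, size[0], size[0] * size[1]}
+  // (the shortcut is only valid for ranks N <= 3)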
  std::shift_right(std::begin(arr_stride), std::end(arr_stride), 1);
  arr_stride[0] = 1;
-  arr_stride[N-1] *= arr_stride[N-2];
+  arr_stride[N - 1] *= arr_stride[N - 2];

  auto options = torch::TensorOptions().dtype(torch::CppTypeToScalarType<T>()).device(device).pinned_memory(true);
  return torch::from_blob(arr, arr_size, arr_stride, options);
@@ -45,46 +43,49 @@ void print_column_major(T* arr, const long int* size) {
  if (N == 2) {
    for (int i = 0; i < size[0]; i++) {
      for (int j = 0; j < size[1]; j++) {
-        std::cout << arr[i + j*size[0]] << " ";
+        std::cout << arr[i + j * size[0]] << " ";
      }
      std::cout << std::endl;
-    }
+    }
  } else if (N == 3) {
    for (int i = 0; i < size[0]; i++) {
      std::cout << "(" << i << ", .., ..)" << std::endl;
      for (int j = 0; j < size[1]; j++) {
        for (int k = 0; k < size[2]; k++) {
-          std::cout << arr[i + j*size[0] + k*size[0]*size[1]] << " ";
+          std::cout << arr[i + j * size[0] + k * size[0] * size[1]] << " ";
        }
        std::cout << std::endl;
-      }
+      }
      std::cout << std::endl;
    }
  }
  std::cout << std::endl;
}

-
 template <typename T, std::size_t N, std::size_t M>
-void run(torch::Device device, LinearModel model, T* input, const long int* input_shape, T* output, const long int* output_shape) {
+void run(torch::Device device,
+         LinearModel model,
+         T* input,
+         const long int* input_shape,
+         T* output,
+         const long int* output_shape) {
  torch::Tensor input_tensor = array_to_tensor<T, N>(device, input, input_shape);  // from_blob doesn't work if we use the array from the parameter list directly

  long int res_shape[M];
-  std::copy(output_shape, output_shape+M, res_shape);
+  std::copy(output_shape, output_shape + M, res_shape);

  array_to_tensor<T, M>(device, output, output_shape) = model.forward(input_tensor);
}

-
 void TestTensorStride::test() {
  torch::Device device(torch::kCUDA);
-
+
  float input_cpu[] = {1, 2, 3, 2, 2, 4, 4, 3, 1, 3, 1, 2};
  const long int shape[] = {4, 3};
  const long int result_shape[] = {4, 2};
-  float result_cpu[result_shape[0]*result_shape[1]];
+  float result_cpu[result_shape[0] * result_shape[1]];
  float result_check[4][2] = {{2.3, -0.5}, {6.6, 3.0}, {2.5, -4.9}, {4.4, 1.3}};

  // Prints array in correct form.
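
For reference, `array_to_tensor` above avoids any copy: `torch::from_blob` merely wraps the existing buffer with explicit column-major strides. A minimal, self-contained CPU sketch of the same idea (the shapes and names below are illustrative only, not taken from the patch):

    #include <torch/torch.h>

    #include <iostream>

    int main() {
      // a 4x3 float matrix stored column-major: element (i, j) lives at data[i + 4 * j]
      float data[12];
      for (int i = 0; i < 12; ++i)
        data[i] = static_cast<float>(i);

      auto options = torch::TensorOptions().dtype(torch::kFloat);
      // wrap the buffer with explicit column-major strides {1, 4}; no copy is made
      torch::Tensor strided = torch::from_blob(data, {4, 3}, {1, 4}, options);
      // the equivalent "transpose trick": read as row-major 3x4, then swap the axes
      torch::Tensor transposed = torch::from_blob(data, {3, 4}, options).transpose(0, 1);

      std::cout << torch::equal(strided, transposed) << std::endl;  // prints 1
    }
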
@@ -93,11 +94,11 @@ void TestTensorStride::test() { float *input_gpu, *result_gpu; cudaMalloc(&input_gpu, sizeof(input_cpu)); cudaMalloc(&result_gpu, sizeof(result_cpu)); - cudaMemcpy(input_gpu, input_cpu, sizeof(input_cpu), cudaMemcpyHostToDevice); + cudaMemcpy(input_gpu, input_cpu, sizeof(input_cpu), cudaMemcpyHostToDevice); LinearModel model; model.to(device); - + // Call function to build tensor and run model run(device, model, input_gpu, shape, result_gpu, result_shape); @@ -105,8 +106,7 @@ void TestTensorStride::test() { cudaMemcpy(result_cpu, result_gpu, sizeof(result_cpu), cudaMemcpyDeviceToHost); for (int i = 0; i < result_shape[0]; i++) { for (int j = 0; j < result_shape[1]; j++) { - CPPUNIT_ASSERT(std::abs(result_cpu[i + j*result_shape[0]] - result_check[i][j]) <= 1.0e-05); + CPPUNIT_ASSERT(std::abs(result_cpu[i + j * result_shape[0]] - result_check[i][j]) <= 1.0e-05); } } - } diff --git a/PhysicsTools/PyTorch/test/testTorchFromBlobStride.cu b/PhysicsTools/PyTorch/test/testTorchFromBlobStride.cu index d9088bc48e8..fe147e914e3 100644 --- a/PhysicsTools/PyTorch/test/testTorchFromBlobStride.cu +++ b/PhysicsTools/PyTorch/test/testTorchFromBlobStride.cu @@ -9,10 +9,9 @@ #include #include - -using std::chrono::high_resolution_clock; -using std::chrono::duration_cast; using std::chrono::duration; +using std::chrono::duration_cast; +using std::chrono::high_resolution_clock; using std::chrono::milliseconds; /* @@ -22,12 +21,12 @@ template torch::Tensor array_to_tensor(torch::Device device, T* arr, const long int* size) { long int arr_size[N]; long int arr_stride[N]; - std::copy(size, size+N, arr_size); - std::copy(size, size+N, arr_stride); + std::copy(size, size + N, arr_size); + std::copy(size, size + N, arr_stride); std::shift_right(std::begin(arr_stride), std::end(arr_stride), 1); arr_stride[0] = 1; - arr_stride[N-1] *= arr_stride[N-2]; + arr_stride[N - 1] *= arr_stride[N - 2]; // Create Torch DType based on https://discuss.pytorch.org/t/mapping-a-template-type-to-a-scalartype/53174 auto options = torch::TensorOptions().dtype(torch::CppTypeToScalarType()).device(device).pinned_memory(true); @@ -41,26 +40,25 @@ void print_column_major(T* arr, const long int* size) { if (N == 2) { for (int i = 0; i < size[0]; i++) { for (int j = 0; j < size[1]; j++) { - std::cout << arr[i + j*size[0]] << " "; + std::cout << arr[i + j * size[0]] << " "; } std::cout << std::endl; - } + } } else if (N == 3) { for (int i = 0; i < size[0]; i++) { std::cout << "(" << i << ", .., ..)" << std::endl; for (int j = 0; j < size[1]; j++) { for (int k = 0; k < size[2]; k++) { - std::cout << arr[i + j*size[0] + k*size[0]*size[1]] << " "; + std::cout << arr[i + j * size[0] + k * size[0] * size[1]] << " "; } std::cout << std::endl; - } + } std::cout << std::endl; } } } - -int main(int argc, char* argv[]) { +int main(int argc, char* argv[]) { torch::Device device(torch::kCUDA); int a_cpu[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; @@ -75,7 +73,7 @@ int main(int argc, char* argv[]) { // Prints array in correct form. 
  print_column_major(a_cpu, a_shape);

-  int *a_gpu;
+  int* a_gpu;
  cudaMalloc(&a_gpu, sizeof(a_cpu));
  cudaMemcpy(a_gpu, a_cpu, sizeof(a_cpu), cudaMemcpyHostToDevice);
@@ -83,16 +81,16 @@ int main(int argc, char* argv[]) {
  auto options = torch::TensorOptions().dtype(torch::kInt).device(device).pinned_memory(true);
  std::cout << "Converting array to Torch tensor without stride" << std::endl;
  torch::Tensor tensor = torch::from_blob(a_gpu, a_shape, options);
-  std::cout << tensor << std::endl; 
+  std::cout << tensor << std::endl;

  // Correct transposition to get the same dimensions as column-major.
  std::cout << "Correct Tensor with Transpose" << std::endl;
  long int a_size[dims];
-  std::copy(a_shape, a_shape+dims, a_size);
+  std::copy(a_shape, a_shape + dims, a_size);
  std::reverse(std::begin(a_size), std::end(a_size));

  tensor = torch::from_blob(a_gpu, a_size, options);
-  tensor = torch::transpose(tensor, 0, dims-1);
+  tensor = torch::transpose(tensor, 0, dims - 1);
  std::cout << tensor << std::endl;

  // Use stride to read correctly.
@@ -106,9 +104,9 @@ int main(int argc, char* argv[]) {
    for (int j = 0; j < b_shape[1]; j++) {
      b[i][j] = rand();
    }
-  } 
+  }

-  int *b_gpu;
+  int* b_gpu;
  cudaMalloc(&b_gpu, b_shape[0] * b_shape[1] * sizeof(int));
  cudaMemcpy(b_gpu, b, b_shape[0] * b_shape[1] * sizeof(int), cudaMemcpyHostToDevice);
@@ -124,12 +122,12 @@ int main(int argc, char* argv[]) {
  t1 = high_resolution_clock::now();
  const size_t dim_b2 = sizeof(b_shape) / sizeof(long int);
  long int b_size[dim_b2];
-  std::copy(b_shape, b_shape+dim_b2, b_size);
+  std::copy(b_shape, b_shape + dim_b2, b_size);
-  std::reverse(std::begin(a_size), std::end(a_size));
+  std::reverse(std::begin(b_size), std::end(b_size));  // reverse the sizes that are actually passed to from_blob below
  std::reverse(std::begin(b_shape), std::end(b_shape));

  torch::Tensor tensor_transp = torch::from_blob(b_gpu, b_size, options);
-  tensor_transp = torch::transpose(tensor_transp, 0, dim_b2-1);
+  tensor_transp = torch::transpose(tensor_transp, 0, dim_b2 - 1);
  t2 = high_resolution_clock::now();
  ms_double = t2 - t1;
  std::cout << "Transpose:" << ms_double.count() << "ms\n";
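
A note on the timing comparison above: `torch::transpose` is metadata-only (it swaps sizes and strides on the same storage), so neither the stride path nor the transpose path copies the data; the measured difference is bookkeeping only. A small illustrative sketch, not part of the patch:

    #include <torch/torch.h>

    #include <cassert>

    int main() {
      auto t = torch::arange(24).reshape({4, 6});
      auto v = t.transpose(0, 1);  // metadata-only: same storage, swapped strides

      assert(v.data_ptr() == t.data_ptr());  // no copy has happened
      assert(!v.is_contiguous());            // but the view is no longer row-major

      auto c = v.contiguous();               // an actual copy happens only here
      assert(c.data_ptr() != t.data_ptr());
    }
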
diff --git a/PhysicsTools/PyTorch/test/testUtilities.h b/PhysicsTools/PyTorch/test/testUtilities.h
index 5e01d2f00ef..4cbbd084ba9 100644
--- a/PhysicsTools/PyTorch/test/testUtilities.h
+++ b/PhysicsTools/PyTorch/test/testUtilities.h
@@ -14,13 +14,10 @@ std::string get_path(std::string path) {
  return (std::filesystem::exists(base.c_str()) ? base : rel) + path;
}

-
 class ClassifierModel : public torch::nn::Module {
public:
-  ClassifierModel(int in_dim = 3, int out_dim = 2)
-      : fc(in_dim, out_dim), softmax(torch::nn::SoftmaxOptions(1)) {
-    auto weight =
-        torch::tensor({{1.0, 1.0, 0.0}, {1.0, 1.0, 0.0}}, torch::kFloat);
+  ClassifierModel(int in_dim = 3, int out_dim = 2) : fc(in_dim, out_dim), softmax(torch::nn::SoftmaxOptions(1)) {
+    auto weight = torch::tensor({{1.0, 1.0, 0.0}, {1.0, 1.0, 0.0}}, torch::kFloat);
    fc->weight.set_data(weight);
    fc->weight.requires_grad_(false);
@@ -42,13 +39,11 @@ private:
  torch::nn::Softmax softmax;
};

-
 class RegressionModel : public torch::nn::Module {
public:
-  RegressionModel(int in_dim = 3, int out_dim = 1)
-      : fc(in_dim, out_dim) {
+  RegressionModel(int in_dim = 3, int out_dim = 1) : fc(in_dim, out_dim) {
    fc->weight.set_data(torch::zeros_like(fc->weight));
-    fc->weight.requires_grad_(false); 
+    fc->weight.requires_grad_(false);

    fc->bias.set_data(torch::full_like(fc->bias, 0.5f));
    fc->bias.requires_grad_(false);
@@ -56,24 +51,16 @@ public:
    fc = register_module("fc", fc);
  }

-  torch::Tensor forward(torch::Tensor input) {
-    return fc(input);
-  }
+  torch::Tensor forward(torch::Tensor input) { return fc(input); }

private:
  torch::nn::Linear fc{nullptr};
};

-
 class LinearModel : public torch::nn::Module {
public:
-  LinearModel(int in_dim = 3, int out_dim = 2)
-      : fc(in_dim, out_dim) {
-
-    auto weights = torch::tensor({
-        {-0.1f, 0.2f, 2.0f},
-        {0.1f, -2.3f, 4.0f}
-    });
+  LinearModel(int in_dim = 3, int out_dim = 2) : fc(in_dim, out_dim) {
+    auto weights = torch::tensor({{-0.1f, 0.2f, 2.0f}, {0.1f, -2.3f, 4.0f}});
    fc->weight.set_data(weights);
    fc->weight.requires_grad_(false);
@@ -83,24 +70,16 @@ public:
    fc = register_module("fc", fc);
  }

-  torch::Tensor forward(torch::Tensor x) {
-    return fc(x);
-  }
+  torch::Tensor forward(torch::Tensor x) { return fc(x); }

private:
  torch::nn::Linear fc{nullptr};
};

-
 class MultiTaskModel : public torch::nn::Module {
public:
  MultiTaskModel(int input_dim = 5)
-      : fc1(input_dim, 128),
-        fc2(128, 128),
-        class_fc1(128, 64),
-        class_fc2(64, 5),
-        reg_fc1(128, 64),
-        reg_fc2(64, 1) {
+      : fc1(input_dim, 128), fc2(128, 128), class_fc1(128, 64), class_fc2(64, 5), reg_fc1(128, 64), reg_fc2(64, 1) {
    fc1 = register_module("fc1", fc1);
    fc2 = register_module("fc2", fc2);
    class_fc1 = register_module("class_fc1", class_fc1);
@@ -108,7 +87,6 @@ public:
    reg_fc1 = register_module("reg_fc1", reg_fc1);
    reg_fc2 = register_module("reg_fc2", reg_fc2);

-    // set weights and biases
    fc1->weight.set_data(torch::full_like(fc1->weight, 0.1f));
    fc1->bias.set_data(torch::full_like(fc1->bias, 0.0f));
@@ -145,7 +123,7 @@ public:
    // Regression head
    auto reg = reg_fc1->forward(x);
-    reg = reg_fc2->forward(reg); 
+    reg = reg_fc2->forward(reg);

    return std::make_tuple(cls, reg);
  }
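
As a cross-check of the fixtures above: the `result_check` constants used by the tensor-stride tests follow directly from `LinearModel`'s frozen weights (the expected values are consistent with a zero bias). A minimal CPU sketch that reproduces them:

    #include <torch/torch.h>

    #include <iostream>

    int main() {
      // frozen LinearModel weights from testUtilities.h; zero bias assumed
      auto W = torch::tensor({{-0.1f, 0.2f, 2.0f}, {0.1f, -2.3f, 4.0f}});
      auto X = torch::tensor({{1.f, 2.f, 1.f}, {2.f, 4.f, 3.f}, {3.f, 4.f, 1.f}, {2.f, 3.f, 2.f}});

      // X * W^T yields {{2.3, -0.5}, {6.6, 3.0}, {2.5, -4.9}, {4.4, 1.3}},
      // i.e. exactly the result_check table used in the tests
      std::cout << X.matmul(W.t()) << std::endl;
    }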