
// -*- C++ -*-
//
// Package:     MVAComputer
// Class  :     ProcMLP
//

// Implementation:
//     An evaluator for a feed-forward neural net (multi-layer perceptron).
//     Each layer has (n + 1) x m weights for n input neurons, 1 bias
//     and m neurons. Each layer can also select between a linear and a
//     logistic activation function. The output of the last layer is returned.
//
// Author:      Christophe Saout
// Created:     Sat Apr 24 15:18 CEST 2007
//
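
// For illustration only (hypothetical numbers, not taken from any calibration):
// a layer with n = 2 inputs and m = 1 logistic neuron stores the three
// coefficients {bias, w1, w2} and computes, for inputs (x1, x2),
//
//     y = sigma(bias + w1 * x1 + w2 * x2),   sigma(x) = 1 / (1 + exp(-x))
//
// so coefficients {0.5, 1.0, -2.0} applied to (1.0, 0.25) give
// sigma(0.5 + 1.0 - 0.5) = sigma(1.0) ~= 0.731.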

#include <cstdlib>
#include <algorithm>
#include <iterator>
#include <vector>
#include <cmath>

#include "FWCore/Utilities/interface/Exception.h"

#include "PhysicsTools/MVAComputer/interface/VarProcessor.h"
#include "PhysicsTools/MVAComputer/interface/Calibration.h"

using namespace PhysicsTools;

namespace {  // anonymous

  class ProcMLP : public VarProcessor {
  public:
    typedef VarProcessor::Registry::Registry<ProcMLP, Calibration::ProcMLP> Registry;

    ProcMLP(const char *name, const Calibration::ProcMLP *calib, const MVAComputer *computer);
    ~ProcMLP() override {}

    void configure(ConfIterator iter, unsigned int n) override;
    void eval(ValueIterator iter, unsigned int n) const override;
    std::vector<double> deriv(ValueIterator iter, unsigned int n) const override;

  private:
    struct Layer {
      Layer(const Calibration::ProcMLP::Layer &calib);
      Layer(const Layer &orig)
          : inputs(orig.inputs), neurons(orig.neurons), coeffs(orig.coeffs), sigmoid(orig.sigmoid) {}

      unsigned int inputs;         // number of inputs feeding each neuron
      unsigned int neurons;        // number of neurons in this layer
      std::vector<double> coeffs;  // per neuron: bias first, then "inputs" weights
      bool sigmoid;                // apply the logistic activation if true
    };

    std::vector<Layer> layers;
    unsigned int maxTmp;  // width of the widest layer, sizes eval()'s scratch buffer
  };

  ProcMLP::Registry registry("ProcMLP");

  ProcMLP::Layer::Layer(const Calibration::ProcMLP::Layer &calib)
      : inputs(calib.first.front().second.size()), neurons(calib.first.size()), sigmoid(calib.second) {
    typedef Calibration::ProcMLP::Neuron Neuron;

    // flatten each neuron into the coefficient vector: bias first, then its weights
    coeffs.resize(neurons * (inputs + 1));
    std::vector<double>::iterator inserter = coeffs.begin();

    for (std::vector<Neuron>::const_iterator iter = calib.first.begin(); iter != calib.first.end(); iter++) {
      *inserter++ = iter->first;

      if (iter->second.size() != inputs)
        throw cms::Exception("ProcMLPInput") << "ProcMLP neuron layer inconsistent." << std::endl;

      inserter = std::copy(iter->second.begin(), iter->second.end(), inserter);
    }
  }

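  // Copies the layer calibrations, records the widest layer (which sizes
  // eval()'s scratch buffer) and checks that consecutive layers connect.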
  ProcMLP::ProcMLP(const char *name, const Calibration::ProcMLP *calib, const MVAComputer *computer)
      : VarProcessor(name, calib, computer), maxTmp(0) {
    std::copy(calib->layers.begin(), calib->layers.end(), std::back_inserter(layers));

    for (unsigned int i = 0; i < layers.size(); i++) {
      maxTmp = std::max<unsigned int>(maxTmp, layers[i].neurons);
      if (i > 0 && layers[i - 1].neurons != layers[i].inputs)
        throw cms::Exception("ProcMLPInput") << "ProcMLP neuron layers do not connect properly." << std::endl;
    }
  }

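  // Declares one flag-less input variable per first-layer input and one
  // flag-less output per last-layer neuron.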
  void ProcMLP::configure(ConfIterator iter, unsigned int n) {
    // a mismatched input count leaves the processor unconfigured
    if (n != layers.front().inputs)
      return;

    for (unsigned int i = 0; i < n; i++)
      iter++(Variable::FLAG_NONE);

    for (unsigned int i = 0; i < layers.back().neurons; i++)
      iter << Variable::FLAG_NONE;
  }

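  // Feeds the inputs forward through the net using two ping-pong halves of a
  // stack-allocated scratch buffer: each layer reads from one half and writes
  // into the other, so the widest layer alone determines the buffer size.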
  void ProcMLP::eval(ValueIterator iter, unsigned int n) const {
    double *tmp = (double *)alloca(2 * maxTmp * sizeof(double));
    bool flip = false;

    // copy the input variables into the first half of the scratch buffer
    for (double *pos = tmp; iter; iter++, pos++)
      *pos = *iter;

    double *output = nullptr;
    for (std::vector<Layer>::const_iterator layer = layers.begin(); layer != layers.end(); layer++, flip = !flip) {
      const double *input = &tmp[flip ? maxTmp : 0];
      output = &tmp[flip ? 0 : maxTmp];
      std::vector<double>::const_iterator coeff = layer->coeffs.begin();
      for (unsigned int i = 0; i < layer->neurons; i++) {
        double sum = *coeff++;  // bias
        for (unsigned int j = 0; j < layer->inputs; j++)
          sum += input[j] * *coeff++;
        if (layer->sigmoid)
          sum = 1.0 / (std::exp(-sum) + 1.0);
        *output++ = sum;
      }
    }

    // publish the half that holds the last layer's outputs
    for (const double *pos = &tmp[flip ? maxTmp : 0]; pos < output; pos++)
      iter(*pos);
  }

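  // Evaluates the net while propagating the Jacobian with respect to the
  // original inputs through each layer via the chain rule:
  //
  //     J'[i][k] = f'(net_i) * sum_j w[i][j] * J[j][k]
  //
  // For the logistic activation, with e = exp(-x),
  //
  //     f'(x) = e / (1 + e)^2 = 1 / (e + 1/e + 2)
  //
  // which is the identity used below. The result is a row-major
  // (last-layer neurons) x (inputs) matrix.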
  std::vector<double> ProcMLP::deriv(ValueIterator iter, unsigned int n) const {
    std::vector<double> prevValues, nextValues;
    std::vector<double> prevMatrix, nextMatrix;

    while (iter)
      nextValues.push_back(*iter++);

    // start from the identity: each input's derivative with respect to itself
    unsigned int size = nextValues.size();
    nextMatrix.resize(size * size);
    for (unsigned int i = 0; i < size; i++)
      nextMatrix[i * size + i] = 1.;

    for (std::vector<Layer>::const_iterator layer = layers.begin(); layer != layers.end(); layer++) {
      prevValues.clear();
      std::swap(prevValues, nextValues);
      prevMatrix.clear();
      std::swap(prevMatrix, nextMatrix);

      std::vector<double>::const_iterator coeff = layer->coeffs.begin();
      for (unsigned int i = 0; i < layer->neurons; i++) {
        double sum = *coeff++;
        for (unsigned int j = 0; j < layer->inputs; j++)
          sum += prevValues[j] * *coeff++;

        double deriv;
        if (layer->sigmoid) {
          double e = std::exp(-sum);
          sum = 1.0 / (e + 1.0);
          deriv = 1.0 / (e + 1.0 / e + 2.0);
        } else
          deriv = 1.0;

        nextValues.push_back(sum);

        // chain rule: multiply this neuron's weight row into the previous
        // layer's Jacobian and scale by the activation derivative
        for (unsigned int k = 0; k < size; k++) {
          sum = 0.0;
          coeff -= layer->inputs;
          for (unsigned int j = 0; j < layer->inputs; j++)
            sum += prevMatrix[j * size + k] * *coeff++;
          nextMatrix.push_back(sum * deriv);
        }
      }
    }

    return nextMatrix;
  }

}  // anonymous namespace