Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-05-11 03:00:20

0001 #!/usr/bin/env python3
0002 
0003 
0004 """
0005 Testing all the nnet library
0006 """
0007 from __future__ import division, print_function
0008 
0009 from builtins import range
0010 import numpy
0011 from sklearn.linear_model import LogisticRegression
0012 from sklearn.datasets import make_blobs
0013 from sklearn.metrics import roc_auc_score
0014 
0015 from hep_ml import nnet
0016 from hep_ml.commonutils import generate_sample
0017 from hep_ml.preprocessing import BinTransformer, IronTransformer
0018 
0019 __author__ = 'Alex Rogozhnikov'
0020 
0021 
def test_nnet(n_samples=200, n_features=7, distance=0.8, complete=False):
    """Smoke-test every neural-network flavour in hep_ml.nnet on a toy dataset.

    :param int n_samples: number of events in the generated sample.
    :param int n_features: number of features per event.
    :param float distance: separation between the two generated classes.
    :param bool complete: if True, all possible combinations of loss,
        network type and trainer are checked and their quality is printed
        (the test then fails deliberately so the printout is visible).
    """
    X, y = generate_sample(n_samples=n_samples, n_features=n_features, distance=distance)

    nn_types = [
        nnet.SimpleNeuralNetwork,
        nnet.MLPClassifier,
        nnet.SoftmaxNeuralNetwork,
        nnet.RBFNeuralNetwork,
        nnet.PairwiseNeuralNetwork,
        nnet.PairwiseSoftplusNeuralNetwork,
    ]

    if complete:
        # checking all possible combinations
        for loss in nnet.losses:
            for NNType in nn_types:
                for trainer in nnet.trainers:
                    nn = NNType(layers=[5], loss=loss, trainer=trainer, random_state=42, epochs=100)
                    nn.fit(X, y)
                    print(roc_auc_score(y, nn.predict_proba(X)[:, 1]), nn)

        # logistic regression as a quality baseline for comparison
        lr = LogisticRegression().fit(X, y)
        print(lr, roc_auc_score(y, lr.predict_proba(X)[:, 1]))

        # deliberate failure so the printed comparison shows up in the test log
        assert 0 == 1, "Let's see and compare results"
    else:
        # checking combinations of losses, nn_types, trainers, most of them are used once during tests.
        attempts = max(len(nnet.losses), len(nnet.trainers), len(nn_types))
        losses_shift = numpy.random.randint(10)
        trainers_shift = numpy.random.randint(10)
        for attempt in range(attempts):
            # the combination depends only on the attempt index, so pick it once,
            # outside the retry loop (it was invariant across retries anyway)
            loss = list(nnet.losses.keys())[(attempt + losses_shift) % len(nnet.losses)]
            trainer = list(nnet.trainers.keys())[(attempt + trainers_shift) % len(nnet.trainers)]
            nn_type = nn_types[attempt % len(nn_types)]

            # each combination is tried 3 times (different seeds) before raising an exception
            retry_attempts = 3
            for retry_attempt in range(retry_attempts):
                nn = nn_type(layers=[5], loss=loss, trainer=trainer, random_state=42 + retry_attempt, epochs=200)
                print(nn)
                nn.fit(X, y)
                quality = roc_auc_score(y, nn.predict_proba(X)[:, 1])
                # smoke-check that loss evaluation works on a fitted model
                # (the returned value itself is not inspected)
                nn.compute_loss(X, y)
                if quality > 0.8:
                    break
                print('attempt {} : {}'.format(retry_attempt, quality))
                if retry_attempt == retry_attempts - 1:
                    raise RuntimeError('quality of model is too low: {} {}'.format(quality, nn))
0075 
0076 
def test_with_scaler(n_samples=200, n_features=15, distance=0.5):
    """Verify SimpleNeuralNetwork trains well with each preprocessing transformer."""
    X, y = generate_sample(n_samples=n_samples, n_features=n_features, distance=distance)
    transformers = [BinTransformer(max_bins=16), IronTransformer()]
    for transformer in transformers:
        classifier = nnet.SimpleNeuralNetwork(scaler=transformer, epochs=300)
        classifier.fit(X, y)

        probabilities = classifier.predict_proba(X)
        auc = roc_auc_score(y, probabilities[:, 1])
        assert auc > 0.8, 'quality is too low for model: {}'.format(classifier)
0085 
0086 
# Guard the script entry point: previously these tests ran as an import-time
# side effect, which would execute them (slowly) whenever the module was
# merely imported, e.g. by a test collector.
if __name__ == '__main__':
    print("NNet test")
    test_nnet()
    print("Scaler test")
    test_with_scaler()