Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:07

0001 #!/usr/bin/env python3
0002 
0003 # test for pytables
0004 # taken from https://kastnerkyle.github.io/posts/using-pytables-for-larger-than-ram-data-processing/
0005 # but with some interface modifications (presumably due to pytables changes)
0006 
0007 import numpy as np
0008 import matplotlib 
0009 matplotlib.use('Agg')
0010 import matplotlib.pyplot as plt
0011 import tables
0012 
# Seeded generator shared by the script so that repeated runs produce the
# same synthetic data set.
random_state = np.random.RandomState(1999)


def make_random_cluster_points(n_samples, random_state=random_state):
    """Draw 2-D points scattered around four fixed cluster centres.

    Each point is assigned to one of the centres (-1, -1), (1, 1), (1, -1)
    and (-1, 1) and then displaced by Gaussian noise with a standard
    deviation of 0.2 on each coordinate.

    Args:
        n_samples: Number of points to generate.
        random_state: Generator providing ``randint`` and ``randn`` (for
            example a ``numpy.random.RandomState``). Defaults to the
            module-level seeded generator; since that object is shared,
            every call made with the default advances its state.

    Returns:
        A ``(points, labels)`` tuple. ``points`` is an array of shape
        ``(n_samples, 2)`` and ``labels`` holds, for each point, the index
        of the centre it was drawn around.
    """
    mu_options = np.array([(-1, -1), (1, 1), (1, -1), (-1, 1)])
    sigma = 0.2
    mu_choices = random_state.randint(0, len(mu_options), size=n_samples)
    means = mu_options[mu_choices]
    # Take the noise from the supplied generator as well: using the global
    # np.random here would make the points irreproducible even when the
    # caller passes a seeded generator.
    noise = random_state.randn(n_samples, 2) * sigma
    return means + noise, mu_choices
0021 
def plot_clusters(data, clusters, name):
    """Scatter-plot the points of each cluster in its own colour.

    A new matplotlib figure is opened and left as the current figure; it is
    neither shown nor saved here.

    Args:
        data: Array indexed as ``data[mask, column]``; columns 0 and 1 are
            used as the x and y coordinates.
        clusters: Array of integer cluster labels, one per row of ``data``.
            Each label is used as an index into a four-entry colour table.
        name: Text inserted into the figure title.
    """
    palette = ("#9b59b6", "#3498db", "#e74c3c", "#2ecc71")
    plt.figure()
    for label in np.unique(clusters):
        # Boolean mask selecting the rows that belong to this cluster.
        members = clusters == label
        xs = data[members, 0]
        ys = data[members, 1]
        plt.scatter(xs, ys, color=palette[label])
    plt.axis('off')
    plt.title('Plot from %s' % name)
0029 
# Generate a synthetic data set, round-trip it through an HDF5 file with
# PyTables, and plot what was read back.
sample_data, sample_clusters = make_random_cluster_points(10000)
hdf5_path = "my_data.hdf5"

# The context manager closes the file even if one of the writes raises.
with tables.file.open_file(hdf5_path, mode='w') as hdf5_file:
    hdf5_file.create_array(hdf5_file.root, 'data', sample_data)
    hdf5_file.create_array(hdf5_file.root, 'clusters', sample_clusters)

with tables.file.open_file(hdf5_path, mode='r') as read_hdf5_file:
    # Here we slice [:] all the data back into memory, then operate on it
    hdf5_data = read_hdf5_file.root.data[:]
    hdf5_clusters = read_hdf5_file.root.clusters[:]

# NOTE(review): nothing visible here saves or shows the figure, so with the
# non-interactive 'Agg' backend it appears to be discarded at exit - confirm
# whether that is intended for this test.
plot_clusters(hdf5_data, hdf5_clusters, "PyTables Array")