Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:21:25

0001 #ifndef BITONIC_HYBRID_REF_H
0002 #define BITONIC_HYBRID_REF_H
0003 
0004 #include <algorithm>
0005 #include <cassert>
0006 
0007 namespace hybridBitonicSortUtils {
0008   inline unsigned int PowerOf2LessThan(unsigned int n) {
0009     unsigned int i = 1;
0010     unsigned int prev = 1;
0011     if (n <= 1)
0012       return n;
0013     while (i < n) {
0014       i <<= 1;
0015       if (i < n) {
0016         prev = i;
0017       } else {
0018         return prev;
0019       }
0020     }
0021     // shouldn't happen
0022     assert(false);
0023   }
0024 
0025   template <typename T>
0026   void compAndSwap(T a[], int i, int j, bool dir) {
0027     if (dir) {
0028       if (a[j] < a[i])
0029         std::swap(a[i], a[j]);
0030     } else {
0031       if (a[i] < a[j])
0032         std::swap(a[i], a[j]);
0033     }
0034   }
0035 
0036   inline unsigned int bitonicMergeLatencyRef(unsigned int nIn) {
0037     if (nIn <= 1)
0038       return 0;
0039     return 1 +
0040            std::max(bitonicMergeLatencyRef(PowerOf2LessThan(nIn)), bitonicMergeLatencyRef(nIn - PowerOf2LessThan(nIn)));
0041   }
0042 
0043   inline unsigned int bitonicSortLatencyRef(unsigned int nIn, unsigned int nOut) {
0044     if (nIn <= 1)
0045       return 0;
0046     unsigned int sort1Size = nIn / 2, sort2Size = nIn - sort1Size;
0047     unsigned int sort1Latency = bitonicSortLatencyRef(sort1Size, nOut);
0048     unsigned int sort2Latency = bitonicSortLatencyRef(sort2Size, nOut);
0049     unsigned int mergeLatency = bitonicMergeLatencyRef(std::min(sort1Size, nOut) + std::min(sort2Size, nOut));
0050     return std::max(sort1Latency, sort2Latency) + mergeLatency;
0051   }
0052 
0053   inline unsigned int hybridBitonicSortLatencyRef(unsigned int nIn, unsigned int nOut) {
0054     if (nIn <= 1)
0055       return 0;
0056     if (nIn == 5 || nIn == 6)
0057       return 3;
0058     if (nIn == 12)
0059       return 8;
0060     if (nIn == 13)
0061       return 9;
0062     unsigned int sort1Size = nIn / 2, sort2Size = nIn - sort1Size;
0063     unsigned int sort1Latency = hybridBitonicSortLatencyRef(sort1Size, nOut);
0064     unsigned int sort2Latency = hybridBitonicSortLatencyRef(sort2Size, nOut);
0065     unsigned int mergeLatency = bitonicMergeLatencyRef(std::min(sort1Size, nOut) + std::min(sort2Size, nOut));
0066     return std::max(sort1Latency, sort2Latency) + mergeLatency;
0067   }
0068 
0069   // may be specialized for different types if needed
0070   template <typename T>
0071   void clear(T& t) {
0072     t.clear();
0073   }
0074 
0075 }  // namespace hybridBitonicSortUtils
0076 
0077 template <typename T>
0078 void hybridBitonicMergeRef(T a[], int N, int low, bool dir) {
0079   int k = hybridBitonicSortUtils::PowerOf2LessThan(N);
0080   int k2 = N - k;
0081   if (N > 1) {
0082     for (int i = low; i < low + k; i++) {
0083       if (i + k < low + N)
0084         hybridBitonicSortUtils::compAndSwap(a, i, i + k, dir);
0085     }
0086     if (N > 2) {
0087       hybridBitonicMergeRef(a, k, low, dir);
0088       hybridBitonicMergeRef(a, k2, low + k, dir);
0089     }
0090   }
0091 }
0092 
0093 template <typename T>
0094 void check_sorted(T a[], int N, int low, bool dir) {
0095   bool ok = true;
0096   if (dir) {
0097     for (int i = 1; i < N; ++i)
0098       ok = ok && (!(a[low + i - 1] > a[low + i]));
0099   } else {
0100     for (int i = 1; i < N; ++i)
0101       ok = ok && (!(a[low + i - 1] < a[low + i]));
0102   }
0103   if (!ok) {
0104     printf("ERROR in sorting[N=%d,low=%d,dir=%d]: ", N, low, int(dir));
0105     //for (int i = 0; i < N; ++i) printf("%d[%s]  ", a[low+i].intPt(), a[low+i].pack().to_string(16).c_str());
0106     //for (int i = 0; i < N; ++i) printf("%d  ", a[low+i]);
0107     printf("\n");
0108     fflush(stdout);
0109     assert(ok);
0110   }
0111 }
0112 
0113 template <typename T>
0114 void hybridBitonicSortRef(T a[], int N, int low, bool dir, bool hybrid) {
0115   if (hybrid) {  // sorting networks defined by hand for a few cases
0116     switch (N) {
0117       case 2:
0118         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0119         return;
0120       case 3:
0121         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0122         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0123         //---
0124         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0125         //check_sorted(a, N, low, dir);
0126         return;
0127       case 4:
0128         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0129         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0130         //---
0131         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 2, dir);
0132         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 3, dir);
0133         //---
0134         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0135         //check_sorted(a, N, low, dir);
0136         return;
0137       case 5:
0138         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0139         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0140         //---
0141         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 3, dir);
0142         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 4, dir);
0143         //---
0144         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 2, dir);
0145         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 4, dir);
0146         //--
0147         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0148         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 4, dir);
0149         //--
0150         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0151         //check_sorted(a, N, low, dir);
0152         return;
0153       case 6:
0154         //---
0155         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 3, dir);
0156         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0157         //---
0158         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0159         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0160         hybridBitonicSortUtils::compAndSwap(a, low + 4, low + 5, dir);
0161         //---
0162         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 3, dir);
0163         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 4, dir);
0164         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 5, dir);
0165         //---
0166         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0167         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 4, dir);
0168         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 5, dir);
0169         //---
0170         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0171         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 4, dir);
0172         //check_sorted(a, N, low, dir);
0173         return;
0174       case 12:
0175         for (int i = 0; i < 12; i += 2) {
0176           hybridBitonicSortUtils::compAndSwap(a, low + i, low + i + 1, dir);
0177         }
0178         //---
0179         for (int i = 0; i < 12; i += 4) {
0180           hybridBitonicSortUtils::compAndSwap(a, low + i + 0, low + i + 2, dir);
0181           hybridBitonicSortUtils::compAndSwap(a, low + i + 1, low + i + 3, dir);
0182         }
0183         //---
0184         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 4, dir);
0185         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 5, dir);
0186         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 6, dir);
0187         hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 11, dir);
0188         hybridBitonicSortUtils::compAndSwap(a, low + 9, low + 10, dir);
0189         //---
0190         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0191         hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 10, dir);
0192         hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 9, dir);
0193         hybridBitonicSortUtils::compAndSwap(a, low + 4, low + 8, dir);
0194         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 7, dir);
0195         //---
0196         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 6, dir);
0197         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 5, dir);
0198         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 4, dir);
0199         hybridBitonicSortUtils::compAndSwap(a, low + 9, low + 10, dir);
0200         hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 11, dir);
0201         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 8, dir);
0202         //---
0203         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 4, dir);
0204         hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 10, dir);
0205         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0206         hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 6, dir);
0207         hybridBitonicSortUtils::compAndSwap(a, low + 8, low + 9, dir);
0208         //---
0209         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 4, dir);
0210         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 5, dir);
0211         hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 8, dir);
0212         hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 9, dir);
0213         //---
0214         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 4, dir);
0215         hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 6, dir);
0216         hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 8, dir);
0217         //check_sorted(a, N, low, dir);
0218         return;
0219       case 13:
0220         for (int i = 0; i + 1 < 13; i += 2) {
0221           hybridBitonicSortUtils::compAndSwap(a, low + i, low + i + 1, dir);
0222         }
0223         //---
0224         for (int i = 0; i + 3 < 13; i += 4) {
0225           hybridBitonicSortUtils::compAndSwap(a, low + i + 0, low + i + 2, dir);
0226           hybridBitonicSortUtils::compAndSwap(a, low + i + 1, low + i + 3, dir);
0227         }
0228         //---
0229         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 4, dir);
0230         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 5, dir);
0231         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 6, dir);
0232         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 7, dir);
0233         hybridBitonicSortUtils::compAndSwap(a, low + 8, low + 12, dir);
0234         //---
0235         hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 8, dir);
0236         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 9, dir);
0237         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 10, dir);
0238         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 11, dir);
0239         hybridBitonicSortUtils::compAndSwap(a, low + 4, low + 12, dir);
0240         //---
0241         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0242         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 12, dir);
0243         hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 11, dir);
0244         hybridBitonicSortUtils::compAndSwap(a, low + 4, low + 8, dir);
0245         hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 10, dir);
0246         hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 9, dir);
0247         //---
0248         hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 4, dir);
0249         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 8, dir);
0250         hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 12, dir);
0251         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 10, dir);
0252         hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 9, dir);
0253         //---
0254         hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 4, dir);
0255         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 5, dir);
0256         hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 8, dir);
0257         hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 9, dir);
0258         hybridBitonicSortUtils::compAndSwap(a, low + 10, low + 12, dir);
0259         //---
0260         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 6, dir);
0261         hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 8, dir);
0262         hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 10, dir);
0263         hybridBitonicSortUtils::compAndSwap(a, low + 9, low + 12, dir);
0264         //---
0265         hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 4, dir);
0266         hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 6, dir);
0267         hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 8, dir);
0268         hybridBitonicSortUtils::compAndSwap(a, low + 9, low + 10, dir);
0269         hybridBitonicSortUtils::compAndSwap(a, low + 11, low + 12, dir);
0270         //check_sorted(a, N, low, dir);
0271         return;
0272     }
0273   }
0274 
0275   // general case
0276   if (N > 1) {
0277     int lowerSize = N / 2;
0278     int upperSize = N - N / 2;
0279     bool notDir = not dir;
0280     hybridBitonicSortRef(a, lowerSize, low, notDir, hybrid);
0281     hybridBitonicSortRef(a, upperSize, low + lowerSize, dir, hybrid);
0282     hybridBitonicMergeRef(a, N, low, dir);
0283     //check_sorted(a, N, low, dir);
0284   }
0285 }
0286 
0287 template <typename T>
0288 void hybrid_bitonic_sort_and_crop_ref(
0289     unsigned int nIn, unsigned int nOut, const T in[], T out[], bool hybrid = true) {  // just an interface
0290   T work[nIn];
0291   for (unsigned int i = 0; i < nIn; ++i) {
0292     work[i] = in[i];
0293   }
0294   hybridBitonicSortRef(work, nIn, 0, false, hybrid);
0295   for (unsigned int i = 0; i < nOut; ++i) {
0296     out[i] = work[i];
0297   }
0298 }
0299 
0300 template <typename T>
0301 void folded_hybrid_bitonic_sort_and_crop_ref(
0302     unsigned int nIn, unsigned int nOut, const T in[], T out[], bool hybrid = true) {  // just an interface
0303   unsigned int nHalf = (nIn + 1) / 2;
0304   T work[nHalf], halfsorted[nHalf];
0305   //printf("hybrid sort input %u items: ", nIn);
0306   //for (int i = 0; i < nIn; ++i) printf("%d.%03d  ", work[i].intPt(), work[i].intEta());
0307   //for (int i = 0; i < nIn; ++i) if (in[i].hwPt) printf("[%d]%s  ", i, in[i].pack().to_string(16).c_str());
0308   //printf("\n");
0309   //fflush(stdout);
0310   for (int o = 1; o >= 0; --o) {
0311     for (unsigned int i = 0; i < nHalf && 2 * i + o < nIn; ++i) {
0312       work[i] = in[2 * i + o];
0313     }
0314     if ((nIn % 2 == 1) && (o == 1)) {
0315       hybridBitonicSortUtils::clear(work[nHalf - 1]);
0316     }
0317     hybridBitonicSortRef(work, nHalf, 0, false, hybrid);
0318     //printf("hybrid sort offset %d with %u items: ", o, nHalf);
0319     //for (int i = 0; i < nHalf; ++i) printf("%d.%03d  ", work[i].intPt(), work[i].intEta());
0320     //for (int i = 0; i < nHalf; ++i) printf("%s  ", work[i].pack().to_string(16).c_str());
0321     //printf("\n");
0322     //fflush(stdout);
0323     for (unsigned int i = 1; i < nHalf; ++i)
0324       assert(!(work[i - 1] < work[i]));
0325     if (o == 1) {
0326       for (unsigned int i = 0; i < nHalf; ++i) {
0327         halfsorted[i] = work[i];
0328       }
0329     }
0330   }
0331   // now merge work with the reversed of half-sorted
0332   unsigned int nMerge = std::min(nOut, nHalf);
0333   T tomerge[2 * nMerge];
0334   for (unsigned int i = 0; i < nMerge; ++i) {
0335     tomerge[nMerge - i - 1] = halfsorted[i];
0336     tomerge[nMerge + i] = work[i];
0337   }
0338   //printf("hybrid sort tomerge %u items before: ", 2*nMerge);
0339   //for (int i = 0; i < 2*nMerge; ++i) printf("%d.%03d  ", tomerge[i].intPt(), tomerge[i].intEta());
0340   //for (int i = 0; i < 2*nMerge; ++i) printf("%s  ", tomerge[i].pack().to_string(16).c_str());
0341   //printf("\n");
0342   hybridBitonicMergeRef(tomerge, 2 * nMerge, 0, false);
0343   //printf("hybrid sort tomerge %u items after:  ", 2*nMerge);
0344   //for (int i = 0; i < 2*nMerge; ++i) printf("%d.%03d  ", tomerge[i].intPt(), tomerge[i].intEta());
0345   //for (int i = 0; i < nOut; ++i) printf("%s  ", tomerge[i].pack().to_string(16).c_str());
0346   //printf("\n");
0347   //fflush(stdout);
0348   for (unsigned int i = 0; i < nOut; ++i) {
0349     out[i] = tomerge[i];
0350     if (i > 0)
0351       assert(!(out[i - 1] < out[i]));
0352   }
0353 }
0354 
0355 #endif