File indexing completed on 2024-04-06 12:21:25
0001 #ifndef BITONIC_HYBRID_REF_H
0002 #define BITONIC_HYBRID_REF_H
0003
0004 #include <algorithm>
0005 #include <cassert>
0006
0007 namespace hybridBitonicSortUtils {
0008 inline unsigned int PowerOf2LessThan(unsigned int n) {
0009 unsigned int i = 1;
0010 unsigned int prev = 1;
0011 if (n <= 1)
0012 return n;
0013 while (i < n) {
0014 i <<= 1;
0015 if (i < n) {
0016 prev = i;
0017 } else {
0018 return prev;
0019 }
0020 }
0021
0022 assert(false);
0023 }
0024
0025 template <typename T>
0026 void compAndSwap(T a[], int i, int j, bool dir) {
0027 if (dir) {
0028 if (a[j] < a[i])
0029 std::swap(a[i], a[j]);
0030 } else {
0031 if (a[i] < a[j])
0032 std::swap(a[i], a[j]);
0033 }
0034 }
0035
0036 inline unsigned int bitonicMergeLatencyRef(unsigned int nIn) {
0037 if (nIn <= 1)
0038 return 0;
0039 return 1 +
0040 std::max(bitonicMergeLatencyRef(PowerOf2LessThan(nIn)), bitonicMergeLatencyRef(nIn - PowerOf2LessThan(nIn)));
0041 }
0042
0043 inline unsigned int bitonicSortLatencyRef(unsigned int nIn, unsigned int nOut) {
0044 if (nIn <= 1)
0045 return 0;
0046 unsigned int sort1Size = nIn / 2, sort2Size = nIn - sort1Size;
0047 unsigned int sort1Latency = bitonicSortLatencyRef(sort1Size, nOut);
0048 unsigned int sort2Latency = bitonicSortLatencyRef(sort2Size, nOut);
0049 unsigned int mergeLatency = bitonicMergeLatencyRef(std::min(sort1Size, nOut) + std::min(sort2Size, nOut));
0050 return std::max(sort1Latency, sort2Latency) + mergeLatency;
0051 }
0052
0053 inline unsigned int hybridBitonicSortLatencyRef(unsigned int nIn, unsigned int nOut) {
0054 if (nIn <= 1)
0055 return 0;
0056 if (nIn == 5 || nIn == 6)
0057 return 3;
0058 if (nIn == 12)
0059 return 8;
0060 if (nIn == 13)
0061 return 9;
0062 unsigned int sort1Size = nIn / 2, sort2Size = nIn - sort1Size;
0063 unsigned int sort1Latency = hybridBitonicSortLatencyRef(sort1Size, nOut);
0064 unsigned int sort2Latency = hybridBitonicSortLatencyRef(sort2Size, nOut);
0065 unsigned int mergeLatency = bitonicMergeLatencyRef(std::min(sort1Size, nOut) + std::min(sort2Size, nOut));
0066 return std::max(sort1Latency, sort2Latency) + mergeLatency;
0067 }
0068
0069
0070 template <typename T>
0071 void clear(T& t) {
0072 t.clear();
0073 }
0074
0075 }
0076
0077 template <typename T>
0078 void hybridBitonicMergeRef(T a[], int N, int low, bool dir) {
0079 int k = hybridBitonicSortUtils::PowerOf2LessThan(N);
0080 int k2 = N - k;
0081 if (N > 1) {
0082 for (int i = low; i < low + k; i++) {
0083 if (i + k < low + N)
0084 hybridBitonicSortUtils::compAndSwap(a, i, i + k, dir);
0085 }
0086 if (N > 2) {
0087 hybridBitonicMergeRef(a, k, low, dir);
0088 hybridBitonicMergeRef(a, k2, low + k, dir);
0089 }
0090 }
0091 }
0092
0093 template <typename T>
0094 void check_sorted(T a[], int N, int low, bool dir) {
0095 bool ok = true;
0096 if (dir) {
0097 for (int i = 1; i < N; ++i)
0098 ok = ok && (!(a[low + i - 1] > a[low + i]));
0099 } else {
0100 for (int i = 1; i < N; ++i)
0101 ok = ok && (!(a[low + i - 1] < a[low + i]));
0102 }
0103 if (!ok) {
0104 printf("ERROR in sorting[N=%d,low=%d,dir=%d]: ", N, low, int(dir));
0105
0106
0107 printf("\n");
0108 fflush(stdout);
0109 assert(ok);
0110 }
0111 }
0112
0113 template <typename T>
0114 void hybridBitonicSortRef(T a[], int N, int low, bool dir, bool hybrid) {
0115 if (hybrid) {
0116 switch (N) {
0117 case 2:
0118 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0119 return;
0120 case 3:
0121 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0122 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0123
0124 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0125
0126 return;
0127 case 4:
0128 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0129 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0130
0131 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 2, dir);
0132 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 3, dir);
0133
0134 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0135
0136 return;
0137 case 5:
0138 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0139 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0140
0141 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 3, dir);
0142 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 4, dir);
0143
0144 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 2, dir);
0145 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 4, dir);
0146
0147 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0148 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 4, dir);
0149
0150 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0151
0152 return;
0153 case 6:
0154
0155 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 3, dir);
0156 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0157
0158 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0159 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0160 hybridBitonicSortUtils::compAndSwap(a, low + 4, low + 5, dir);
0161
0162 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 3, dir);
0163 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 4, dir);
0164 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 5, dir);
0165
0166 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 1, dir);
0167 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 4, dir);
0168 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 5, dir);
0169
0170 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0171 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 4, dir);
0172
0173 return;
0174 case 12:
0175 for (int i = 0; i < 12; i += 2) {
0176 hybridBitonicSortUtils::compAndSwap(a, low + i, low + i + 1, dir);
0177 }
0178
0179 for (int i = 0; i < 12; i += 4) {
0180 hybridBitonicSortUtils::compAndSwap(a, low + i + 0, low + i + 2, dir);
0181 hybridBitonicSortUtils::compAndSwap(a, low + i + 1, low + i + 3, dir);
0182 }
0183
0184 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 4, dir);
0185 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 5, dir);
0186 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 6, dir);
0187 hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 11, dir);
0188 hybridBitonicSortUtils::compAndSwap(a, low + 9, low + 10, dir);
0189
0190 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0191 hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 10, dir);
0192 hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 9, dir);
0193 hybridBitonicSortUtils::compAndSwap(a, low + 4, low + 8, dir);
0194 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 7, dir);
0195
0196 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 6, dir);
0197 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 5, dir);
0198 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 4, dir);
0199 hybridBitonicSortUtils::compAndSwap(a, low + 9, low + 10, dir);
0200 hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 11, dir);
0201 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 8, dir);
0202
0203 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 4, dir);
0204 hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 10, dir);
0205 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 3, dir);
0206 hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 6, dir);
0207 hybridBitonicSortUtils::compAndSwap(a, low + 8, low + 9, dir);
0208
0209 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 4, dir);
0210 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 5, dir);
0211 hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 8, dir);
0212 hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 9, dir);
0213
0214 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 4, dir);
0215 hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 6, dir);
0216 hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 8, dir);
0217
0218 return;
0219 case 13:
0220 for (int i = 0; i + 1 < 13; i += 2) {
0221 hybridBitonicSortUtils::compAndSwap(a, low + i, low + i + 1, dir);
0222 }
0223
0224 for (int i = 0; i + 3 < 13; i += 4) {
0225 hybridBitonicSortUtils::compAndSwap(a, low + i + 0, low + i + 2, dir);
0226 hybridBitonicSortUtils::compAndSwap(a, low + i + 1, low + i + 3, dir);
0227 }
0228
0229 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 4, dir);
0230 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 5, dir);
0231 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 6, dir);
0232 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 7, dir);
0233 hybridBitonicSortUtils::compAndSwap(a, low + 8, low + 12, dir);
0234
0235 hybridBitonicSortUtils::compAndSwap(a, low + 0, low + 8, dir);
0236 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 9, dir);
0237 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 10, dir);
0238 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 11, dir);
0239 hybridBitonicSortUtils::compAndSwap(a, low + 4, low + 12, dir);
0240
0241 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 2, dir);
0242 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 12, dir);
0243 hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 11, dir);
0244 hybridBitonicSortUtils::compAndSwap(a, low + 4, low + 8, dir);
0245 hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 10, dir);
0246 hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 9, dir);
0247
0248 hybridBitonicSortUtils::compAndSwap(a, low + 1, low + 4, dir);
0249 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 8, dir);
0250 hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 12, dir);
0251 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 10, dir);
0252 hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 9, dir);
0253
0254 hybridBitonicSortUtils::compAndSwap(a, low + 2, low + 4, dir);
0255 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 5, dir);
0256 hybridBitonicSortUtils::compAndSwap(a, low + 6, low + 8, dir);
0257 hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 9, dir);
0258 hybridBitonicSortUtils::compAndSwap(a, low + 10, low + 12, dir);
0259
0260 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 6, dir);
0261 hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 8, dir);
0262 hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 10, dir);
0263 hybridBitonicSortUtils::compAndSwap(a, low + 9, low + 12, dir);
0264
0265 hybridBitonicSortUtils::compAndSwap(a, low + 3, low + 4, dir);
0266 hybridBitonicSortUtils::compAndSwap(a, low + 5, low + 6, dir);
0267 hybridBitonicSortUtils::compAndSwap(a, low + 7, low + 8, dir);
0268 hybridBitonicSortUtils::compAndSwap(a, low + 9, low + 10, dir);
0269 hybridBitonicSortUtils::compAndSwap(a, low + 11, low + 12, dir);
0270
0271 return;
0272 }
0273 }
0274
0275
0276 if (N > 1) {
0277 int lowerSize = N / 2;
0278 int upperSize = N - N / 2;
0279 bool notDir = not dir;
0280 hybridBitonicSortRef(a, lowerSize, low, notDir, hybrid);
0281 hybridBitonicSortRef(a, upperSize, low + lowerSize, dir, hybrid);
0282 hybridBitonicMergeRef(a, N, low, dir);
0283
0284 }
0285 }
0286
0287 template <typename T>
0288 void hybrid_bitonic_sort_and_crop_ref(
0289 unsigned int nIn, unsigned int nOut, const T in[], T out[], bool hybrid = true) {
0290 T work[nIn];
0291 for (unsigned int i = 0; i < nIn; ++i) {
0292 work[i] = in[i];
0293 }
0294 hybridBitonicSortRef(work, nIn, 0, false, hybrid);
0295 for (unsigned int i = 0; i < nOut; ++i) {
0296 out[i] = work[i];
0297 }
0298 }
0299
0300 template <typename T>
0301 void folded_hybrid_bitonic_sort_and_crop_ref(
0302 unsigned int nIn, unsigned int nOut, const T in[], T out[], bool hybrid = true) {
0303 unsigned int nHalf = (nIn + 1) / 2;
0304 T work[nHalf], halfsorted[nHalf];
0305
0306
0307
0308
0309
0310 for (int o = 1; o >= 0; --o) {
0311 for (unsigned int i = 0; i < nHalf && 2 * i + o < nIn; ++i) {
0312 work[i] = in[2 * i + o];
0313 }
0314 if ((nIn % 2 == 1) && (o == 1)) {
0315 hybridBitonicSortUtils::clear(work[nHalf - 1]);
0316 }
0317 hybridBitonicSortRef(work, nHalf, 0, false, hybrid);
0318
0319
0320
0321
0322
0323 for (unsigned int i = 1; i < nHalf; ++i)
0324 assert(!(work[i - 1] < work[i]));
0325 if (o == 1) {
0326 for (unsigned int i = 0; i < nHalf; ++i) {
0327 halfsorted[i] = work[i];
0328 }
0329 }
0330 }
0331
0332 unsigned int nMerge = std::min(nOut, nHalf);
0333 T tomerge[2 * nMerge];
0334 for (unsigned int i = 0; i < nMerge; ++i) {
0335 tomerge[nMerge - i - 1] = halfsorted[i];
0336 tomerge[nMerge + i] = work[i];
0337 }
0338
0339
0340
0341
0342 hybridBitonicMergeRef(tomerge, 2 * nMerge, 0, false);
0343
0344
0345
0346
0347
0348 for (unsigned int i = 0; i < nOut; ++i) {
0349 out[i] = tomerge[i];
0350 if (i > 0)
0351 assert(!(out[i - 1] < out[i]));
0352 }
0353 }
0354
0355 #endif