-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathRadixSortLsdBenchmark.cpp
More file actions
114 lines (99 loc) · 5.04 KB
/
RadixSortLsdBenchmark.cpp
File metadata and controls
114 lines (99 loc) · 5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include <stddef.h>
#include <stdio.h>
#include <iostream>
#include <algorithm>
#include <chrono>
#include <random>
#include <ratio>
#include <vector>
#include <execution>
#include "RadixSortLSD.h"
#include "RadixSortLsdParallel.h"
using std::chrono::duration;
using std::chrono::duration_cast;
using std::chrono::high_resolution_clock;
using std::milli;
using std::random_device;
using std::sort;
using std::vector;
const int iterationCount = 5;
static void print_results(const char* const tag, const unsigned* sorted, size_t sortedLength,
high_resolution_clock::time_point startTime,
high_resolution_clock::time_point endTime) {
printf("%s: Lowest: %u Highest: %u Time: %fms\n", tag,
sorted[0], sorted[sortedLength - 1],
duration_cast<duration<double, milli>>(endTime - startTime).count());
}
int RadixSortLsdBenchmark(vector<unsigned>& uints)
{
vector<unsigned> uintsCopy(uints);
vector<unsigned> tmp_working(uints); // temporary working array/buffer
for (int i = 0; i < iterationCount; ++i)
{
for (size_t j = 0; j < uints.size(); j++) { // copy the original random array into the source array each time, since ParallelMergeSort modifies the source array while sorting
uintsCopy[ j] = uints[j];
tmp_working[j] = (unsigned)j; // page in the destination array into system memory
}
// Eliminate compiler ability to optimize paging-in of the input and output arrays
// Paging-in source and destination arrays leads to a 50% speed-up on Linux, and 15% on Windows
vector<unsigned> sorted_reference(uints);
sort(std::execution::par_unseq, sorted_reference.begin(), sorted_reference.end());
//printf("uintsCopy address = %p sorted address = %p value at a random location = %lu %lu\n", uintsCopy, sorted, sorted[static_cast<unsigned>(rd()) % uints.size()], uintsCopy[static_cast<unsigned>(rd()) % uints.size()]);
const auto startTime = high_resolution_clock::now();
RadixSortLSDPowerOf2Radix_unsigned_TwoPhase( uintsCopy.data(), tmp_working.data(), uints.size());
//RadixSortLSDPowerOf2Radix_unsigned_TwoPhase_DeRandomize(uintsCopy.data(), tmp_working.data(), uints.size());
//RadixSortLSDPowerOf2Radix_Nbit_TwoPhase_DeRandomize< 11 >(uintsCopy.data(), tmp_working.data(), uints.size());
//RadixSortLSDPowerOf2Radix(uintsCopy.data(), tmp_working.data(), uints.size()); // baseline serial implementation - i.e. the slowest
//RadixSortLSDPowerOf2Radix_Nbit<11>(uintsCopy.data(), tmp_working.data(), uints.size()); // baseline serial implementation - i.e. the slowest
//sort_radix_in_place_adaptive(uintsCopy, (unsigned long)uints.size(), 0.1);
//sort_radix_in_place_stable_adaptive(uintsCopy, uints.size(), 0.9);
const auto endTime = high_resolution_clock::now();
print_results("Radix Sort LSD", uintsCopy.data(), uints.size(), startTime, endTime);
if (!std::equal(sorted_reference.begin(), sorted_reference.end(), uintsCopy.begin()))
{
printf("Arrays are not equal\n");
for(size_t i = 0; i < uints.size(); i++)
{
if (sorted_reference[i] != uintsCopy[i])
{
printf("Difference at index %zu: sorted_reference = %x, uintsCopy = %x\n", i, sorted_reference[i], uintsCopy[i]);
break;
}
}
exit(1);
}
}
return 0;
}
int ParallelRadixSortLsdBenchmark(vector<unsigned>& uints)
{
vector<unsigned> uintsCopy( uints.size());
vector<unsigned> tmp_working(uints.size());
printf("\n");
// time how long it takes to sort them:
for (int i = 0; i < iterationCount; ++i)
{
for (size_t j = 0; j < uints.size(); j++) { // copy the original random array into the source array each time, since ParallelMergeSort modifies the source array while sorting
uintsCopy[ j] = uints[j];
tmp_working[j] = (unsigned)j; // page in the destination array into system memory
}
// Eliminate compiler ability to optimize paging-in of the input and output arrays
// Paging-in source and destination arrays leads to a 50% speed-up on Linux, and 15% on Windows
vector<unsigned> sorted_reference(uints);
stable_sort(std::execution::par_unseq, sorted_reference.begin(), sorted_reference.end());
//printf("uintsCopy address = %p sorted address = %p value at a random location = %lu %lu\n", uintsCopy, tmp_working, tmp_working[static_cast<unsigned>(rd()) % uints.size()], uintsCopy[static_cast<unsigned>(rd()) % uints.size()]);
const auto startTime = high_resolution_clock::now();
//RadixSortLSDPowerOf2Radix_unsigned_TwoPhase(uintsCopy, tmp_working, uints.size());
//RadixSortLSDPowerOf2RadixParallel_unsigned_TwoPhase_DeRandomize(uintsCopy, tmp_working, (unsigned long)uints.size());
//SortRadixPar(uintsCopy, tmp_working, uints.size(), uints.size() / 24); // slower than using all cores
ParallelAlgorithms::SortRadixPar(uintsCopy.data(), tmp_working.data(), uints.size()); // fastest on 96-core Intel and AMD AWS c7 nodes
const auto endTime = high_resolution_clock::now();
print_results("Parallel Radix Sort LSD", uintsCopy.data(), uints.size(), startTime, endTime);
if (!std::equal(sorted_reference.begin(), sorted_reference.end(), uintsCopy.data()))
{
printf("Arrays are not equal\n");
exit(1);
}
}
return 0;
}