42 #ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H
43 #define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1
54 #if _GLIBCXX_ASSERTIONS
58 namespace __gnu_parallel
61 template<
typename RandomAccessIterator>
65 typedef typename traits_type::difference_type difference_type;
98 template<
typename RandomAccessIterator,
typename Comparator>
99 typename std::iterator_traits<RandomAccessIterator>::difference_type
100 qsb_divide(RandomAccessIterator begin, RandomAccessIterator end,
103 _GLIBCXX_PARALLEL_ASSERT(num_threads > 0);
106 typedef typename traits_type::value_type value_type;
107 typedef typename traits_type::difference_type difference_type;
109 RandomAccessIterator pivot_pos =
113 #if defined(_GLIBCXX_ASSERTIONS)
115 difference_type n = end - begin;
117 _GLIBCXX_PARALLEL_ASSERT(
118 (!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos))
119 || (!comp(*pivot_pos, *begin) && !comp(*(end - 1), *pivot_pos))
120 || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos))
121 || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*(end - 1), *pivot_pos))
122 || (!comp(*pivot_pos, *(end - 1)) && !comp(*begin, *pivot_pos))
123 || (!comp(*pivot_pos, *(end - 1)) && !comp(*(begin + n / 2), *pivot_pos)));
127 if (pivot_pos != (end - 1))
128 std::swap(*pivot_pos, *(end - 1));
132 pred(comp, *pivot_pos);
136 begin, end - 1, pred, num_threads);
139 std::swap(*(begin + split_pos), *pivot_pos);
140 pivot_pos = begin + split_pos;
142 #if _GLIBCXX_ASSERTIONS
143 RandomAccessIterator r;
144 for (r = begin; r != pivot_pos; ++r)
145 _GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos));
146 for (; r != end; ++r)
147 _GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos));
161 template<
typename RandomAccessIterator,
typename Comparator>
164 RandomAccessIterator begin, RandomAccessIterator end,
170 typedef typename traits_type::value_type value_type;
171 typedef typename traits_type::difference_type difference_type;
173 difference_type n = end - begin;
175 if (num_threads <= 1 || n <= 1)
177 tls[iam]->
initial.first = begin;
178 tls[iam]->
initial.second = end;
186 difference_type split_pos =
qsb_divide(begin, end, comp, num_threads);
188 #if _GLIBCXX_ASSERTIONS
189 _GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin));
193 std::max<thread_index_t>(1, std::min<thread_index_t>(
194 num_threads - 1, split_pos * num_threads / n));
200 # pragma omp parallel num_threads(2)
203 if(omp_get_num_threads() < 2)
208 # pragma omp sections
214 num_threads_leftside,
222 iam + num_threads_leftside,
223 num_threads - num_threads_leftside,
237 template<
typename RandomAccessIterator,
typename Comparator>
240 Comparator& comp,
int iam,
bool wait)
243 typedef typename traits_type::value_type value_type;
244 typedef typename traits_type::difference_type difference_type;
249 difference_type base_case_n =
260 difference_type elements_done = 0;
261 #if _GLIBCXX_ASSERTIONS
262 difference_type total_elements_done = 0;
268 RandomAccessIterator begin = current.first, end = current.second;
269 difference_type n = end - begin;
274 RandomAccessIterator pivot_pos = begin + rng(n);
277 if (pivot_pos != (end - 1))
278 std::swap(*pivot_pos, *(end - 1));
282 <Comparator, value_type, value_type,
bool>
283 pred(comp, *pivot_pos);
286 RandomAccessIterator split_pos1, split_pos2;
287 split_pos1 = __gnu_sequential::partition(begin, end - 1, pred);
290 #if _GLIBCXX_ASSERTIONS
291 _GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end);
294 if (split_pos1 != pivot_pos)
295 std::swap(*split_pos1, *pivot_pos);
296 pivot_pos = split_pos1;
299 if ((split_pos1 + 1 - begin) < (n >> 7)
300 || (end - split_pos1) < (n >> 7))
305 <Comparator, value_type, value_type,
bool>, value_type>
307 <Comparator, value_type, value_type,
bool>(comp,
311 split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
316 split_pos2 = split_pos1 + 1;
319 elements_done += (split_pos2 - split_pos1);
320 #if _GLIBCXX_ASSERTIONS
321 total_elements_done += (split_pos2 - split_pos1);
324 if (((split_pos1 + 1) - begin) < (end - (split_pos2)))
327 if ((split_pos2) != end)
332 current.second = split_pos1;
338 if (begin != split_pos1)
342 current.first = split_pos2;
349 __gnu_sequential::sort(begin, end, comp);
351 #if _GLIBCXX_ASSERTIONS
352 total_elements_done += n;
364 #if _GLIBCXX_ASSERTIONS
365 double search_start = omp_get_wtime();
369 bool successfully_stolen =
false;
373 && (omp_get_wtime() < (search_start + 1.0))
378 victim = rng(num_threads);
381 successfully_stolen = (victim != iam)
382 && tls[victim]->leftover_parts.pop_back(current);
383 if (!successfully_stolen)
385 #if !defined(__ICC) && !defined(__ECC)
390 #if _GLIBCXX_ASSERTIONS
391 if (omp_get_wtime() >= (search_start + 1.0))
394 _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
395 < (search_start + 1.0));
398 if (!successfully_stolen)
400 #if _GLIBCXX_ASSERTIONS
416 template<
typename RandomAccessIterator,
typename Comparator>
425 typedef typename traits_type::value_type value_type;
426 typedef typename traits_type::difference_type difference_type;
431 difference_type n = end - begin;
441 tls_type** tls =
new tls_type*[num_threads];
442 difference_type queue_size = num_threads * (
thread_index_t)(log2(n) + 1);
450 volatile difference_type elements_leftover = n;
451 for (
int i = 0; i < num_threads; ++i)
454 tls[i]->num_threads = num_threads;
455 tls[i]->global = std::make_pair(begin, end);
458 tls[i]->initial = std::make_pair(end, end);
462 qsb_conquer(tls, begin, begin + n, comp, 0, num_threads,
true);
464 #if _GLIBCXX_ASSERTIONS
467 for (
int i = 1; i < num_threads; ++i)
468 _GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy));
471 for (
int i = 0; i < num_threads; ++i)
static const _Settings & get()
Get the global settings.
Parallel implementation of std::partition(), std::nth_element(), and std::partial_sort(). This file is a GNU parallel extension to the Standard C++ Library.
thread_index_t num_threads
Number of threads involved in this algorithm.
void qsb_conquer(QSBThreadLocal< RandomAccessIterator > **tls, RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, thread_index_t iam, thread_index_t num_threads, bool parent_wait)
Quicksort conquer step.
std::iterator_traits< RandomAccessIterator >::difference_type parallel_partition(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, thread_index_t num_threads)
Parallel implementation of std::partition.
std::pair< RandomAccessIterator, RandomAccessIterator > Piece
Continuous part of the sequence, described by an iterator pair.
std::iterator_traits< RandomAccessIterator >::difference_type qsb_divide(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, thread_index_t num_threads)
Balanced quicksort divide step.
#define _GLIBCXX_ASSERTIONS
Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code. Should be switched on only locally...
Similar to std::binder1st, but giving the argument types explicitly.
Similar to std::binder1st, but giving the argument types explicitly.
Random number generator based on the Mersenne twister. This file is a GNU parallel extension to the S...
uint16 thread_index_t
Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...
sequence_index_t sort_qsb_base_case_maximal_n
Maximal subsequence length to switch to unbalanced base case. Applies to std::sort with dynamically l...
Runtime settings and tuning parameters, heuristics to decide whether to use parallelized algorithms...
void yield()
Yield the control to another thread, without waiting for the end to the time slice.
volatile difference_type * elements_leftover
Pointer to a counter of elements left over to sort.
Lock-free double-ended queue. This file is a GNU parallel extension to the Standard C++ Library...
RandomAccessIterator median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b, RandomAccessIterator c, Comparator &comp)
Compute the median of three referenced elements, according to comp.
Random number generator, based on the Mersenne twister.
Information local to one thread in the parallel quicksort run.
Piece global
The complete sequence to sort.
Double-ended queue of bounded size, allowing lock-free atomic access. push_front() and pop_front() mu...
void qsb_local_sort_with_helping(QSBThreadLocal< RandomAccessIterator > **tls, Comparator &comp, int iam, bool wait)
Quicksort step doing load-balanced local sort.
RestrictedBoundedConcurrentQueue< Piece > leftover_parts
Work-stealing queue.
void parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, thread_index_t num_threads)
Top-level quicksort routine.
QSBThreadLocal(int queue_size)
Constructor.
Includes the original header files concerned with iterators except for stream iterators. This file is a GNU parallel extension to the Standard C++ Library.
#define _GLIBCXX_CALL(n)
Macro to produce log message when entering a function.
Similar to std::binder2nd, but giving the argument types explicitly.
Routines for checking the correctness of algorithm results. This file is a GNU parallel extension to ...
Piece initial
Initial piece to work on.