21#ifndef TLX_SORT_STRINGS_RADIX_SORT_HEADER
22#define TLX_SORT_STRINGS_RADIX_SORT_HEADER
39namespace sort_strings_detail {
48template <
typename StringShadowPtr>
63 for (
Iterator i = ss.begin(); i != ss.end(); ++i)
64 ++
bkt_size[ss.get_uint8(ss[i], depth)];
69 for (
size_t i = 1; i < 256; ++i)
70 bkt_index[i] = bkt_index[i - 1] +
bkt_size[i - 1];
73 for (
Iterator i = ss.begin(); i != ss.end(); ++i)
74 *(bkt_index[ss.get_uint8(ss[i], depth)]++) = std::move(ss[i]);
85 size_t size = ss.
size();
88 for (
size_t i = 1; i <
pos; ++i)
93 if (bkt > 0 && bkt < size)
111template <
typename StringShadowPtr>
117 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
118 radixstack.emplace(strptr, depth);
120 while (radixstack.size())
122 while (radixstack.top().idx < 255)
124 RadixStep& rs = radixstack.top();
127 size_t bkt_size = rs.
bkt_size[++rs.idx];
134 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
135 depth + radixstack.size(),
136 memory -
sizeof(RadixStep) * radixstack.size());
142 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
145 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
146 depth + radixstack.size(),
147 memory -
sizeof(RadixStep) * radixstack.size());
153 radixstack.emplace(rs.strptr.flip(rs.pos - bkt_size, bkt_size),
154 depth + radixstack.size());
165template <
typename StringPtr>
170 const StringSet& ss = strptr.
active();
178 2 *
sizeof(size_t) +
sizeof(StringSet)
179 + ss.size() *
sizeof(
typename StringSet::String);
180 size_t memory_slack = 3 *
sizeof(RadixStep);
182 if (memory != 0 && memory < memory_use + memory_slack + 1)
185 typename StringSet::Container shadow = ss.allocate(ss.size());
187 strptr.
add_shadow(StringSet(shadow)), depth, memory - memory_use);
188 StringSet::deallocate(shadow);
194template <
typename StringShadowPtr>
203 std::uint8_t* charcache) :
strptr(in_strptr) {
206 const size_t n = ss.size();
210 std::uint8_t* cc = charcache;
211 for (
Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
212 *cc = ss.get_uint8(ss[i], depth);
213 for (cc = charcache; cc != charcache + n; ++cc)
214 ++
bkt_size[
static_cast<std::uint8_t
>(*cc)];
219 for (
size_t i = 1; i < 256; ++i)
220 bkt_index[i] = bkt_index[i - 1] +
bkt_size[i - 1];
224 for (
Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
225 *(bkt_index[
static_cast<std::uint8_t
>(*cc)]++) = std::move(ss[i]);
235 size_t size = ss.
size();
238 for (
size_t i = 1; i <
pos; ++i)
243 if (bkt > 0 && bkt < size)
258template <
typename StringPtr>
260radixsort_CI3(
const StringPtr& strptr, std::uint16_t* charcache,
261 size_t depth,
size_t memory);
266template <
typename StringShadowPtr>
269 std::uint8_t* charcache,
size_t depth,
size_t memory) {
273 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
274 radixstack.emplace(strptr, depth, charcache);
278 while (
TLX_LIKELY(radixstack.top().idx < 255))
280 RadixStep& rs = radixstack.top();
283 size_t bkt_size = rs.
bkt_size[++rs.idx];
290 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
291 depth + radixstack.size(),
292 memory -
sizeof(RadixStep) * radixstack.size());
298 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
301 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
302 depth + radixstack.size(),
303 memory -
sizeof(RadixStep) * radixstack.size());
310 radixstack.emplace(rs.strptr.flip(rs.pos - bkt_size, bkt_size),
311 depth + radixstack.size(), charcache);
318template <
typename StringPtr>
323 const StringSet& ss = strptr.
active();
331 2 *
sizeof(size_t) +
sizeof(StringSet)
332 + ss.size() *
sizeof(std::uint8_t)
333 + ss.size() *
sizeof(
typename StringSet::String);
334 size_t memory_slack = 3 *
sizeof(RadixStep);
336 if (memory != 0 && memory < memory_use + memory_slack + 1)
339 typename StringSet::Container shadow = ss.allocate(ss.size());
340 std::uint8_t* charcache =
new std::uint8_t[ss.size()];
343 charcache, depth, memory - memory_use);
346 StringSet::deallocate(shadow);
352template <
typename StringShadowPtr>
363 std::uint16_t* charcache) :
strptr(in_strptr) {
366 const size_t n = ss.size();
370 std::uint16_t* cc = charcache;
371 for (
Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
372 *cc = ss.get_uint16(ss[i], depth);
373 for (cc = charcache; cc != charcache + n; ++cc)
374 ++
bkt_size[
static_cast<std::uint16_t
>(*cc)];
379 for (
size_t i = 1; i <
RADIX; ++i)
380 bkt_index[i] = bkt_index[i - 1] +
bkt_size[i - 1];
385 for (
size_t i = 1; i <
bkt_size[0]; ++i)
390 size_t bkt = bkt_index[first] +
bkt_size[first] - bkt_index[0];
393 while (second <
RADIX)
395 size_t partial_equal = (first >> 8) == (second >> 8);
405 for (
Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
406 *(bkt_index[
static_cast<std::uint16_t
>(*cc)]++) = std::move(ss[i]);
429template <
typename StringShadowPtr>
432 std::uint16_t* charcache,
size_t depth,
size_t memory) {
434 enum { RADIX = 0x10000 };
438 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
439 radixstack.emplace(strptr, depth, charcache);
443 while (
TLX_LIKELY(radixstack.top().idx < RADIX - 1))
445 RadixStep& rs = radixstack.top();
448 size_t bkt_size = rs.
bkt_size[++rs.idx];
455 rs.strptr.flip(rs.pos, bkt_size).copy_back();
456 for (
size_t i = rs.pos + 1; i < rs.pos + bkt_size; ++i)
457 rs.strptr.set_lcp(i, depth + 2 * radixstack.size() - 1);
463 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
464 depth + 2 * radixstack.size(),
465 memory -
sizeof(RadixStep) * radixstack.size());
468 else if (bkt_size < RADIX)
471 rs.strptr.flip(rs.pos, bkt_size),
472 reinterpret_cast<std::uint8_t*
>(charcache),
473 depth + 2 * radixstack.size(),
474 memory -
sizeof(RadixStep) * radixstack.size());
480 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
483 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
484 depth + 2 * radixstack.size(),
485 memory -
sizeof(RadixStep) * radixstack.size());
493 rs.strptr.flip(rs.pos - bkt_size, bkt_size),
494 depth + 2 * radixstack.size(), charcache);
501template <
typename StringPtr>
504 enum { RADIX = 0x10000 };
507 const StringSet& ss = strptr.
active();
511 if (ss.size() < RADIX)
518 2 *
sizeof(size_t) +
sizeof(StringSet)
519 + ss.size() *
sizeof(std::uint16_t)
520 + ss.size() *
sizeof(
typename StringSet::String);
521 size_t memory_slack = 3 *
sizeof(RadixStep);
523 if (memory != 0 && memory < memory_use + memory_slack + 1)
526 typename StringSet::Container shadow = ss.allocate(ss.size());
527 std::uint16_t* charcache =
new std::uint16_t[ss.size()];
530 charcache, depth, memory - memory_use);
533 StringSet::deallocate(shadow);
539template <
typename StringPtr>
543 typedef typename StringSet::String
String;
549 size_t base,
size_t depth, std::uint8_t* charcache) {
552 size_t size = ss.size();
556 std::uint8_t* cc = charcache;
557 for (
Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
558 *cc = ss.get_uint8(ss[i], depth);
559 for (cc = charcache; cc != charcache + size; ++cc)
560 ++
bkt_size[
static_cast<std::uint8_t
>(*cc)];
566 for (
size_t i = 1; i < 256; ++i) {
572 for (
size_t i = 0, j; i < size - last_bkt_size; )
574 String perm = std::move(ss[ss.begin() + i]);
575 std::uint8_t permch = charcache[i];
576 while ((j = --bkt[permch]) > i)
578 std::swap(perm, ss[ss.begin() + j]);
579 std::swap(permch, charcache[j]);
581 ss[ss.begin() + i] = std::move(perm);
592 for (
size_t i = 1; i <
bkt_size[0]; ++i)
597 if (lbkt > 0 && lbkt < size)
615template <
typename StringPtr>
618 size_t depth,
size_t memory) {
622 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
623 radixstack.emplace(strptr, 0, depth, charcache);
627 while (
TLX_LIKELY(radixstack.top().idx < 255))
629 RadixStep& rs = radixstack.top();
632 size_t bkt_size = rs.
bkt_size[++rs.idx];
640 strptr.
sub(rs.pos, bkt_size),
641 depth + radixstack.
size(),
642 memory -
sizeof(RadixStep) * radixstack.size());
648 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
651 strptr.
sub(rs.pos, bkt_size),
652 depth + radixstack.
size(),
653 memory -
sizeof(RadixStep) * radixstack.size());
661 strptr.
sub(rs.pos - bkt_size, bkt_size),
662 rs.pos - bkt_size, depth + radixstack.
size(), charcache);
672template <
typename StringPtr>
684 2 *
sizeof(size_t) +
sizeof(StringSet)
685 + strptr.
size() *
sizeof(std::uint8_t);
686 size_t memory_slack = 3 *
sizeof(RadixStep);
688 if (memory != 0 && memory < memory_use + memory_slack + 1)
691 std::uint8_t* charcache =
new std::uint8_t[strptr.
size()];
693 radixsort_CI2(strptr, charcache, depth, memory - memory_use);
701template <
typename StringPtr>
707 typedef typename StringSet::String
String;
713 std::uint16_t* charcache) {
716 const size_t n = ss.size();
719 std::uint16_t* cc = charcache;
720 for (
Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
721 *cc = ss.get_uint16(ss[i], depth);
722 for (cc = charcache; cc != charcache + n; ++cc)
723 ++
bkt_size[
static_cast<std::uint16_t
>(*cc)];
729 for (
size_t i = 1; i <
RADIX; ++i) {
730 bkt_index[i] = bkt_index[i - 1] +
bkt_size[i];
737 for (
size_t i = 1; i <
bkt_size[0]; ++i)
742 size_t bkt = bkt_index[first];
745 while (second <
RADIX)
747 size_t partial_equal = (first >> 8) == (second >> 8);
748 strptr.
set_lcp(bkt, depth + partial_equal);
756 for (
size_t i = 0, j; i < n - last_bkt_size; )
758 String perm = std::move(ss[ss.begin() + i]);
759 std::uint16_t permch = charcache[i];
760 while ((j = --bkt_index[permch]) > i)
762 std::swap(perm, ss[ss.begin() + j]);
763 std::swap(permch, charcache[j]);
765 ss[ss.begin() + i] = std::move(perm);
787template <
typename StringPtr>
790 size_t depth,
size_t memory) {
791 enum { RADIX = 0x10000 };
795 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
796 radixstack.emplace(strptr, 0, depth, charcache);
800 while (
TLX_LIKELY(radixstack.top().idx < RADIX - 1))
802 RadixStep& rs = radixstack.top();
805 size_t bkt_size = rs.
bkt_size[++rs.idx];
813 for (
size_t i = rs.pos + 1; i < rs.pos + bkt_size; ++i)
814 strptr.
set_lcp(i, depth + 2 * radixstack.size() - 1);
821 strptr.
sub(rs.pos, bkt_size),
822 depth + 2 * radixstack.
size(),
823 memory -
sizeof(RadixStep) * radixstack.size());
826 else if (bkt_size < RADIX)
829 strptr.
sub(rs.pos, bkt_size),
830 reinterpret_cast<std::uint8_t*
>(charcache),
831 depth + 2 * radixstack.
size(),
832 memory -
sizeof(RadixStep) * radixstack.size());
838 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
841 strptr.
sub(rs.pos, bkt_size),
842 depth + 2 * radixstack.
size(),
843 memory -
sizeof(RadixStep) * radixstack.size());
851 strptr.
sub(rs.pos - bkt_size, bkt_size),
853 depth + 2 * radixstack.
size(), charcache);
860template <
typename StringPtr>
863 enum { RADIX = 0x10000 };
869 if (strptr.
size() < RADIX)
876 2 *
sizeof(size_t) +
sizeof(StringSet)
877 + strptr.
size() *
sizeof(std::uint16_t);
878 size_t memory_slack = 3 *
sizeof(RadixStep);
880 if (memory != 0 && memory < memory_use + memory_slack + 1)
883 std::uint16_t* charcache =
new std::uint16_t[strptr.
size()];
884 radixsort_CI3(strptr, charcache, depth, memory - memory_use);
Simpler non-growing vector without initialization.
Objectified string array pointer array.
size_t size() const
return valid length
const StringSet & active() const
return currently active array
StringPtr sub(size_t offset, size_t sub_size) const
Advance (both) pointers by given offset, return sub-array.
static const bool with_lcp
if we want to save the LCPs
void set_lcp(size_t, const LcpType &) const
set the i-th lcp to v and check its value
WithShadow add_shadow(const StringSet &shadow) const
construct objectified string and shadow pointer class
Objectified string array pointer and shadow pointer array for out-of-place swapping of pointers.
size_t size() const
return valid length
const StringSet & active() const
return currently active array
const StringSet & shadow() const
return current shadow array
StringShadowPtr flip(size_t offset, size_t sub_size) const
construct a StringShadowPtr object specifying a sub-array with flipping to other array.
StringShadowPtr copy_back() const
return subarray pointer to n strings in original array, might copy from shadow before returning.
static const bool with_lcp
if we want to save the LCPs
void set_lcp(size_t, const LcpType &) const
set the i-th lcp to v and check its value
static void multikey_quicksort(const StringPtr &strptr, size_t depth, size_t memory)
Generic multikey quicksort for strings.
static const size_t g_inssort_threshold
static enable_if<!StringPtr::with_lcp, void >::type insertion_sort(const StringPtr &strptr, size_t depth, size_t)
Generic insertion sort for abstract string sets.
static void radixsort_CE2_loop(const StringShadowPtr &strptr, std::uint8_t *charcache, size_t depth, size_t memory)
static void radixsort_CE2(const StringPtr &strptr, size_t depth, size_t memory)
static void radixsort_CE3(const StringPtr &strptr, size_t depth, size_t memory)
static void radixsort_CE0_loop(const StringShadowPtr &strptr, size_t depth, size_t memory)
static void radixsort_CE3_loop(const StringShadowPtr &strptr, std::uint16_t *charcache, size_t depth, size_t memory)
static void radixsort_CE0(const StringPtr &strptr, size_t depth, size_t memory)
Adapter method to allocate shadow array if needed.
static void radixsort_CI2(const StringPtr &strptr, std::uint8_t *charcache, size_t depth, size_t memory)
static void radixsort_CI3(const StringPtr &strptr, std::uint16_t *charcache, size_t depth, size_t memory)
StringSet::Iterator Iterator
StringShadowPtr::StringSet StringSet
RadixStep_CE0(const StringShadowPtr &in_strptr, size_t depth)
StringSet::Iterator Iterator
StringShadowPtr::StringSet StringSet
RadixStep_CE2(const StringShadowPtr &in_strptr, size_t depth, std::uint8_t *charcache)
StringSet::Iterator Iterator
StringShadowPtr::StringSet StringSet
RadixStep_CE3(const StringShadowPtr &in_strptr, size_t depth, std::uint16_t *charcache)
size_t get_next_non_empty_bkt_index(size_t start)
StringSet::Iterator Iterator
StringPtr::StringSet StringSet
RadixStep_CI2(const StringPtr &strptr, size_t base, size_t depth, std::uint8_t *charcache)
StringSet::Iterator Iterator
StringPtr::StringSet StringSet
RadixStep_CI3(const StringPtr &strptr, size_t base, size_t depth, std::uint16_t *charcache)
size_t get_next_non_empty_bkt_index(size_t start)