step20
Loading...
Searching...
No Matches
suffix_array.hpp
Go to the documentation of this file.
1// Andrew Naplavkov
2
3#ifndef STEP20_SUFFIX_ARRAY_HPP
4#define STEP20_SUFFIX_ARRAY_HPP
5
6#include "to.hpp"
7#include <functional>
8#include <span>
9#include <string>
10
11namespace step20 {
12
14
20template <class Char,
21 std::unsigned_integral Size = std::size_t,
22 class Compare = std::less<>>
24public:
25 using value_type = Char;
26 using size_type = Size;
27 const Char* data() const { return str_.data(); }
28 Size size() const { return str_.size(); }
29 Size operator[](Size n) const { return pos_[n]; }
30 const Compare& comp() const { return comp_; }
31 virtual ~suffix_array() = default;
32
33 explicit suffix_array(std::basic_string<Char> str, const Compare& comp = {})
34 : str_(std::move(str)), pos_(size()), comp_(comp)
35 {
36 auto sufs = std::vector<suffix>(size());
37 auto gen = [i = Size{}]() mutable { return suffix{i++, {}}; };
38 auto ch = [&](auto& suf) { return str_[suf.pos]; };
39 auto comp_ch = [&](auto& l, auto& r) { return comp_(ch(l), ch(r)); };
40 auto comp_rank = [](auto& l, auto& r) { return l.rank < r.rank; };
41 std::ranges::generate(sufs, gen);
42 fill_first_rank(sufs, comp_ch);
43 for (Size offset = 1; !sorted(sufs); offset *= 2) {
44 fill_second_rank(sufs, offset);
45 fill_first_rank(sufs, comp_rank);
46 }
47 std::ranges::transform(sufs, pos_.begin(), &suffix::pos);
48 }
49
50 template <std::ranges::input_range R>
51 explicit suffix_array(R&& r, const Compare& comp = {})
52 : suffix_array(to<std::basic_string>(std::forward<R>(r)), comp)
53 {
54 }
55
57
60 std::span<const Size> find(std::ranges::input_range auto&& str) const
61 {
62 auto result = std::span{pos_};
63 auto first = std::ranges::begin(str);
64 auto last = std::ranges::end(str);
65 auto i = Size{};
66 auto ch = [&](Size n) { return n < str_.size() ? str_[n] : *first; };
67 auto comp = [&](Size l, Size r) { return comp_(ch(l + i), ch(r + i)); };
68 for (; first != last; ++first, ++i)
69 result = std::ranges::equal_range(result, size(), comp);
70 return result;
71 }
72
73private:
74 std::basic_string<Char> str_;
75 std::vector<Size> pos_;
76 Compare comp_;
77
78 struct suffix {
79 Size pos;
80 std::pair<Size, Size> rank;
81
82 friend void fill_first_rank(std::vector<suffix>& sufs, auto comp)
83 {
84 std::ranges::sort(sufs, comp);
85 Size acc = 1;
86 for (Size i = 1; i < sufs.size(); ++i) {
87 bool diff = comp(sufs[i - 1], sufs[i]);
88 sufs[i - 1].rank.first = acc;
89 acc += diff;
90 }
91 if (!sufs.empty())
92 sufs.back().rank.first = acc;
93 }
94
95 friend void fill_second_rank(std::vector<suffix>& sufs, Size offset)
96 {
97 auto ranks = std::vector<Size>(sufs.size());
98 auto at = [&](Size n) { return n < ranks.size() ? ranks[n] : 0; };
99 for (auto& suf : sufs)
100 ranks[suf.pos] = suf.rank.first;
101 for (auto& suf : sufs)
102 suf.rank.second = at(suf.pos + offset);
103 }
104
105 friend bool sorted(const std::vector<suffix>& sufs)
106 {
107 return sufs.empty() || sufs.back().rank.first == sufs.size();
108 }
109 };
110};
111
112template <std::ranges::input_range R, class Compare = std::less<>>
113suffix_array(R, Compare = {})
114 -> suffix_array<std::ranges::range_value_t<R>, std::size_t, Compare>;
115
117
119template <class Char,
120 std::unsigned_integral Size = std::size_t,
121 class Compare = std::less<>>
122class enhanced_suffix_array : public suffix_array<Char, Size, Compare> {
123 std::vector<Size> lcp_;
124
125public:
126 std::span<const Size> lcp_array() const { return lcp_; }
127
129 : suffix_array<Char, Size, Compare>(std::move(arr)), lcp_(this->size())
130 {
131 const auto& me = *this;
132 auto inv = std::vector<Size>(me.size());
133 for (Size i = 0; i < me.size(); ++i)
134 inv[me[i]] = i;
135 auto first = me.data();
136 auto last = first + me.size();
137 auto eq = equivalence_fn(me.comp());
138 for (Size i = 0, lcp = 0; i < me.size(); ++i, lcp -= !!lcp) {
139 Size cur = inv[i];
140 if (Size next = cur + 1; next < me.size()) {
141 auto diff = std::mismatch(
142 first + i + lcp, last, first + me[next] + lcp, last, eq);
143 lcp = diff.first - (first + i);
144 }
145 else
146 lcp = 0;
147 lcp_[cur] = lcp;
148 }
149 }
150
151 template <std::ranges::input_range R>
152 explicit enhanced_suffix_array(R&& r, const Compare& comp = {})
154 suffix_array<Char, Size, Compare>(std::forward<R>(r), comp))
155 {
156 }
157};
158
159template <std::ranges::input_range R, class Compare = std::less<>>
160enhanced_suffix_array(R, Compare = {})
161 -> enhanced_suffix_array<std::ranges::range_value_t<R>,
162 std::size_t,
163 Compare>;
164
165} // namespace step20
166
167#endif // STEP20_SUFFIX_ARRAY_HPP
Kasai's algorithm for constructing longest common prefix array.
Definition suffix_array.hpp:122
enhanced_suffix_array(R &&r, const Compare &comp={})
Definition suffix_array.hpp:152
enhanced_suffix_array(suffix_array< Char, Size, Compare > &&arr)
Definition suffix_array.hpp:128
std::span< const Size > lcp_array() const
Definition suffix_array.hpp:126
Manber's algorithm for constructing sorted array of non-empty suffixes.
Definition suffix_array.hpp:23
suffix_array(std::basic_string< Char > str, const Compare &comp={})
Definition suffix_array.hpp:33
Size operator[](Size n) const
suffix position
Definition suffix_array.hpp:29
Size size_type
Definition suffix_array.hpp:26
suffix_array(R &&r, const Compare &comp={})
Definition suffix_array.hpp:51
Char value_type
Definition suffix_array.hpp:25
Size size() const
Definition suffix_array.hpp:28
const Compare & comp() const
Definition suffix_array.hpp:30
const Char * data() const
Definition suffix_array.hpp:27
virtual ~suffix_array()=default
std::span< const Size > find(std::ranges::input_range auto &&str) const
Find positions of suffixes starting with substring.
Definition suffix_array.hpp:60
Definition edit_distance.hpp:11
To to(From &&from)
Definition to.hpp:13