step20
Loading...
Searching...
No Matches
suffix_tree.hpp
Go to the documentation of this file.
1// Andrew Naplavkov
2
3#ifndef STEP20_SUFFIX_TREE_HPP
4#define STEP20_SUFFIX_TREE_HPP
5
6#include "generator.hpp"
7#include "to.hpp"
8#include <stack>
9#include <string>
10#include <unordered_map>
11
12namespace step20 {
13
15
21template <class Char,
22 std::unsigned_integral Size = std::size_t,
23 class Map = std::unordered_map<Char, Size>>
25public:
26 using value_type = Char;
27 using size_type = Size;
28 const Char* data() const { return str_.data(); }
29 Size size() const { return str_.size(); }
30 bool leaf(Size node) const { return node >= nodes_.size(); }
31 virtual ~suffix_tree() = default;
32
33 void clear() noexcept
34 {
35 str_.clear();
36 nodes_.clear();
37 pos_ = node_ = 0;
38 }
39
42 void push_back(Char ch)
43 try {
44 str_.push_back(ch);
45 if (nodes_.empty())
46 nodes_.emplace_back();
47 auto tie = [this, src = nodes_.size()](Size dest) mutable {
48 if (!leaf(src) && src != dest)
49 nodes_[src++].link = dest;
50 };
51 while (pos_ < size()) {
52 if (Size& node = nodes_[node_].children[str_[pos_]]) {
53 if (skip(node))
54 continue;
55 if (!split(node))
56 return tie(node_);
57 tie(nodes_.size() - 1);
58 }
59 else {
60 node = flip(pos_);
61 tie(node_);
62 }
63 node_ ? node_ = nodes_[node_].link : ++pos_;
64 }
65 }
66 catch (...) {
67 clear();
68 throw;
69 }
70
71 struct slice_type {
72 Size first, last;
73 Size length() const { return last - first; }
74 };
75
76 struct edge_type {
80 };
81
83 slice_type label(Size node) const
84 {
85 return leaf(node) ? slice_type{flip(node), size()} : nodes_[node].label;
86 }
87
89 slice_type labels(const edge_type& edge) const
90 {
91 auto last = label(edge.child_node).last;
92 return {last - edge.labels_len, last};
93 }
94
96
98 std::optional<edge_type> find(std::ranges::forward_range auto&& str) const
99 {
100 if (nodes_.empty())
101 return std::nullopt;
102 auto first = std::ranges::begin(str);
103 auto last = std::ranges::end(str);
104 for (auto edge = edge_type{};;) {
105 auto lbl = label(edge.child_node);
106 edge.labels_len += lbl.length();
107 auto diff = std::mismatch(
108 first, last, data() + lbl.first, data() + lbl.last, eq_);
109 if (diff.first == last)
110 return edge;
111 if (diff.second != data() + lbl.last || leaf(edge.child_node))
112 return std::nullopt;
113 auto it = nodes_[edge.child_node].children.find(*diff.first);
114 if (it == nodes_[edge.child_node].children.end())
115 return std::nullopt;
116 first = diff.first;
117 edge.parent_node = std::exchange(edge.child_node, it->second);
118 }
119 }
120
124 {
125 for (auto stack = std::stack<edge_type>{{start}}; !stack.empty();) {
126 auto edge = stack.top();
127 co_yield edge;
128 stack.pop();
129 if (!leaf(edge.child_node))
130 std::ranges::copy(
131 try_reverse(nodes_[edge.child_node].children) |
132 std::views::transform([&](auto&& item) {
133 return edge_type{
134 edge.child_node,
135 item.second,
136 edge.labels_len + label(item.second).length()};
137 }),
138 emplace_iterator(stack));
139 }
140 }
141
142private:
143 struct node_type {
144 Map children;
145 slice_type label;
146 Size link;
147 };
148
149 inline static auto eq_ = key_equivalence_fn<Map>();
150 std::basic_string<Char> str_;
151 std::vector<node_type> nodes_;
152 Size pos_{}, node_{};
153
154 bool skip(Size node)
155 {
156 Size len = label(node).length();
157 if (size() <= pos_ + len)
158 return false;
159 pos_ += len;
160 node_ = node;
161 return true;
162 }
163
164 bool split(Size& node)
165 {
166 auto lbl = label(node);
167 Size cut = lbl.first + (size() - pos_) - 1;
168 Size back = size() - 1;
169 if (eq_(str_[cut], str_[back]))
170 return false;
171 Size old = std::exchange(node, (Size)nodes_.size());
172 nodes_.push_back({{{str_[cut], leaf(old) ? flip(cut) : old},
173 {str_[back], flip(back)}},
174 {lbl.first, cut}});
175 if (!leaf(old))
176 nodes_[old].label = {cut, lbl.last};
177 return true;
178 }
179};
180
181} // namespace step20
182
183#endif // STEP20_SUFFIX_TREE_HPP
Ukkonen's online algorithm for constructing suffix tree.
Definition suffix_tree.hpp:24
virtual ~suffix_tree()=default
void push_back(Char ch)
Definition suffix_tree.hpp:42
Char value_type
Definition suffix_tree.hpp:26
slice_type labels(const edge_type &edge) const
Definition suffix_tree.hpp:89
std::optional< edge_type > find(std::ranges::forward_range auto &&str) const
Find the minimum depth edge which labels start with substring.
Definition suffix_tree.hpp:98
Size size_type
Definition suffix_tree.hpp:27
const Char * data() const
Definition suffix_tree.hpp:28
bool leaf(Size node) const
Definition suffix_tree.hpp:30
generator< edge_type > depth_first_search(edge_type start) const
Definition suffix_tree.hpp:123
slice_type label(Size node) const
Definition suffix_tree.hpp:83
void clear() noexcept
Definition suffix_tree.hpp:33
Size size() const
Definition suffix_tree.hpp:29
Definition edit_distance.hpp:11
Definition generator.hpp:19
Definition suffix_tree.hpp:76
Size labels_len
number of characters in [root_node .. child_node]
Definition suffix_tree.hpp:79
Size parent_node
Definition suffix_tree.hpp:77
Size child_node
Definition suffix_tree.hpp:78
Definition suffix_tree.hpp:71
Size first
Definition suffix_tree.hpp:72
Size length() const
Definition suffix_tree.hpp:73
Size last
half-open character range
Definition suffix_tree.hpp:72