1*a03ca8b9SKrzysztof Kosiński // Copyright 2017 The Chromium Authors. All rights reserved.
2*a03ca8b9SKrzysztof Kosiński // Use of this source code is governed by a BSD-style license that can be
3*a03ca8b9SKrzysztof Kosiński // found in the LICENSE file.
4*a03ca8b9SKrzysztof Kosiński
5*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/suffix_array.h"
6*a03ca8b9SKrzysztof Kosiński
7*a03ca8b9SKrzysztof Kosiński #include <stddef.h>
8*a03ca8b9SKrzysztof Kosiński #include <stdint.h>
9*a03ca8b9SKrzysztof Kosiński
10*a03ca8b9SKrzysztof Kosiński #include <algorithm>
11*a03ca8b9SKrzysztof Kosiński #include <initializer_list>
12*a03ca8b9SKrzysztof Kosiński #include <string>
13*a03ca8b9SKrzysztof Kosiński #include <vector>
14*a03ca8b9SKrzysztof Kosiński
15*a03ca8b9SKrzysztof Kosiński #include "testing/gtest/include/gtest/gtest.h"
16*a03ca8b9SKrzysztof Kosiński
17*a03ca8b9SKrzysztof Kosiński namespace zucchini {
18*a03ca8b9SKrzysztof Kosiński
19*a03ca8b9SKrzysztof Kosiński namespace {
20*a03ca8b9SKrzysztof Kosiński
21*a03ca8b9SKrzysztof Kosiński using SLType = InducedSuffixSort::SLType;
22*a03ca8b9SKrzysztof Kosiński
23*a03ca8b9SKrzysztof Kosiński } // namespace
24*a03ca8b9SKrzysztof Kosiński
25*a03ca8b9SKrzysztof Kosiński using ustring = std::basic_string<unsigned char>;
26*a03ca8b9SKrzysztof Kosiński
27*a03ca8b9SKrzysztof Kosiński constexpr uint16_t kNumChar = 256;
28*a03ca8b9SKrzysztof Kosiński
MakeUnsignedString(const std::string & str)29*a03ca8b9SKrzysztof Kosiński ustring MakeUnsignedString(const std::string& str) {
30*a03ca8b9SKrzysztof Kosiński return {str.begin(), str.end()};
31*a03ca8b9SKrzysztof Kosiński }
32*a03ca8b9SKrzysztof Kosiński
33*a03ca8b9SKrzysztof Kosiński template <class T>
MakeVector(const std::initializer_list<T> & ilist)34*a03ca8b9SKrzysztof Kosiński std::vector<T> MakeVector(const std::initializer_list<T>& ilist) {
35*a03ca8b9SKrzysztof Kosiński return {ilist.begin(), ilist.end()};
36*a03ca8b9SKrzysztof Kosiński }
37*a03ca8b9SKrzysztof Kosiński
TestSlPartition(std::initializer_list<SLType> expected_sl_partition,std::initializer_list<size_t> expected_lms_indices,std::string str)38*a03ca8b9SKrzysztof Kosiński void TestSlPartition(std::initializer_list<SLType> expected_sl_partition,
39*a03ca8b9SKrzysztof Kosiński std::initializer_list<size_t> expected_lms_indices,
40*a03ca8b9SKrzysztof Kosiński std::string str) {
41*a03ca8b9SKrzysztof Kosiński using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>;
42*a03ca8b9SKrzysztof Kosiński
43*a03ca8b9SKrzysztof Kosiński std::vector<SLType> sl_partition(str.size());
44*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(expected_lms_indices.size(),
45*a03ca8b9SKrzysztof Kosiński SaisImpl::BuildSLPartition(str.begin(), str.size(), kNumChar,
46*a03ca8b9SKrzysztof Kosiński sl_partition.rbegin()));
47*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition);
48*a03ca8b9SKrzysztof Kosiński
49*a03ca8b9SKrzysztof Kosiński std::vector<size_t> lms_indices(expected_lms_indices.size());
50*a03ca8b9SKrzysztof Kosiński SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin());
51*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices);
52*a03ca8b9SKrzysztof Kosiński }
53*a03ca8b9SKrzysztof Kosiński
TEST(InducedSuffixSortTest,BuildSLPartition)54*a03ca8b9SKrzysztof Kosiński TEST(InducedSuffixSortTest, BuildSLPartition) {
55*a03ca8b9SKrzysztof Kosiński TestSlPartition({}, {}, "");
56*a03ca8b9SKrzysztof Kosiński TestSlPartition(
57*a03ca8b9SKrzysztof Kosiński {
58*a03ca8b9SKrzysztof Kosiński SLType::LType,
59*a03ca8b9SKrzysztof Kosiński },
60*a03ca8b9SKrzysztof Kosiński {}, "a");
61*a03ca8b9SKrzysztof Kosiński TestSlPartition(
62*a03ca8b9SKrzysztof Kosiński {
63*a03ca8b9SKrzysztof Kosiński SLType::LType,
64*a03ca8b9SKrzysztof Kosiński SLType::LType,
65*a03ca8b9SKrzysztof Kosiński },
66*a03ca8b9SKrzysztof Kosiński {}, "ba");
67*a03ca8b9SKrzysztof Kosiński TestSlPartition(
68*a03ca8b9SKrzysztof Kosiński {
69*a03ca8b9SKrzysztof Kosiński SLType::SType,
70*a03ca8b9SKrzysztof Kosiński SLType::LType,
71*a03ca8b9SKrzysztof Kosiński },
72*a03ca8b9SKrzysztof Kosiński {}, "ab");
73*a03ca8b9SKrzysztof Kosiński TestSlPartition(
74*a03ca8b9SKrzysztof Kosiński {
75*a03ca8b9SKrzysztof Kosiński SLType::SType,
76*a03ca8b9SKrzysztof Kosiński SLType::SType,
77*a03ca8b9SKrzysztof Kosiński SLType::LType,
78*a03ca8b9SKrzysztof Kosiński },
79*a03ca8b9SKrzysztof Kosiński {}, "aab");
80*a03ca8b9SKrzysztof Kosiński TestSlPartition(
81*a03ca8b9SKrzysztof Kosiński {
82*a03ca8b9SKrzysztof Kosiński SLType::LType,
83*a03ca8b9SKrzysztof Kosiński SLType::LType,
84*a03ca8b9SKrzysztof Kosiński SLType::LType,
85*a03ca8b9SKrzysztof Kosiński },
86*a03ca8b9SKrzysztof Kosiński {}, "bba");
87*a03ca8b9SKrzysztof Kosiński TestSlPartition(
88*a03ca8b9SKrzysztof Kosiński {
89*a03ca8b9SKrzysztof Kosiński SLType::LType,
90*a03ca8b9SKrzysztof Kosiński SLType::SType,
91*a03ca8b9SKrzysztof Kosiński SLType::LType,
92*a03ca8b9SKrzysztof Kosiński },
93*a03ca8b9SKrzysztof Kosiński {1}, "bab");
94*a03ca8b9SKrzysztof Kosiński TestSlPartition(
95*a03ca8b9SKrzysztof Kosiński {
96*a03ca8b9SKrzysztof Kosiński SLType::LType,
97*a03ca8b9SKrzysztof Kosiński SLType::SType,
98*a03ca8b9SKrzysztof Kosiński SLType::SType,
99*a03ca8b9SKrzysztof Kosiński SLType::LType,
100*a03ca8b9SKrzysztof Kosiński },
101*a03ca8b9SKrzysztof Kosiński {1}, "baab");
102*a03ca8b9SKrzysztof Kosiński
103*a03ca8b9SKrzysztof Kosiński TestSlPartition(
104*a03ca8b9SKrzysztof Kosiński {
105*a03ca8b9SKrzysztof Kosiński SLType::LType, // zucchini
106*a03ca8b9SKrzysztof Kosiński SLType::LType, // ucchini
107*a03ca8b9SKrzysztof Kosiński SLType::SType, // cchini
108*a03ca8b9SKrzysztof Kosiński SLType::SType, // chini
109*a03ca8b9SKrzysztof Kosiński SLType::SType, // hini
110*a03ca8b9SKrzysztof Kosiński SLType::SType, // ini
111*a03ca8b9SKrzysztof Kosiński SLType::LType, // ni
112*a03ca8b9SKrzysztof Kosiński SLType::LType, // i
113*a03ca8b9SKrzysztof Kosiński },
114*a03ca8b9SKrzysztof Kosiński {2}, "zucchini");
115*a03ca8b9SKrzysztof Kosiński }
116*a03ca8b9SKrzysztof Kosiński
BucketCount(const std::initializer_list<unsigned char> str,uint16_t max_key)117*a03ca8b9SKrzysztof Kosiński std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str,
118*a03ca8b9SKrzysztof Kosiński uint16_t max_key) {
119*a03ca8b9SKrzysztof Kosiński using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>;
120*a03ca8b9SKrzysztof Kosiński return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key);
121*a03ca8b9SKrzysztof Kosiński }
122*a03ca8b9SKrzysztof Kosiński
TEST(InducedSuffixSortTest,BucketCount)123*a03ca8b9SKrzysztof Kosiński TEST(InducedSuffixSortTest, BucketCount) {
124*a03ca8b9SKrzysztof Kosiński using vec = std::vector<size_t>;
125*a03ca8b9SKrzysztof Kosiński
126*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4));
127*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4));
128*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4));
129*a03ca8b9SKrzysztof Kosiński }
130*a03ca8b9SKrzysztof Kosiński
InducedSortSubstring(ustring str)131*a03ca8b9SKrzysztof Kosiński std::vector<size_t> InducedSortSubstring(ustring str) {
132*a03ca8b9SKrzysztof Kosiński using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>;
133*a03ca8b9SKrzysztof Kosiński std::vector<SLType> sl_partition(str.size());
134*a03ca8b9SKrzysztof Kosiński size_t lms_count = SaisImpl::BuildSLPartition(
135*a03ca8b9SKrzysztof Kosiński str.begin(), str.size(), kNumChar, sl_partition.rbegin());
136*a03ca8b9SKrzysztof Kosiński std::vector<size_t> lms_indices(lms_count);
137*a03ca8b9SKrzysztof Kosiński SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin());
138*a03ca8b9SKrzysztof Kosiński auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), kNumChar);
139*a03ca8b9SKrzysztof Kosiński
140*a03ca8b9SKrzysztof Kosiński std::vector<size_t> suffix_array(str.size());
141*a03ca8b9SKrzysztof Kosiński SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets,
142*a03ca8b9SKrzysztof Kosiński suffix_array.begin());
143*a03ca8b9SKrzysztof Kosiński
144*a03ca8b9SKrzysztof Kosiński return suffix_array;
145*a03ca8b9SKrzysztof Kosiński }
146*a03ca8b9SKrzysztof Kosiński
TEST(InducedSuffixSortTest,InducedSortSubstring)147*a03ca8b9SKrzysztof Kosiński TEST(InducedSuffixSortTest, InducedSortSubstring) {
148*a03ca8b9SKrzysztof Kosiński using vec = std::vector<size_t>;
149*a03ca8b9SKrzysztof Kosiński
150*a03ca8b9SKrzysztof Kosiński auto us = MakeUnsignedString;
151*a03ca8b9SKrzysztof Kosiński
152*a03ca8b9SKrzysztof Kosiński // L; a$
153*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({0}), InducedSortSubstring(us("a")));
154*a03ca8b9SKrzysztof Kosiński
155*a03ca8b9SKrzysztof Kosiński // SL; ab$, b$
156*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({0, 1}), InducedSortSubstring(us("ab")));
157*a03ca8b9SKrzysztof Kosiński
158*a03ca8b9SKrzysztof Kosiński // LL; a$, ba$
159*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({1, 0}), InducedSortSubstring(us("ba")));
160*a03ca8b9SKrzysztof Kosiński
161*a03ca8b9SKrzysztof Kosiński // SLL; a$, aba$, ba$
162*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring(us("aba")));
163*a03ca8b9SKrzysztof Kosiński
164*a03ca8b9SKrzysztof Kosiński // LSL; ab$, b$, ba
165*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring(us("bab")));
166*a03ca8b9SKrzysztof Kosiński
167*a03ca8b9SKrzysztof Kosiński // SSL; aab$, ab$, b$
168*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring(us("aab")));
169*a03ca8b9SKrzysztof Kosiński
170*a03ca8b9SKrzysztof Kosiński // LSSL; aab$, ab$, b$, ba
171*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring(us("baab")));
172*a03ca8b9SKrzysztof Kosiński }
173*a03ca8b9SKrzysztof Kosiński
174*a03ca8b9SKrzysztof Kosiński template <class Algorithm>
TestSuffixSort(ustring test_str)175*a03ca8b9SKrzysztof Kosiński void TestSuffixSort(ustring test_str) {
176*a03ca8b9SKrzysztof Kosiński std::vector<size_t> suffix_array =
177*a03ca8b9SKrzysztof Kosiński MakeSuffixArray<Algorithm>(test_str, kNumChar);
178*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(test_str.size(), suffix_array.size());
179*a03ca8b9SKrzysztof Kosiński
180*a03ca8b9SKrzysztof Kosiński // Expect that I[] is a permutation of [0, len].
181*a03ca8b9SKrzysztof Kosiński std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end());
182*a03ca8b9SKrzysztof Kosiński std::sort(sorted_suffix.begin(), sorted_suffix.end());
183*a03ca8b9SKrzysztof Kosiński for (size_t i = 0; i < test_str.size(); ++i)
184*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(i, sorted_suffix[i]);
185*a03ca8b9SKrzysztof Kosiński
186*a03ca8b9SKrzysztof Kosiński // Expect that all suffixes are strictly ordered.
187*a03ca8b9SKrzysztof Kosiński auto end = test_str.end();
188*a03ca8b9SKrzysztof Kosiński for (size_t i = 1; i < test_str.size(); ++i) {
189*a03ca8b9SKrzysztof Kosiński auto suf1 = test_str.begin() + suffix_array[i - 1];
190*a03ca8b9SKrzysztof Kosiński auto suf2 = test_str.begin() + suffix_array[i];
191*a03ca8b9SKrzysztof Kosiński bool is_less = std::lexicographical_compare(suf1, end, suf2, end);
192*a03ca8b9SKrzysztof Kosiński EXPECT_TRUE(is_less);
193*a03ca8b9SKrzysztof Kosiński }
194*a03ca8b9SKrzysztof Kosiński }
195*a03ca8b9SKrzysztof Kosiński
196*a03ca8b9SKrzysztof Kosiński constexpr const char* test_strs[] = {
197*a03ca8b9SKrzysztof Kosiński "",
198*a03ca8b9SKrzysztof Kosiński "a",
199*a03ca8b9SKrzysztof Kosiński "aa",
200*a03ca8b9SKrzysztof Kosiński "za",
201*a03ca8b9SKrzysztof Kosiński "CACAO",
202*a03ca8b9SKrzysztof Kosiński "aaaaa",
203*a03ca8b9SKrzysztof Kosiński "banana",
204*a03ca8b9SKrzysztof Kosiński "tobeornottobe",
205*a03ca8b9SKrzysztof Kosiński "The quick brown fox jumps over the lazy dog.",
206*a03ca8b9SKrzysztof Kosiński "elephantelephantelephantelephantelephant",
207*a03ca8b9SKrzysztof Kosiński "walawalawashington",
208*a03ca8b9SKrzysztof Kosiński "-------------------------",
209*a03ca8b9SKrzysztof Kosiński "011010011001011010010110011010010",
210*a03ca8b9SKrzysztof Kosiński "3141592653589793238462643383279502884197169399375105",
211*a03ca8b9SKrzysztof Kosiński "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
212*a03ca8b9SKrzysztof Kosiński "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba",
213*a03ca8b9SKrzysztof Kosiński "0123456789876543210",
214*a03ca8b9SKrzysztof Kosiński "9876543210123456789",
215*a03ca8b9SKrzysztof Kosiński "aababcabcdabcdeabcdefabcdefg",
216*a03ca8b9SKrzysztof Kosiński "asdhklgalksdjghalksdjghalksdjgh",
217*a03ca8b9SKrzysztof Kosiński };
218*a03ca8b9SKrzysztof Kosiński
TEST(SuffixSortTest,NaiveSuffixSort)219*a03ca8b9SKrzysztof Kosiński TEST(SuffixSortTest, NaiveSuffixSort) {
220*a03ca8b9SKrzysztof Kosiński for (const std::string& test_str : test_strs) {
221*a03ca8b9SKrzysztof Kosiński TestSuffixSort<NaiveSuffixSort>(MakeUnsignedString(test_str));
222*a03ca8b9SKrzysztof Kosiński }
223*a03ca8b9SKrzysztof Kosiński }
224*a03ca8b9SKrzysztof Kosiński
TEST(SuffixSortTest,InducedSuffixSortSort)225*a03ca8b9SKrzysztof Kosiński TEST(SuffixSortTest, InducedSuffixSortSort) {
226*a03ca8b9SKrzysztof Kosiński for (const std::string& test_str : test_strs) {
227*a03ca8b9SKrzysztof Kosiński TestSuffixSort<InducedSuffixSort>(MakeUnsignedString(test_str));
228*a03ca8b9SKrzysztof Kosiński }
229*a03ca8b9SKrzysztof Kosiński }
230*a03ca8b9SKrzysztof Kosiński
231*a03ca8b9SKrzysztof Kosiński // Test with sequence that has every character.
TEST(SuffixSortTest,AllChar)232*a03ca8b9SKrzysztof Kosiński TEST(SuffixSortTest, AllChar) {
233*a03ca8b9SKrzysztof Kosiński std::vector<unsigned char> all_char(kNumChar);
234*a03ca8b9SKrzysztof Kosiński std::iota(all_char.begin(), all_char.end(), 0);
235*a03ca8b9SKrzysztof Kosiński
236*a03ca8b9SKrzysztof Kosiński {
237*a03ca8b9SKrzysztof Kosiński std::vector<size_t> suffix_array =
238*a03ca8b9SKrzysztof Kosiński MakeSuffixArray<InducedSuffixSort>(all_char, kNumChar);
239*a03ca8b9SKrzysztof Kosiński for (size_t i = 0; i < kNumChar; ++i)
240*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(i, suffix_array[i]);
241*a03ca8b9SKrzysztof Kosiński }
242*a03ca8b9SKrzysztof Kosiński
243*a03ca8b9SKrzysztof Kosiński std::vector<unsigned char> all_char_reverse(all_char.rbegin(),
244*a03ca8b9SKrzysztof Kosiński all_char.rend());
245*a03ca8b9SKrzysztof Kosiński {
246*a03ca8b9SKrzysztof Kosiński std::vector<size_t> suffix_array =
247*a03ca8b9SKrzysztof Kosiński MakeSuffixArray<InducedSuffixSort>(all_char_reverse, kNumChar);
248*a03ca8b9SKrzysztof Kosiński for (size_t i = 0; i < kNumChar; ++i)
249*a03ca8b9SKrzysztof Kosiński EXPECT_EQ(kNumChar - i - 1, suffix_array[i]);
250*a03ca8b9SKrzysztof Kosiński }
251*a03ca8b9SKrzysztof Kosiński }
252*a03ca8b9SKrzysztof Kosiński
TestSuffixLowerBound(ustring base_str,ustring search_str)253*a03ca8b9SKrzysztof Kosiński void TestSuffixLowerBound(ustring base_str, ustring search_str) {
254*a03ca8b9SKrzysztof Kosiński std::vector<size_t> suffix_array =
255*a03ca8b9SKrzysztof Kosiński MakeSuffixArray<NaiveSuffixSort>(base_str, kNumChar);
256*a03ca8b9SKrzysztof Kosiński
257*a03ca8b9SKrzysztof Kosiński auto pos = SuffixLowerBound(suffix_array, base_str.begin(),
258*a03ca8b9SKrzysztof Kosiński search_str.begin(), search_str.end());
259*a03ca8b9SKrzysztof Kosiński
260*a03ca8b9SKrzysztof Kosiński auto end = base_str.end();
261*a03ca8b9SKrzysztof Kosiński if (pos != suffix_array.begin()) {
262*a03ca8b9SKrzysztof Kosiński // Previous suffix is less than |search_str|.
263*a03ca8b9SKrzysztof Kosiński auto suf = base_str.begin() + pos[-1];
264*a03ca8b9SKrzysztof Kosiński bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
265*a03ca8b9SKrzysztof Kosiński search_str.end());
266*a03ca8b9SKrzysztof Kosiński EXPECT_TRUE(is_less);
267*a03ca8b9SKrzysztof Kosiński }
268*a03ca8b9SKrzysztof Kosiński if (pos != suffix_array.end()) {
269*a03ca8b9SKrzysztof Kosiński // Current suffix is greater of equal to |search_str|.
270*a03ca8b9SKrzysztof Kosiński auto suf = base_str.begin() + *pos;
271*a03ca8b9SKrzysztof Kosiński bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
272*a03ca8b9SKrzysztof Kosiński search_str.end());
273*a03ca8b9SKrzysztof Kosiński EXPECT_FALSE(is_less);
274*a03ca8b9SKrzysztof Kosiński }
275*a03ca8b9SKrzysztof Kosiński }
276*a03ca8b9SKrzysztof Kosiński
TEST(SuffixArrayTest,LowerBound)277*a03ca8b9SKrzysztof Kosiński TEST(SuffixArrayTest, LowerBound) {
278*a03ca8b9SKrzysztof Kosiński auto us = MakeUnsignedString;
279*a03ca8b9SKrzysztof Kosiński
280*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(us(""), us(""));
281*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(us(""), us("a"));
282*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(us("b"), us(""));
283*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(us("b"), us("a"));
284*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(us("b"), us("c"));
285*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(us("b"), us("bc"));
286*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(us("aa"), us("a"));
287*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(us("aa"), us("aa"));
288*a03ca8b9SKrzysztof Kosiński
289*a03ca8b9SKrzysztof Kosiński ustring sentence = us("the quick brown fox jumps over the lazy dog.");
290*a03ca8b9SKrzysztof Kosiński // Entire string: exact and unique.
291*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, sentence);
292*a03ca8b9SKrzysztof Kosiński // Empty string: exact and non-unique.
293*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us(""));
294*a03ca8b9SKrzysztof Kosiński // Exact and unique suffix matches.
295*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("."));
296*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("the lazy dog."));
297*a03ca8b9SKrzysztof Kosiński // Exact and unique non-suffix matches.
298*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("quick"));
299*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("the quick"));
300*a03ca8b9SKrzysztof Kosiński // Partial and unique matches.
301*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("fox jumps with the hosps"));
302*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("xyz"));
303*a03ca8b9SKrzysztof Kosiński // Exact and non-unique match: take lexicographical first.
304*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("the"));
305*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us(" "));
306*a03ca8b9SKrzysztof Kosiński // Partial and non-unique match.
307*a03ca8b9SKrzysztof Kosiński // query < "the l"... < "the q"...
308*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("the apple"));
309*a03ca8b9SKrzysztof Kosiński // "the l"... < query < "the q"...
310*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("the opera"));
311*a03ca8b9SKrzysztof Kosiński // "the l"... < "the q"... < query
312*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("the zebra"));
313*a03ca8b9SKrzysztof Kosiński // Prefix match dominates suffix match (unique).
314*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("over quick brown fox"));
315*a03ca8b9SKrzysztof Kosiński // Empty matchs.
316*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us(","));
317*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("1234"));
318*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("THE QUICK BROWN FOX"));
319*a03ca8b9SKrzysztof Kosiński TestSuffixLowerBound(sentence, us("(the"));
320*a03ca8b9SKrzysztof Kosiński }
321*a03ca8b9SKrzysztof Kosiński
TEST(SuffixArrayTest,LowerBoundExact)322*a03ca8b9SKrzysztof Kosiński TEST(SuffixArrayTest, LowerBoundExact) {
323*a03ca8b9SKrzysztof Kosiński for (const std::string& test_str : test_strs) {
324*a03ca8b9SKrzysztof Kosiński ustring test_ustr = MakeUnsignedString(test_str);
325*a03ca8b9SKrzysztof Kosiński
326*a03ca8b9SKrzysztof Kosiński std::vector<size_t> suffix_array =
327*a03ca8b9SKrzysztof Kosiński MakeSuffixArray<InducedSuffixSort>(test_ustr, kNumChar);
328*a03ca8b9SKrzysztof Kosiński
329*a03ca8b9SKrzysztof Kosiński for (size_t lo = 0; lo < test_str.size(); ++lo) {
330*a03ca8b9SKrzysztof Kosiński for (size_t hi = lo + 1; hi <= test_str.size(); ++hi) {
331*a03ca8b9SKrzysztof Kosiński ustring query(test_ustr.begin() + lo, test_ustr.begin() + hi);
332*a03ca8b9SKrzysztof Kosiński ASSERT_EQ(query.size(), hi - lo);
333*a03ca8b9SKrzysztof Kosiński auto pos = SuffixLowerBound(suffix_array, test_ustr.begin(),
334*a03ca8b9SKrzysztof Kosiński query.begin(), query.end());
335*a03ca8b9SKrzysztof Kosiński EXPECT_TRUE(
336*a03ca8b9SKrzysztof Kosiński std::equal(query.begin(), query.end(), test_ustr.begin() + *pos));
337*a03ca8b9SKrzysztof Kosiński }
338*a03ca8b9SKrzysztof Kosiński }
339*a03ca8b9SKrzysztof Kosiński }
340*a03ca8b9SKrzysztof Kosiński }
341*a03ca8b9SKrzysztof Kosiński
342*a03ca8b9SKrzysztof Kosiński } // namespace zucchini
343