/*!
Contains architecture independent routines.

These routines are often used as a "fallback" implementation when the more
specialized architecture dependent routines are unavailable.
*/

pub mod memchr;
pub mod packedpair;
pub mod rabinkarp;
#[cfg(feature = "alloc")]
pub mod shiftor;
pub mod twoway;

/// Returns true if and only if `needle` is a prefix of `haystack`.
///
/// This uses a latency optimized variant of `memcmp` internally which *might*
/// make this faster for very short strings.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
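///
/// # Example
///
/// A quick sketch of the expected behavior, mirroring the unit tests below.
/// The import path assumes this module is exposed as `arch::all` of the
/// `memchr` crate; adjust it to your crate layout.
///
/// ```
/// use memchr::arch::all::is_prefix;
///
/// assert!(is_prefix(b"foobar", b"foo"));
/// assert!(is_prefix(b"foo", b"foo"));
/// assert!(!is_prefix(b"foobar", b"bar"));
/// // A needle longer than the haystack is never a prefix.
/// assert!(!is_prefix(b"fo", b"foo"));
/// ```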
#[inline(always)]
pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool {
    needle.len() <= haystack.len()
        && is_equal(&haystack[..needle.len()], needle)
}

/// Returns true if and only if `needle` is a suffix of `haystack`.
///
/// This uses a latency optimized variant of `memcmp` internally which *might*
/// make this faster for very short strings.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
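///
/// # Example
///
/// A quick sketch of the expected behavior, mirroring the unit tests below.
/// As above, the `memchr::arch::all` import path is an assumption about the
/// crate layout.
///
/// ```
/// use memchr::arch::all::is_suffix;
///
/// assert!(is_suffix(b"foobar", b"bar"));
/// assert!(is_suffix(b"foo", b"foo"));
/// assert!(!is_suffix(b"foobar", b"foo"));
/// ```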
#[inline(always)]
pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool {
    needle.len() <= haystack.len()
        && is_equal(&haystack[haystack.len() - needle.len()..], needle)
}

/// Compare corresponding bytes in `x` and `y` for equality.
///
/// That is, this returns true if and only if `x.len() == y.len()` and
/// `x[i] == y[i]` for all `0 <= i < x.len()`.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
///
/// # Motivation
///
/// Why not use slice equality instead? Well, slice equality usually results in
/// a call out to the current platform's `libc` which might not be inlineable
/// or have other overhead. This routine isn't guaranteed to be a win, but it
/// might be in some cases.
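///
/// # Example
///
/// A quick sketch of the expected behavior, mirroring the unit tests below.
/// As above, the `memchr::arch::all` import path is an assumption about the
/// crate layout.
///
/// ```
/// use memchr::arch::all::is_equal;
///
/// assert!(is_equal(b"abc", b"abc"));
/// assert!(!is_equal(b"abc", b"abz"));
/// // Slices of different lengths are never equal.
/// assert!(!is_equal(b"abc", b"ab"));
/// ```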
#[inline(always)]
pub fn is_equal(x: &[u8], y: &[u8]) -> bool {
    if x.len() != y.len() {
        return false;
    }
    // SAFETY: Our pointers are derived directly from borrowed slices which
    // uphold all of our safety guarantees except for length. We account for
    // length with the check above.
    unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) }
}

/// Compare `n` bytes at the given pointers for equality.
///
/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
/// `0 <= i < n`.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
///
/// # Motivation
///
/// Why not use slice equality instead? Well, slice equality usually results in
/// a call out to the current platform's `libc` which might not be inlineable
/// or have other overhead. This routine isn't guaranteed to be a win, but it
/// might be in some cases.
///
/// # Safety
///
/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
/// * Both `x` and `y` must point to an initialized value.
/// * Both `x` and `y` must each point to an allocated object and
///   must either be in bounds or at most one byte past the end of the
///   allocated object. `x` and `y` do not need to point to the same allocated
///   object, but they may.
/// * Both `x` and `y` must be _derived from_ a pointer to their respective
///   allocated objects.
/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
///   for `y` and `y+n`.
/// * The distance being in bounds must not rely on "wrapping around" the
///   address space.
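///
/// # Example
///
/// A minimal sketch of safe usage, deriving both pointers from slices of
/// equal length so that the conditions above hold. As above, the
/// `memchr::arch::all` import path is an assumption about the crate layout.
///
/// ```
/// use memchr::arch::all::is_equal_raw;
///
/// let (x, y) = (b"abcdef", b"abcdef");
/// // SAFETY: Both pointers are derived from slices that are valid for
/// // reads of at least 6 bytes.
/// assert!(unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), 6) });
/// ```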
#[inline(always)]
pub unsafe fn is_equal_raw(
    mut x: *const u8,
    mut y: *const u8,
    mut n: usize,
) -> bool {
    // When we have 4 or more bytes to compare, then proceed in chunks of 4 at
    // a time using unaligned loads.
    //
    // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is
    // that this particular version of memcmp is likely to be called with tiny
    // needles. That means that if we did 8 byte loads, then a higher
    // proportion of memcmp calls would be handled entirely by the slower
    // partial loads below. With that said, this is a hypothesis and is only
    // loosely supported by benchmarks. There's likely some improvement that
    // could be made here. The main thing here though is to optimize for
    // latency, not throughput.

    // SAFETY: The caller is responsible for ensuring the pointers we get are
    // valid and readable for at least `n` bytes. We also do unaligned loads,
    // so there's no need to ensure we're aligned. (This is justified by this
    // routine being specifically for short strings.)
    while n >= 4 {
        let vx = x.cast::<u32>().read_unaligned();
        let vy = y.cast::<u32>().read_unaligned();
        if vx != vy {
            return false;
        }
        x = x.add(4);
        y = y.add(4);
        n -= 4;
    }
    // If we don't have enough bytes to do 4-byte at a time loads, then
    // do partial loads. Note that I used to have a byte-at-a-time
    // loop here and that turned out to be quite a bit slower for the
    // memmem/pathological/defeat-simple-vector-alphabet benchmark.
    if n >= 2 {
        let vx = x.cast::<u16>().read_unaligned();
        let vy = y.cast::<u16>().read_unaligned();
        if vx != vy {
            return false;
        }
        x = x.add(2);
        y = y.add(2);
        n -= 2;
    }
    if n > 0 {
        if x.read() != y.read() {
            return false;
        }
    }
    true
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn equals_different_lengths() {
        assert!(!is_equal(b"", b"a"));
        assert!(!is_equal(b"a", b""));
        assert!(!is_equal(b"ab", b"a"));
        assert!(!is_equal(b"a", b"ab"));
    }

    #[test]
    fn equals_mismatch() {
        let one_mismatch = [
            (&b"a"[..], &b"x"[..]),
            (&b"ab"[..], &b"ax"[..]),
            (&b"abc"[..], &b"abx"[..]),
            (&b"abcd"[..], &b"abcx"[..]),
            (&b"abcde"[..], &b"abcdx"[..]),
            (&b"abcdef"[..], &b"abcdex"[..]),
            (&b"abcdefg"[..], &b"abcdefx"[..]),
            (&b"abcdefgh"[..], &b"abcdefgx"[..]),
            (&b"abcdefghi"[..], &b"abcdefghx"[..]),
            (&b"abcdefghij"[..], &b"abcdefghix"[..]),
            (&b"abcdefghijk"[..], &b"abcdefghijx"[..]),
            (&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]),
            (&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]),
            (&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]),
        ];
        for (x, y) in one_mismatch {
            assert_eq!(x.len(), y.len(), "lengths should match");
            assert!(!is_equal(x, y));
            assert!(!is_equal(y, x));
        }
    }

    #[test]
    fn equals_yes() {
        assert!(is_equal(b"", b""));
        assert!(is_equal(b"a", b"a"));
        assert!(is_equal(b"ab", b"ab"));
        assert!(is_equal(b"abc", b"abc"));
        assert!(is_equal(b"abcd", b"abcd"));
        assert!(is_equal(b"abcde", b"abcde"));
        assert!(is_equal(b"abcdef", b"abcdef"));
        assert!(is_equal(b"abcdefg", b"abcdefg"));
        assert!(is_equal(b"abcdefgh", b"abcdefgh"));
        assert!(is_equal(b"abcdefghi", b"abcdefghi"));
    }

    #[test]
    fn prefix() {
        assert!(is_prefix(b"", b""));
        assert!(is_prefix(b"a", b""));
        assert!(is_prefix(b"ab", b""));
        assert!(is_prefix(b"foo", b"foo"));
        assert!(is_prefix(b"foobar", b"foo"));

        assert!(!is_prefix(b"foo", b"fob"));
        assert!(!is_prefix(b"foobar", b"fob"));
    }

    #[test]
    fn suffix() {
        assert!(is_suffix(b"", b""));
        assert!(is_suffix(b"a", b""));
        assert!(is_suffix(b"ab", b""));
        assert!(is_suffix(b"foo", b"foo"));
        assert!(is_suffix(b"foobar", b"bar"));

        assert!(!is_suffix(b"foo", b"goo"));
        assert!(!is_suffix(b"foobar", b"gar"));
    }
}