1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 //! An implementation of SipHash.
12 
13 use core::cmp;
14 use core::hash;
15 use core::hash::Hasher as _;
16 use core::marker::PhantomData;
17 use core::mem;
18 use core::ptr;
19 use core::u64;
20 
21 /// An implementation of SipHash 1-3.
22 ///
23 /// See: <https://www.aumasson.jp/siphash/siphash.pdf>
24 #[derive(Debug, Clone, Copy, Default)]
25 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
26 pub struct SipHasher13 {
27     hasher: Hasher<Sip13Rounds>,
28 }
29 
30 /// An implementation of SipHash 2-4.
31 ///
32 /// See: <https://www.aumasson.jp/siphash/siphash.pdf>
33 #[derive(Debug, Clone, Copy, Default)]
34 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
35 pub struct SipHasher24 {
36     hasher: Hasher<Sip24Rounds>,
37 }
38 
39 /// An implementation of SipHash 2-4.
40 ///
41 /// See: <https://www.aumasson.jp/siphash/siphash.pdf>
42 ///
43 /// SipHash is a general-purpose hashing function: it runs at a good
44 /// speed (competitive with Spooky and City) and permits strong _keyed_
45 /// hashing. This lets you key your hashtables from a strong RNG, such as
46 /// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
47 ///
48 /// Although the SipHash algorithm is considered to be generally strong,
49 /// it is not intended for cryptographic purposes. As such, all
50 /// cryptographic uses of this implementation are _strongly discouraged_.
51 #[derive(Debug, Clone, Copy, Default)]
52 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
53 pub struct SipHasher(SipHasher24);
54 
55 #[derive(Debug, Clone, Copy)]
56 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
57 struct Hasher<S: Sip> {
58     k0: u64,
59     k1: u64,
60     length: usize, // how many bytes we've processed
61     state: State,  // hash State
62     tail: u64,     // unprocessed bytes le
63     ntail: usize,  // how many bytes in tail are valid
64     _marker: PhantomData<S>,
65 }
66 
67 #[derive(Debug, Clone, Copy)]
68 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
69 struct State {
70     // v0, v2 and v1, v3 show up in pairs in the algorithm,
71     // and simd implementations of SipHash will use vectors
72     // of v02 and v13. By placing them in this order in the struct,
73     // the compiler can pick up on just a few simd optimizations by itself.
74     v0: u64,
75     v2: u64,
76     v1: u64,
77     v3: u64,
78 }
79 
80 macro_rules! compress {
81     ($state:expr) => {{
82         compress!($state.v0, $state.v1, $state.v2, $state.v3)
83     }};
84     ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
85         $v0 = $v0.wrapping_add($v1);
86         $v1 = $v1.rotate_left(13);
87         $v1 ^= $v0;
88         $v0 = $v0.rotate_left(32);
89         $v2 = $v2.wrapping_add($v3);
90         $v3 = $v3.rotate_left(16);
91         $v3 ^= $v2;
92         $v0 = $v0.wrapping_add($v3);
93         $v3 = $v3.rotate_left(21);
94         $v3 ^= $v0;
95         $v2 = $v2.wrapping_add($v1);
96         $v1 = $v1.rotate_left(17);
97         $v1 ^= $v2;
98         $v2 = $v2.rotate_left(32);
99     }};
100 }
101 
102 /// Loads an integer of the desired type from a byte stream, in LE order. Uses
103 /// `copy_nonoverlapping` to let the compiler generate the most efficient way
104 /// to load it from a possibly unaligned address.
105 ///
106 /// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
107 macro_rules! load_int_le {
108     ($buf:expr, $i:expr, $int_ty:ident) => {{
109         debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
110         let mut data = 0 as $int_ty;
111         ptr::copy_nonoverlapping(
112             $buf.as_ptr().add($i),
113             &mut data as *mut _ as *mut u8,
114             mem::size_of::<$int_ty>(),
115         );
116         data.to_le()
117     }};
118 }
119 
120 /// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
121 /// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
122 /// sizes and avoid calling `memcpy`, which is good for speed.
123 ///
124 /// Unsafe because: unchecked indexing at start..start+len
125 #[inline]
u8to64_le(buf: &[u8], start: usize, len: usize) -> u64126 unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
127     debug_assert!(len < 8);
128     let mut i = 0; // current byte index (from LSB) in the output u64
129     let mut out = 0;
130     if i + 3 < len {
131         out = load_int_le!(buf, start + i, u32) as u64;
132         i += 4;
133     }
134     if i + 1 < len {
135         out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
136         i += 2
137     }
138     if i < len {
139         out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
140         i += 1;
141     }
142     debug_assert_eq!(i, len);
143     out
144 }
145 
146 impl SipHasher {
147     /// Creates a new `SipHasher` with the two initial keys set to 0.
148     #[inline]
new() -> SipHasher149     pub fn new() -> SipHasher {
150         SipHasher::new_with_keys(0, 0)
151     }
152 
153     /// Creates a `SipHasher` that is keyed off the provided keys.
154     #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher155     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
156         SipHasher(SipHasher24::new_with_keys(key0, key1))
157     }
158 
159     /// Creates a `SipHasher` from a 16 byte key.
new_with_key(key: &[u8; 16]) -> SipHasher160     pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
161         let mut b0 = [0u8; 8];
162         let mut b1 = [0u8; 8];
163         b0.copy_from_slice(&key[0..8]);
164         b1.copy_from_slice(&key[8..16]);
165         let key0 = u64::from_le_bytes(b0);
166         let key1 = u64::from_le_bytes(b1);
167         Self::new_with_keys(key0, key1)
168     }
169 
170     /// Get the keys used by this hasher
keys(&self) -> (u64, u64)171     pub fn keys(&self) -> (u64, u64) {
172         (self.0.hasher.k0, self.0.hasher.k1)
173     }
174 
175     /// Get the key used by this hasher as a 16 byte vector
key(&self) -> [u8; 16]176     pub fn key(&self) -> [u8; 16] {
177         let mut bytes = [0u8; 16];
178         bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
179         bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
180         bytes
181     }
182 
183     /// Hash a byte array - This is the easiest and safest way to use SipHash.
184     #[inline]
hash(&self, bytes: &[u8]) -> u64185     pub fn hash(&self, bytes: &[u8]) -> u64 {
186         let mut hasher = self.0.hasher;
187         hasher.write(bytes);
188         hasher.finish()
189     }
190 }
191 
192 impl SipHasher13 {
193     /// Creates a new `SipHasher13` with the two initial keys set to 0.
194     #[inline]
new() -> SipHasher13195     pub fn new() -> SipHasher13 {
196         SipHasher13::new_with_keys(0, 0)
197     }
198 
199     /// Creates a `SipHasher13` that is keyed off the provided keys.
200     #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher13201     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
202         SipHasher13 {
203             hasher: Hasher::new_with_keys(key0, key1),
204         }
205     }
206 
207     /// Creates a `SipHasher13` from a 16 byte key.
new_with_key(key: &[u8; 16]) -> SipHasher13208     pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
209         let mut b0 = [0u8; 8];
210         let mut b1 = [0u8; 8];
211         b0.copy_from_slice(&key[0..8]);
212         b1.copy_from_slice(&key[8..16]);
213         let key0 = u64::from_le_bytes(b0);
214         let key1 = u64::from_le_bytes(b1);
215         Self::new_with_keys(key0, key1)
216     }
217 
218     /// Get the keys used by this hasher
keys(&self) -> (u64, u64)219     pub fn keys(&self) -> (u64, u64) {
220         (self.hasher.k0, self.hasher.k1)
221     }
222 
223     /// Get the key used by this hasher as a 16 byte vector
key(&self) -> [u8; 16]224     pub fn key(&self) -> [u8; 16] {
225         let mut bytes = [0u8; 16];
226         bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
227         bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
228         bytes
229     }
230 
231     /// Hash a byte array - This is the easiest and safest way to use SipHash.
232     #[inline]
hash(&self, bytes: &[u8]) -> u64233     pub fn hash(&self, bytes: &[u8]) -> u64 {
234         let mut hasher = self.hasher;
235         hasher.write(bytes);
236         hasher.finish()
237     }
238 }
239 
240 impl SipHasher24 {
241     /// Creates a new `SipHasher24` with the two initial keys set to 0.
242     #[inline]
new() -> SipHasher24243     pub fn new() -> SipHasher24 {
244         SipHasher24::new_with_keys(0, 0)
245     }
246 
247     /// Creates a `SipHasher24` that is keyed off the provided keys.
248     #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher24249     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
250         SipHasher24 {
251             hasher: Hasher::new_with_keys(key0, key1),
252         }
253     }
254 
255     /// Creates a `SipHasher24` from a 16 byte key.
new_with_key(key: &[u8; 16]) -> SipHasher24256     pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
257         let mut b0 = [0u8; 8];
258         let mut b1 = [0u8; 8];
259         b0.copy_from_slice(&key[0..8]);
260         b1.copy_from_slice(&key[8..16]);
261         let key0 = u64::from_le_bytes(b0);
262         let key1 = u64::from_le_bytes(b1);
263         Self::new_with_keys(key0, key1)
264     }
265 
266     /// Get the keys used by this hasher
keys(&self) -> (u64, u64)267     pub fn keys(&self) -> (u64, u64) {
268         (self.hasher.k0, self.hasher.k1)
269     }
270 
271     /// Get the key used by this hasher as a 16 byte vector
key(&self) -> [u8; 16]272     pub fn key(&self) -> [u8; 16] {
273         let mut bytes = [0u8; 16];
274         bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
275         bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
276         bytes
277     }
278 
279     /// Hash a byte array - This is the easiest and safest way to use SipHash.
280     #[inline]
hash(&self, bytes: &[u8]) -> u64281     pub fn hash(&self, bytes: &[u8]) -> u64 {
282         let mut hasher = self.hasher;
283         hasher.write(bytes);
284         hasher.finish()
285     }
286 }
287 
288 impl<S: Sip> Hasher<S> {
289     #[inline]
new_with_keys(key0: u64, key1: u64) -> Hasher<S>290     fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
291         let mut state = Hasher {
292             k0: key0,
293             k1: key1,
294             length: 0,
295             state: State {
296                 v0: 0,
297                 v1: 0,
298                 v2: 0,
299                 v3: 0,
300             },
301             tail: 0,
302             ntail: 0,
303             _marker: PhantomData,
304         };
305         state.reset();
306         state
307     }
308 
309     #[inline]
reset(&mut self)310     fn reset(&mut self) {
311         self.length = 0;
312         self.state.v0 = self.k0 ^ 0x736f6d6570736575;
313         self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
314         self.state.v2 = self.k0 ^ 0x6c7967656e657261;
315         self.state.v3 = self.k1 ^ 0x7465646279746573;
316         self.ntail = 0;
317     }
318 
319     // A specialized write function for values with size <= 8.
320     //
321     // The hashing of multi-byte integers depends on endianness. E.g.:
322     // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
323     // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
324     //
325     // This function does the right thing for little-endian hardware. On
326     // big-endian hardware `x` must be byte-swapped first to give the right
327     // behaviour. After any byte-swapping, the input must be zero-extended to
328     // 64-bits. The caller is responsible for the byte-swapping and
329     // zero-extension.
330     #[inline]
short_write<T>(&mut self, _x: T, x: u64)331     fn short_write<T>(&mut self, _x: T, x: u64) {
332         let size = mem::size_of::<T>();
333         self.length += size;
334 
335         // The original number must be zero-extended, not sign-extended.
336         debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
337 
338         // The number of bytes needed to fill `self.tail`.
339         let needed = 8 - self.ntail;
340 
341         self.tail |= x << (8 * self.ntail);
342         if size < needed {
343             self.ntail += size;
344             return;
345         }
346 
347         // `self.tail` is full, process it.
348         self.state.v3 ^= self.tail;
349         S::c_rounds(&mut self.state);
350         self.state.v0 ^= self.tail;
351 
352         self.ntail = size - needed;
353         self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
354     }
355 }
356 
357 impl hash::Hasher for SipHasher {
358     #[inline]
write(&mut self, msg: &[u8])359     fn write(&mut self, msg: &[u8]) {
360         self.0.write(msg)
361     }
362 
363     #[inline]
finish(&self) -> u64364     fn finish(&self) -> u64 {
365         self.0.finish()
366     }
367 
368     #[inline]
write_usize(&mut self, i: usize)369     fn write_usize(&mut self, i: usize) {
370         self.0.write_usize(i);
371     }
372 
373     #[inline]
write_u8(&mut self, i: u8)374     fn write_u8(&mut self, i: u8) {
375         self.0.write_u8(i);
376     }
377 
378     #[inline]
write_u16(&mut self, i: u16)379     fn write_u16(&mut self, i: u16) {
380         self.0.write_u16(i);
381     }
382 
383     #[inline]
write_u32(&mut self, i: u32)384     fn write_u32(&mut self, i: u32) {
385         self.0.write_u32(i);
386     }
387 
388     #[inline]
write_u64(&mut self, i: u64)389     fn write_u64(&mut self, i: u64) {
390         self.0.write_u64(i);
391     }
392 }
393 
394 impl hash::Hasher for SipHasher13 {
395     #[inline]
write(&mut self, msg: &[u8])396     fn write(&mut self, msg: &[u8]) {
397         self.hasher.write(msg)
398     }
399 
400     #[inline]
finish(&self) -> u64401     fn finish(&self) -> u64 {
402         self.hasher.finish()
403     }
404 
405     #[inline]
write_usize(&mut self, i: usize)406     fn write_usize(&mut self, i: usize) {
407         self.hasher.write_usize(i);
408     }
409 
410     #[inline]
write_u8(&mut self, i: u8)411     fn write_u8(&mut self, i: u8) {
412         self.hasher.write_u8(i);
413     }
414 
415     #[inline]
write_u16(&mut self, i: u16)416     fn write_u16(&mut self, i: u16) {
417         self.hasher.write_u16(i);
418     }
419 
420     #[inline]
write_u32(&mut self, i: u32)421     fn write_u32(&mut self, i: u32) {
422         self.hasher.write_u32(i);
423     }
424 
425     #[inline]
write_u64(&mut self, i: u64)426     fn write_u64(&mut self, i: u64) {
427         self.hasher.write_u64(i);
428     }
429 }
430 
431 impl hash::Hasher for SipHasher24 {
432     #[inline]
write(&mut self, msg: &[u8])433     fn write(&mut self, msg: &[u8]) {
434         self.hasher.write(msg)
435     }
436 
437     #[inline]
finish(&self) -> u64438     fn finish(&self) -> u64 {
439         self.hasher.finish()
440     }
441 
442     #[inline]
write_usize(&mut self, i: usize)443     fn write_usize(&mut self, i: usize) {
444         self.hasher.write_usize(i);
445     }
446 
447     #[inline]
write_u8(&mut self, i: u8)448     fn write_u8(&mut self, i: u8) {
449         self.hasher.write_u8(i);
450     }
451 
452     #[inline]
write_u16(&mut self, i: u16)453     fn write_u16(&mut self, i: u16) {
454         self.hasher.write_u16(i);
455     }
456 
457     #[inline]
write_u32(&mut self, i: u32)458     fn write_u32(&mut self, i: u32) {
459         self.hasher.write_u32(i);
460     }
461 
462     #[inline]
write_u64(&mut self, i: u64)463     fn write_u64(&mut self, i: u64) {
464         self.hasher.write_u64(i);
465     }
466 }
467 
468 impl<S: Sip> hash::Hasher for Hasher<S> {
469     #[inline]
write_usize(&mut self, i: usize)470     fn write_usize(&mut self, i: usize) {
471         self.short_write(i, i.to_le() as u64);
472     }
473 
474     #[inline]
write_u8(&mut self, i: u8)475     fn write_u8(&mut self, i: u8) {
476         self.short_write(i, i as u64);
477     }
478 
479     #[inline]
write_u32(&mut self, i: u32)480     fn write_u32(&mut self, i: u32) {
481         self.short_write(i, i.to_le() as u64);
482     }
483 
484     #[inline]
write_u64(&mut self, i: u64)485     fn write_u64(&mut self, i: u64) {
486         self.short_write(i, i.to_le());
487     }
488 
489     #[inline]
write(&mut self, msg: &[u8])490     fn write(&mut self, msg: &[u8]) {
491         let length = msg.len();
492         self.length += length;
493 
494         let mut needed = 0;
495 
496         if self.ntail != 0 {
497             needed = 8 - self.ntail;
498             self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
499             if length < needed {
500                 self.ntail += length;
501                 return;
502             } else {
503                 self.state.v3 ^= self.tail;
504                 S::c_rounds(&mut self.state);
505                 self.state.v0 ^= self.tail;
506                 self.ntail = 0;
507             }
508         }
509 
510         // Buffered tail is now flushed, process new input.
511         let len = length - needed;
512         let left = len & 0x7;
513 
514         let mut i = needed;
515         while i < len - left {
516             let mi = unsafe { load_int_le!(msg, i, u64) };
517 
518             self.state.v3 ^= mi;
519             S::c_rounds(&mut self.state);
520             self.state.v0 ^= mi;
521 
522             i += 8;
523         }
524 
525         self.tail = unsafe { u8to64_le(msg, i, left) };
526         self.ntail = left;
527     }
528 
529     #[inline]
finish(&self) -> u64530     fn finish(&self) -> u64 {
531         let mut state = self.state;
532 
533         let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
534 
535         state.v3 ^= b;
536         S::c_rounds(&mut state);
537         state.v0 ^= b;
538 
539         state.v2 ^= 0xff;
540         S::d_rounds(&mut state);
541 
542         state.v0 ^ state.v1 ^ state.v2 ^ state.v3
543     }
544 }
545 
546 impl<S: Sip> Default for Hasher<S> {
547     /// Creates a `Hasher<S>` with the two initial keys set to 0.
548     #[inline]
default() -> Hasher<S>549     fn default() -> Hasher<S> {
550         Hasher::new_with_keys(0, 0)
551     }
552 }
553 
554 #[doc(hidden)]
555 trait Sip {
c_rounds(_: &mut State)556     fn c_rounds(_: &mut State);
d_rounds(_: &mut State)557     fn d_rounds(_: &mut State);
558 }
559 
560 #[derive(Debug, Clone, Copy, Default)]
561 struct Sip13Rounds;
562 
563 impl Sip for Sip13Rounds {
564     #[inline]
c_rounds(state: &mut State)565     fn c_rounds(state: &mut State) {
566         compress!(state);
567     }
568 
569     #[inline]
d_rounds(state: &mut State)570     fn d_rounds(state: &mut State) {
571         compress!(state);
572         compress!(state);
573         compress!(state);
574     }
575 }
576 
577 #[derive(Debug, Clone, Copy, Default)]
578 struct Sip24Rounds;
579 
580 impl Sip for Sip24Rounds {
581     #[inline]
c_rounds(state: &mut State)582     fn c_rounds(state: &mut State) {
583         compress!(state);
584         compress!(state);
585     }
586 
587     #[inline]
d_rounds(state: &mut State)588     fn d_rounds(state: &mut State) {
589         compress!(state);
590         compress!(state);
591         compress!(state);
592         compress!(state);
593     }
594 }
595