1 use crate::attribute::OwnedAttribute;
2 use crate::name::OwnedName;
3 
4 use std::collections::hash_map::RandomState;
5 use std::collections::HashSet;
6 use std::hash::BuildHasher;
7 use std::hash::Hash;
8 use std::hash::Hasher;
9 
10 /// An ordered set
11 pub(crate) struct AttributesSet {
12     vec: Vec<OwnedAttribute>,
13     /// Uses a no-op hasher, because these u64s are hashes already
14     may_contain: HashSet<u64, U64HasherBuilder>,
15     /// This is real hasher for the `OwnedName`
16     hasher: RandomState,
17 }
18 
19 /// Use linear search and don't allocate `HashSet` if there are few attributes,
20 /// because allocation costs more than a few comparisons.
21 const HASH_THRESHOLD: usize = 8;
22 
23 impl AttributesSet {
new() -> Self24     pub fn new() -> Self {
25         Self {
26             vec: Vec::new(),
27             hasher: RandomState::new(),
28             may_contain: HashSet::default(),
29         }
30     }
31 
hash(&self, val: &OwnedName) -> u6432     fn hash(&self, val: &OwnedName) -> u64 {
33         let mut h = self.hasher.build_hasher();
34         val.hash(&mut h);
35         h.finish()
36     }
37 
len(&self) -> usize38     pub fn len(&self) -> usize {
39         self.vec.len()
40     }
41 
contains(&self, name: &OwnedName) -> bool42     pub fn contains(&self, name: &OwnedName) -> bool {
43         // fall back to linear search only on duplicate or hash collision
44         (self.vec.len() < HASH_THRESHOLD || self.may_contain.contains(&self.hash(name))) &&
45             self.vec.iter().any(move |a| &a.name == name)
46     }
47 
push(&mut self, attr: OwnedAttribute)48     pub fn push(&mut self, attr: OwnedAttribute) {
49         if self.vec.len() >= HASH_THRESHOLD {
50             if self.vec.len() == HASH_THRESHOLD {
51                 self.may_contain.reserve(HASH_THRESHOLD * 2);
52                 for attr in &self.vec {
53                     self.may_contain.insert(self.hash(&attr.name));
54                 }
55             }
56             self.may_contain.insert(self.hash(&attr.name));
57         }
58         self.vec.push(attr);
59     }
60 
into_vec(self) -> Vec<OwnedAttribute>61     pub fn into_vec(self) -> Vec<OwnedAttribute> {
62         self.vec
63     }
64 }
65 
66 #[test]
indexset()67 fn indexset() {
68     let mut s = AttributesSet::new();
69     let not_here = OwnedName {
70         local_name: "attr1000".into(),
71         namespace: Some("test".into()),
72         prefix: None,
73     };
74 
75     // this test will take a lot of time if the `contains()` is linear, and the loop is quadratic
76     for i in 0..50000 {
77         let name = OwnedName {
78             local_name: format!("attr{i}"), namespace: None, prefix: None,
79         };
80         assert!(!s.contains(&name));
81 
82         s.push(OwnedAttribute { name, value: String::new() });
83         assert!(!s.contains(&not_here));
84     }
85 
86     assert!(s.contains(&OwnedName {
87         local_name: "attr1234".into(), namespace: None, prefix: None,
88     }));
89     assert!(s.contains(&OwnedName {
90         local_name: "attr0".into(), namespace: None, prefix: None,
91     }));
92     assert!(s.contains(&OwnedName {
93         local_name: "attr49999".into(), namespace: None, prefix: None,
94     }));
95 }
96 
97 /// Hashser that does nothing except passing u64 through
98 struct U64Hasher(u64);
99 
100 impl Hasher for U64Hasher {
finish(&self) -> u64101     fn finish(&self) -> u64 { self.0 }
write(&mut self, slice: &[u8])102     fn write(&mut self, slice: &[u8]) {
103         for &v in slice { self.0 ^= u64::from(v) } // unused in practice
104     }
write_u64(&mut self, i: u64)105     fn write_u64(&mut self, i: u64) {
106         self.0 ^= i;
107     }
108 }
109 
110 #[derive(Default)]
111 struct U64HasherBuilder;
112 
113 impl BuildHasher for U64HasherBuilder {
114     type Hasher = U64Hasher;
build_hasher(&self) -> U64Hasher115     fn build_hasher(&self) -> U64Hasher { U64Hasher(0) }
116 }
117