1 //! This library implements string similarity metrics.
2 
3 #![forbid(unsafe_code)]
4 #![allow(
5     // these casts are sometimes needed. They restrict the length of input iterators
6     // but there isn't really any way around this except for always working with
7     // 128 bit types
8     clippy::cast_possible_wrap,
9     clippy::cast_sign_loss,
10     clippy::cast_precision_loss,
11     // not practical
12     clippy::needless_pass_by_value,
13     clippy::similar_names,
14     // noisy
15     clippy::missing_errors_doc,
16     clippy::missing_panics_doc,
17     clippy::must_use_candidate,
18     // todo https://github.com/rapidfuzz/strsim-rs/issues/59
19     clippy::range_plus_one
20 )]
21 
22 use std::char;
23 use std::cmp::{max, min};
24 use std::collections::HashMap;
25 use std::convert::TryFrom;
26 use std::error::Error;
27 use std::fmt::{self, Display, Formatter};
28 use std::hash::Hash;
29 use std::mem;
30 use std::str::Chars;
31 
/// Errors that can occur while computing a string metric.
#[derive(Debug, PartialEq)]
pub enum StrSimError {
    /// The metric requires both arguments to have the same length, but the
    /// provided sequences differed (returned by `hamming`/`generic_hamming`).
    DifferentLengthArgs,
}
36 
37 impl Display for StrSimError {
fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error>38     fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
39         let text = match self {
40             StrSimError::DifferentLengthArgs => "Differing length arguments provided",
41         };
42 
43         write!(fmt, "{}", text)
44     }
45 }
46 
impl Error for StrSimError {}

/// Result of a Hamming distance computation: the distance, or an error when
/// the two inputs have different lengths.
pub type HammingResult = Result<usize, StrSimError>;
50 
51 /// Calculates the number of positions in the two sequences where the elements
52 /// differ. Returns an error if the sequences have different lengths.
generic_hamming<Iter1, Iter2, Elem1, Elem2>(a: Iter1, b: Iter2) -> HammingResult where Iter1: IntoIterator<Item = Elem1>, Iter2: IntoIterator<Item = Elem2>, Elem1: PartialEq<Elem2>,53 pub fn generic_hamming<Iter1, Iter2, Elem1, Elem2>(a: Iter1, b: Iter2) -> HammingResult
54 where
55     Iter1: IntoIterator<Item = Elem1>,
56     Iter2: IntoIterator<Item = Elem2>,
57     Elem1: PartialEq<Elem2>,
58 {
59     let (mut ita, mut itb) = (a.into_iter(), b.into_iter());
60     let mut count = 0;
61     loop {
62         match (ita.next(), itb.next()) {
63             (Some(x), Some(y)) => {
64                 if x != y {
65                     count += 1;
66                 }
67             }
68             (None, None) => return Ok(count),
69             _ => return Err(StrSimError::DifferentLengthArgs),
70         }
71     }
72 }
73 
74 /// Calculates the number of positions in the two strings where the characters
75 /// differ. Returns an error if the strings have different lengths.
76 ///
77 /// ```
78 /// use strsim::{hamming, StrSimError::DifferentLengthArgs};
79 ///
80 /// assert_eq!(Ok(3), hamming("hamming", "hammers"));
81 ///
82 /// assert_eq!(Err(DifferentLengthArgs), hamming("hamming", "ham"));
83 /// ```
hamming(a: &str, b: &str) -> HammingResult84 pub fn hamming(a: &str, b: &str) -> HammingResult {
85     generic_hamming(a.chars(), b.chars())
86 }
87 
/// Calculates the Jaro similarity between two sequences. The returned value
/// is between 0.0 and 1.0 (higher value means more similar).
pub fn generic_jaro<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> f64
where
    &'a Iter1: IntoIterator<Item = Elem1>,
    &'b Iter2: IntoIterator<Item = Elem2>,
    Elem1: PartialEq<Elem2>,
{
    let a_len = a.into_iter().count();
    let b_len = b.into_iter().count();

    // Two empty sequences count as identical; one empty sequence shares
    // nothing with a non-empty one.
    if a_len == 0 && b_len == 0 {
        return 1.0;
    } else if a_len == 0 || b_len == 0 {
        return 0.0;
    }

    // Elements only "match" when they sit at most `search_range` positions
    // apart: floor(max_len / 2) - 1, per the Jaro definition.
    let mut search_range = max(a_len, b_len) / 2;
    search_range = search_range.saturating_sub(1);

    // combine memory allocations to reduce runtime
    let mut flags_memory = vec![false; a_len + b_len];
    let (a_flags, b_flags) = flags_memory.split_at_mut(a_len);

    let mut matches = 0_usize;

    // Phase 1: greedily pair elements inside the search window, flagging the
    // matched position on both sides so nothing gets matched twice.
    for (i, a_elem) in a.into_iter().enumerate() {
        // prevent integer wrapping
        let min_bound = if i > search_range {
            i - search_range
        } else {
            0
        };

        let max_bound = min(b_len, i + search_range + 1);

        for (j, b_elem) in b.into_iter().enumerate().take(max_bound) {
            if min_bound <= j && a_elem == b_elem && !b_flags[j] {
                a_flags[i] = true;
                b_flags[j] = true;
                matches += 1;
                break;
            }
        }
    }

    // Phase 2: walk the matched elements of both sequences in order; every
    // matched pair that disagrees contributes to the raw count, and two such
    // disagreements make up one transposition (hence the halving below).
    let mut transpositions = 0_usize;
    if matches != 0 {
        let mut b_iter = b_flags.iter().zip(b);
        for (a_flag, ch1) in a_flags.iter().zip(a) {
            if *a_flag {
                // Advance to the next flagged element of `b`. A partner is
                // guaranteed to exist because both sides flagged exactly
                // `matches` elements in phase 1.
                loop {
                    if let Some((b_flag, ch2)) = b_iter.next() {
                        if !*b_flag {
                            continue;
                        }

                        if ch1 != ch2 {
                            transpositions += 1;
                        }
                        break;
                    }
                }
            }
        }
    }
    transpositions /= 2;

    if matches == 0 {
        0.0
    } else {
        ((matches as f64 / a_len as f64)
            + (matches as f64 / b_len as f64)
            + ((matches - transpositions) as f64 / matches as f64))
            / 3.0
    }
}
165 
/// Zero-cost adapter that lets a `&str` be used with the `&Iter`-based
/// generic metrics: iterating a `&StringWrapper` yields the string's chars.
struct StringWrapper<'a>(&'a str);

impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> {
    type Item = char;
    // the iterator borrows the underlying str ('b), not the wrapper ('a)
    type IntoIter = Chars<'b>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.chars()
    }
}
176 
177 /// Calculates the Jaro similarity between two strings. The returned value
178 /// is between 0.0 and 1.0 (higher value means more similar).
179 ///
180 /// ```
181 /// use strsim::jaro;
182 ///
183 /// assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
184 ///         0.001);
185 /// ```
jaro(a: &str, b: &str) -> f64186 pub fn jaro(a: &str, b: &str) -> f64 {
187     generic_jaro(&StringWrapper(a), &StringWrapper(b))
188 }
189 
190 /// Like Jaro but gives a boost to sequences that have a common prefix.
generic_jaro_winkler<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> f64 where &'a Iter1: IntoIterator<Item = Elem1>, &'b Iter2: IntoIterator<Item = Elem2>, Elem1: PartialEq<Elem2>,191 pub fn generic_jaro_winkler<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> f64
192 where
193     &'a Iter1: IntoIterator<Item = Elem1>,
194     &'b Iter2: IntoIterator<Item = Elem2>,
195     Elem1: PartialEq<Elem2>,
196 {
197     let sim = generic_jaro(a, b);
198 
199     if sim > 0.7 {
200         let prefix_length = a
201             .into_iter()
202             .take(4)
203             .zip(b)
204             .take_while(|(a_elem, b_elem)| a_elem == b_elem)
205             .count();
206 
207         sim + 0.1 * prefix_length as f64 * (1.0 - sim)
208     } else {
209         sim
210     }
211 }
212 
213 /// Like Jaro but gives a boost to strings that have a common prefix.
214 ///
215 /// ```
216 /// use strsim::jaro_winkler;
217 ///
218 /// assert!((0.866 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
219 ///         0.001);
220 /// ```
jaro_winkler(a: &str, b: &str) -> f64221 pub fn jaro_winkler(a: &str, b: &str) -> f64 {
222     generic_jaro_winkler(&StringWrapper(a), &StringWrapper(b))
223 }
224 
/// Calculates the minimum number of insertions, deletions, and substitutions
/// required to change one sequence into the other.
///
/// ```
/// use strsim::generic_levenshtein;
///
/// assert_eq!(3, generic_levenshtein(&[1,2,3], &[1,2,3,4,5,6]));
/// ```
pub fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> usize
where
    &'a Iter1: IntoIterator<Item = Elem1>,
    &'b Iter2: IntoIterator<Item = Elem2>,
    Elem1: PartialEq<Elem2>,
{
    let b_len = b.into_iter().count();

    // Single-row Wagner-Fischer DP with O(b_len) memory: cache[j] holds the
    // distance between the prefix of `a` processed so far and the first j+1
    // elements of `b`. Initialized to row 0 (j+1 pure insertions).
    let mut cache: Vec<usize> = (1..b_len + 1).collect();

    // If `a` is empty the outer loop never runs and the answer is b_len.
    let mut result = b_len;

    for (i, a_elem) in a.into_iter().enumerate() {
        // Column 0 of this row: i+1 pure deletions.
        result = i + 1;
        // distance_b carries the diagonal value (previous row, column j-1).
        let mut distance_b = i;

        for (j, b_elem) in b.into_iter().enumerate() {
            let cost = usize::from(a_elem != b_elem);
            // substitution candidate from the diagonal
            let distance_a = distance_b + cost;
            // cache[j] is still the previous row's value at column j
            distance_b = cache[j];
            // min of left neighbor + 1, diagonal + cost, upper neighbor + 1
            result = min(result + 1, min(distance_a, distance_b + 1));
            cache[j] = result;
        }
    }

    result
}
260 
261 /// Calculates the minimum number of insertions, deletions, and substitutions
262 /// required to change one string into the other.
263 ///
264 /// ```
265 /// use strsim::levenshtein;
266 ///
267 /// assert_eq!(3, levenshtein("kitten", "sitting"));
268 /// ```
levenshtein(a: &str, b: &str) -> usize269 pub fn levenshtein(a: &str, b: &str) -> usize {
270     generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
271 }
272 
273 /// Calculates a normalized score of the Levenshtein algorithm between 0.0 and
274 /// 1.0 (inclusive), where 1.0 means the strings are the same.
275 ///
276 /// ```
277 /// use strsim::normalized_levenshtein;
278 ///
279 /// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
280 /// assert!((normalized_levenshtein("", "") - 1.0).abs() < 0.00001);
281 /// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
282 /// assert!(normalized_levenshtein("first", "").abs() < 0.00001);
283 /// assert!((normalized_levenshtein("string", "string") - 1.0).abs() < 0.00001);
284 /// ```
normalized_levenshtein(a: &str, b: &str) -> f64285 pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
286     if a.is_empty() && b.is_empty() {
287         return 1.0;
288     }
289     1.0 - (levenshtein(a, b) as f64) / (a.chars().count().max(b.chars().count()) as f64)
290 }
291 
/// Like Levenshtein but allows for adjacent transpositions. Each substring can
/// only be edited once.
///
/// ```
/// use strsim::osa_distance;
///
/// assert_eq!(3, osa_distance("ab", "bca"));
/// ```
pub fn osa_distance(a: &str, b: &str) -> usize {
    let b_len = b.chars().count();
    // 0..=b_len behaves like 0..b_len.saturating_add(1) which could be a different size
    // this leads to significantly worse code gen when swapping the vectors below
    // Three DP rows: the row two iterations back (needed for transpositions),
    // the previous row, and the row being filled.
    let mut prev_two_distances: Vec<usize> = (0..b_len + 1).collect();
    let mut prev_distances: Vec<usize> = (0..b_len + 1).collect();
    let mut curr_distances: Vec<usize> = vec![0; b_len + 1];

    // Placeholder values only; the `i > 0 && j > 0` guard below keeps the
    // first row/column out of the transposition check regardless.
    let mut prev_a_char = char::MAX;
    let mut prev_b_char = char::MAX;

    for (i, a_char) in a.chars().enumerate() {
        // distance from the (i+1)-char prefix of `a` to the empty prefix of `b`
        curr_distances[0] = i + 1;

        for (j, b_char) in b.chars().enumerate() {
            let cost = usize::from(a_char != b_char);
            // classic Levenshtein recurrence: left + 1, up + 1, diagonal + cost
            curr_distances[j + 1] = min(
                curr_distances[j] + 1,
                min(prev_distances[j + 1] + 1, prev_distances[j] + cost),
            );
            // OSA extension: a pair of adjacent characters swapped between the
            // two strings can be repaired with a single transposition edit
            if i > 0 && j > 0 && a_char != b_char && a_char == prev_b_char && b_char == prev_a_char
            {
                curr_distances[j + 1] = min(curr_distances[j + 1], prev_two_distances[j - 1] + 1);
            }

            prev_b_char = b_char;
        }

        // rotate the three rows instead of reallocating
        mem::swap(&mut prev_two_distances, &mut prev_distances);
        mem::swap(&mut prev_distances, &mut curr_distances);
        prev_a_char = a_char;
    }

    // access prev_distances instead of curr_distances since we swapped
    // them above. In case a is empty this would still contain the correct value
    // from initializing the last element to b_len
    prev_distances[b_len]
}
338 
/// Maps 2d grid coordinates `(i, j)` to the corresponding index in a single
/// vector that backs a fixed grid of the given `width`.
fn flat_index(i: usize, j: usize, width: usize) -> usize {
    width * j + i
}
344 
/// Like optimal string alignment, but substrings can be edited an unlimited
/// number of times, and the triangle inequality holds.
///
/// ```
/// use strsim::generic_damerau_levenshtein;
///
/// assert_eq!(2, generic_damerau_levenshtein(&[1,2], &[2,3,1]));
/// ```
pub fn generic_damerau_levenshtein<Elem>(a_elems: &[Elem], b_elems: &[Elem]) -> usize
where
    Elem: Eq + Hash + Clone,
{
    let a_len = a_elems.len();
    let b_len = b_elems.len();

    // Against an empty sequence the distance is just the other's length.
    if a_len == 0 {
        return b_len;
    }
    if b_len == 0 {
        return a_len;
    }

    // The (a_len+2) x (b_len+2) matrix is stored flat; cell (i, j) lives at
    // j * width + i.
    let width = a_len + 2;
    let idx = |i: usize, j: usize| j * width + i;

    // Sentinel value larger than any reachable distance.
    let max_distance = a_len + b_len;
    let mut matrix = vec![0_usize; width * (b_len + 2)];
    matrix[idx(0, 0)] = max_distance;

    // Border rows/columns: a sentinel layer plus the usual base distances.
    for i in 0..=a_len {
        matrix[idx(i + 1, 0)] = max_distance;
        matrix[idx(i + 1, 1)] = i;
    }
    for j in 0..=b_len {
        matrix[idx(0, j + 1)] = max_distance;
        matrix[idx(1, j + 1)] = j;
    }

    // For each element of `a`: the (1-based) row of its last occurrence.
    let mut last_row: HashMap<Elem, usize> = HashMap::with_capacity(64);

    for i in 1..=a_len {
        // Column of the last element of `b` (so far) equal to a_elems[i-1].
        let mut db = 0;

        for j in 1..=b_len {
            // Row of the last occurrence of b_elems[j-1] in `a`.
            let k = last_row.get(&b_elems[j - 1]).copied().unwrap_or(0);

            let insertion_cost = matrix[idx(i, j + 1)] + 1;
            let deletion_cost = matrix[idx(i + 1, j)] + 1;
            // Transpose the occurrences at (k, db) and pay for everything
            // deleted/inserted between them.
            let transposition_cost =
                matrix[idx(k, db)] + (i - k - 1) + 1 + (j - db - 1);

            let substitution_cost = if a_elems[i - 1] == b_elems[j - 1] {
                db = j;
                // equal elements: carry the diagonal over unchanged
                matrix[idx(i, j)]
            } else {
                matrix[idx(i, j)] + 1
            };

            matrix[idx(i + 1, j + 1)] = substitution_cost
                .min(insertion_cost)
                .min(deletion_cost)
                .min(transposition_cost);
        }

        last_row.insert(a_elems[i - 1].clone(), i);
    }

    matrix[idx(a_len + 1, b_len + 1)]
}
415 
/// Row index of the last occurrence of a character, as tracked by
/// `damerau_levenshtein_impl` (rows there are 1-based).
#[derive(Clone, Copy, PartialEq, Eq)]
struct RowId {
    val: isize,
}

impl Default for RowId {
    fn default() -> Self {
        // -1 marks "character not seen yet"; real rows start at 1
        Self { val: -1 }
    }
}
426 
/// A single hashmap slot: a `u32` key (a char's code point) and its value.
/// A slot whose value equals `ValueType::default()` counts as empty.
#[derive(Default, Clone)]
struct GrowingHashmapMapElemChar<ValueType> {
    key: u32,
    value: ValueType,
}
432 
/// specialized hashmap to store user provided types
/// this implementation relies on a couple of base assumptions in order to simplify the implementation
/// - the hashmap does not have an upper limit of included items
/// - the default value for the `ValueType` can be used as a dummy value to indicate an empty cell
/// - elements can't be removed
/// - only allocates memory on first write access.
///   This improves performance for hashmaps that are never written to
struct GrowingHashmapChar<ValueType> {
    // number of slots currently holding a non-default value
    used: i32,
    // number of slots ever written; tracks `used` since elements can't be removed
    fill: i32,
    // capacity - 1 (capacity is a power of two); -1 while unallocated
    mask: i32,
    // lazily allocated slot storage; `None` until the first write
    map: Option<Vec<GrowingHashmapMapElemChar<ValueType>>>,
}
446 
impl<ValueType> Default for GrowingHashmapChar<ValueType>
where
    ValueType: Default + Clone + Eq,
{
    fn default() -> Self {
        Self {
            used: 0,
            fill: 0,
            // -1 encodes "no storage allocated yet"
            mask: -1,
            map: None,
        }
    }
}
460 
impl<ValueType> GrowingHashmapChar<ValueType>
where
    ValueType: Default + Clone + Eq + Copy,
{
    /// Returns the value stored for `key`, or `ValueType::default()` when the
    /// key is absent (including when the map was never written to).
    fn get(&self, key: u32) -> ValueType {
        self.map
            .as_ref()
            .map_or_else(|| Default::default(), |map| map[self.lookup(key)].value)
    }

    /// Returns a mutable reference to the slot for `key`, allocating the
    /// table on first use and growing it when it gets too full.
    fn get_mut(&mut self, key: u32) -> &mut ValueType {
        if self.map.is_none() {
            self.allocate();
        }

        let mut i = self.lookup(key);
        // A default value marks an empty slot, so entering this branch means
        // we are inserting a new key rather than updating an existing one.
        if self
            .map
            .as_ref()
            .expect("map should have been created above")[i]
            .value
            == Default::default()
        {
            self.fill += 1;
            // resize when 2/3 full
            if self.fill * 3 >= (self.mask + 1) * 2 {
                self.grow((self.used + 1) * 2);
                // growing rehashes every entry, so recompute the slot index
                i = self.lookup(key);
            }

            self.used += 1;
        }

        let elem = &mut self
            .map
            .as_mut()
            .expect("map should have been created above")[i];
        elem.key = key;
        &mut elem.value
    }

    /// Creates the initial 8-slot table (`mask` is always capacity - 1).
    fn allocate(&mut self) {
        self.mask = 8 - 1;
        self.map = Some(vec![GrowingHashmapMapElemChar::default(); 8]);
    }

    /// lookup key inside the hashmap using a similar collision resolution
    /// strategy to `CPython` and `Ruby`
    ///
    /// Returns the index of the slot holding `key`, or of the first empty
    /// slot along its probe sequence. The probe loop terminates because the
    /// table is grown before it can become full (see `get_mut`).
    fn lookup(&self, key: u32) -> usize {
        let hash = key;
        let mut i = hash as usize & self.mask as usize;

        let map = self
            .map
            .as_ref()
            .expect("callers have to ensure map is allocated");

        if map[i].value == Default::default() || map[i].key == key {
            return i;
        }

        // Open addressing: mix the key's upper bits into the probe sequence
        // so keys that collide on the initial slot diverge quickly.
        let mut perturb = key;
        loop {
            i = (i * 5 + perturb as usize + 1) & self.mask as usize;

            if map[i].value == Default::default() || map[i].key == key {
                return i;
            }

            perturb >>= 5;
        }
    }

    /// Doubles the table size until it exceeds `min_used` slots and reinserts
    /// every occupied slot into the new table.
    fn grow(&mut self, min_used: i32) {
        let mut new_size = self.mask + 1;
        while new_size <= min_used {
            new_size <<= 1;
        }

        self.fill = self.used;
        self.mask = new_size - 1;

        let old_map = std::mem::replace(
            self.map
                .as_mut()
                .expect("callers have to ensure map is allocated"),
            vec![GrowingHashmapMapElemChar::<ValueType>::default(); new_size as usize],
        );

        // Move each live entry across; `used` doubles as a countdown so the
        // scan can stop as soon as every entry has been moved.
        for elem in old_map {
            if elem.value != Default::default() {
                let j = self.lookup(elem.key);
                let new_elem = &mut self.map.as_mut().expect("map created above")[j];
                new_elem.key = elem.key;
                new_elem.value = elem.value;
                self.used -= 1;
                if self.used == 0 {
                    break;
                }
            }
        }

        self.used = self.fill;
    }
}
566 
/// Hashmap keyed by `char` that keeps values for extended-ASCII keys
/// (code points 0..=255) in a flat array and falls back to
/// `GrowingHashmapChar` for everything else.
struct HybridGrowingHashmapChar<ValueType> {
    // overflow storage for code points above 255
    map: GrowingHashmapChar<ValueType>,
    // direct-indexed storage for code points 0..=255
    extended_ascii: [ValueType; 256],
}
571 
572 impl<ValueType> HybridGrowingHashmapChar<ValueType>
573 where
574     ValueType: Default + Clone + Copy + Eq,
575 {
get(&self, key: char) -> ValueType576     fn get(&self, key: char) -> ValueType {
577         let value = key as u32;
578         if value <= 255 {
579             let val_u8 = u8::try_from(value).expect("we check the bounds above");
580             self.extended_ascii[usize::from(val_u8)]
581         } else {
582             self.map.get(value)
583         }
584     }
585 
get_mut(&mut self, key: char) -> &mut ValueType586     fn get_mut(&mut self, key: char) -> &mut ValueType {
587         let value = key as u32;
588         if value <= 255 {
589             let val_u8 = u8::try_from(value).expect("we check the bounds above");
590             &mut self.extended_ascii[usize::from(val_u8)]
591         } else {
592             self.map.get_mut(value)
593         }
594     }
595 }
596 
597 impl<ValueType> Default for HybridGrowingHashmapChar<ValueType>
598 where
599     ValueType: Default + Clone + Copy + Eq,
600 {
default() -> Self601     fn default() -> Self {
602         HybridGrowingHashmapChar {
603             map: GrowingHashmapChar::default(),
604             extended_ascii: [Default::default(); 256],
605         }
606     }
607 }
608 
damerau_levenshtein_impl<Iter1, Iter2>(s1: Iter1, len1: usize, s2: Iter2, len2: usize) -> usize where Iter1: Iterator<Item = char> + Clone, Iter2: Iterator<Item = char> + Clone,609 fn damerau_levenshtein_impl<Iter1, Iter2>(s1: Iter1, len1: usize, s2: Iter2, len2: usize) -> usize
610 where
611     Iter1: Iterator<Item = char> + Clone,
612     Iter2: Iterator<Item = char> + Clone,
613 {
614     // The implementations is based on the paper
615     // `Linear space string correction algorithm using the Damerau-Levenshtein distance`
616     // from Chunchun Zhao and Sartaj Sahni
617     //
618     // It has a runtime complexity of `O(N*M)` and a memory usage of `O(N+M)`.
619     let max_val = max(len1, len2) as isize + 1;
620 
621     let mut last_row_id = HybridGrowingHashmapChar::<RowId>::default();
622 
623     let size = len2 + 2;
624     let mut fr = vec![max_val; size];
625     let mut r1 = vec![max_val; size];
626     let mut r: Vec<isize> = (max_val..max_val + 1)
627         .chain(0..(size - 1) as isize)
628         .collect();
629 
630     for (i, ch1) in s1.enumerate().map(|(i, ch1)| (i + 1, ch1)) {
631         mem::swap(&mut r, &mut r1);
632         let mut last_col_id: isize = -1;
633         let mut last_i2l1 = r[1];
634         r[1] = i as isize;
635         let mut t = max_val;
636 
637         for (j, ch2) in s2.clone().enumerate().map(|(j, ch2)| (j + 1, ch2)) {
638             let diag = r1[j] + isize::from(ch1 != ch2);
639             let left = r[j] + 1;
640             let up = r1[j + 1] + 1;
641             let mut temp = min(diag, min(left, up));
642 
643             if ch1 == ch2 {
644                 last_col_id = j as isize; // last occurence of s1_i
645                 fr[j + 1] = r1[j - 1]; // save H_k-1,j-2
646                 t = last_i2l1; // save H_i-2,l-1
647             } else {
648                 let k = last_row_id.get(ch2).val;
649                 let l = last_col_id;
650 
651                 if j as isize - l == 1 {
652                     let transpose = fr[j + 1] + (i as isize - k);
653                     temp = min(temp, transpose);
654                 } else if i as isize - k == 1 {
655                     let transpose = t + (j as isize - l);
656                     temp = min(temp, transpose);
657                 }
658             }
659 
660             last_i2l1 = r[j + 1];
661             r[j + 1] = temp;
662         }
663         last_row_id.get_mut(ch1).val = i as isize;
664     }
665 
666     r[len2 + 1] as usize
667 }
668 
669 /// Like optimal string alignment, but substrings can be edited an unlimited
670 /// number of times, and the triangle inequality holds.
671 ///
672 /// ```
673 /// use strsim::damerau_levenshtein;
674 ///
675 /// assert_eq!(2, damerau_levenshtein("ab", "bca"));
676 /// ```
damerau_levenshtein(a: &str, b: &str) -> usize677 pub fn damerau_levenshtein(a: &str, b: &str) -> usize {
678     damerau_levenshtein_impl(a.chars(), a.chars().count(), b.chars(), b.chars().count())
679 }
680 
681 /// Calculates a normalized score of the Damerau–Levenshtein algorithm between
682 /// 0.0 and 1.0 (inclusive), where 1.0 means the strings are the same.
683 ///
684 /// ```
685 /// use strsim::normalized_damerau_levenshtein;
686 ///
687 /// assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001);
688 /// assert!((normalized_damerau_levenshtein("", "") - 1.0).abs() < 0.00001);
689 /// assert!(normalized_damerau_levenshtein("", "flower").abs() < 0.00001);
690 /// assert!(normalized_damerau_levenshtein("tree", "").abs() < 0.00001);
691 /// assert!((normalized_damerau_levenshtein("sunglasses", "sunglasses") - 1.0).abs() < 0.00001);
692 /// ```
normalized_damerau_levenshtein(a: &str, b: &str) -> f64693 pub fn normalized_damerau_levenshtein(a: &str, b: &str) -> f64 {
694     if a.is_empty() && b.is_empty() {
695         return 1.0;
696     }
697 
698     let len1 = a.chars().count();
699     let len2 = b.chars().count();
700     let dist = damerau_levenshtein_impl(a.chars(), len1, b.chars(), len2);
701     1.0 - (dist as f64) / (max(len1, len2) as f64)
702 }
703 
/// Yields every pair of adjacent chars in `s` as a tuple; empty for strings
/// shorter than two chars.
fn bigrams(s: &str) -> impl Iterator<Item = (char, char)> + '_ {
    let shifted = s.chars().skip(1);
    s.chars().zip(shifted)
}
708 
/// Calculates a Sørensen-Dice similarity distance using bigrams.
/// See <https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient>.
///
/// Whitespace is stripped before comparison. The score is
/// `2 * |shared bigrams| / (bigrams(a) + bigrams(b))`, between 0.0 and 1.0.
///
/// ```
/// use strsim::sorensen_dice;
///
/// assert_eq!(1.0, sorensen_dice("", ""));
/// assert_eq!(0.0, sorensen_dice("", "a"));
/// assert_eq!(0.0, sorensen_dice("french", "quebec"));
/// assert_eq!(1.0, sorensen_dice("ferris", "ferris"));
/// assert_eq!(0.8888888888888888, sorensen_dice("feris", "ferris"));
/// ```
pub fn sorensen_dice(a: &str, b: &str) -> f64 {
    // implementation guided by
    // https://github.com/aceakash/string-similarity/blob/f83ba3cd7bae874c20c429774e911ae8cff8bced/src/index.js#L6

    let a: String = a.chars().filter(|&x| !char::is_whitespace(x)).collect();
    let b: String = b.chars().filter(|&x| !char::is_whitespace(x)).collect();

    if a == b {
        return 1.0;
    }

    // Count in chars, not bytes: `str::len` is a byte count, which inflates
    // the denominator for multi-byte (non-ASCII) input. A string with n chars
    // has n - 1 bigrams, so the denominator must use char counts.
    let a_char_count = a.chars().count();
    let b_char_count = b.chars().count();

    // A string with fewer than two chars has no bigrams to share.
    if a_char_count < 2 || b_char_count < 2 {
        return 0.0;
    }

    // Multiset of `a`'s bigrams; entries are decremented as they are matched
    // against `b` so duplicates only count as often as both strings have them.
    let mut a_bigrams: HashMap<(char, char), usize> = HashMap::new();

    for bigram in a.chars().zip(a.chars().skip(1)) {
        *a_bigrams.entry(bigram).or_insert(0) += 1;
    }

    let mut intersection_size = 0_usize;

    for bigram in b.chars().zip(b.chars().skip(1)) {
        a_bigrams.entry(bigram).and_modify(|count| {
            if *count > 0 {
                *count -= 1;
                intersection_size += 1;
            }
        });
    }

    (2 * intersection_size) as f64 / (a_char_count + b_char_count - 2) as f64
}
755 
756 #[cfg(test)]
757 mod tests {
758     use super::*;
759 
760     macro_rules! assert_delta {
761         ($x:expr, $y:expr) => {
762             assert_delta!($x, $y, 1e-5);
763         };
764         ($x:expr, $y:expr, $d:expr) => {
765             if ($x - $y).abs() > $d {
766                 panic!(
767                     "assertion failed: actual: `{}`, expected: `{}`: \
768                     actual not within < {} of expected",
769                     $x, $y, $d
770                 );
771             }
772         };
773     }
774 
775     #[test]
bigrams_iterator()776     fn bigrams_iterator() {
777         let mut bi = bigrams("abcde");
778 
779         assert_eq!(Some(('a', 'b')), bi.next());
780         assert_eq!(Some(('b', 'c')), bi.next());
781         assert_eq!(Some(('c', 'd')), bi.next());
782         assert_eq!(Some(('d', 'e')), bi.next());
783         assert_eq!(None, bi.next());
784     }
785 
assert_hamming_dist(dist: usize, str1: &str, str2: &str)786     fn assert_hamming_dist(dist: usize, str1: &str, str2: &str) {
787         assert_eq!(Ok(dist), hamming(str1, str2));
788     }
789 
790     #[test]
hamming_empty()791     fn hamming_empty() {
792         assert_hamming_dist(0, "", "")
793     }
794 
795     #[test]
hamming_same()796     fn hamming_same() {
797         assert_hamming_dist(0, "hamming", "hamming")
798     }
799 
800     #[test]
hamming_numbers()801     fn hamming_numbers() {
802         assert_eq!(Ok(1), generic_hamming(&[1, 2, 4], &[1, 2, 3]));
803     }
804 
805     #[test]
hamming_diff()806     fn hamming_diff() {
807         assert_hamming_dist(3, "hamming", "hammers")
808     }
809 
810     #[test]
hamming_diff_multibyte()811     fn hamming_diff_multibyte() {
812         assert_hamming_dist(2, "hamming", "h香mmüng");
813     }
814 
815     #[test]
hamming_unequal_length()816     fn hamming_unequal_length() {
817         assert_eq!(
818             Err(StrSimError::DifferentLengthArgs),
819             generic_hamming("ham".chars(), "hamming".chars())
820         );
821     }
822 
823     #[test]
hamming_names()824     fn hamming_names() {
825         assert_hamming_dist(14, "Friedrich Nietzs", "Jean-Paul Sartre")
826     }
827 
828     #[test]
jaro_both_empty()829     fn jaro_both_empty() {
830         assert_eq!(1.0, jaro("", ""));
831     }
832 
833     #[test]
jaro_first_empty()834     fn jaro_first_empty() {
835         assert_eq!(0.0, jaro("", "jaro"));
836     }
837 
838     #[test]
jaro_second_empty()839     fn jaro_second_empty() {
840         assert_eq!(0.0, jaro("distance", ""));
841     }
842 
843     #[test]
jaro_same()844     fn jaro_same() {
845         assert_eq!(1.0, jaro("jaro", "jaro"));
846     }
847 
848     #[test]
jaro_multibyte()849     fn jaro_multibyte() {
850         assert_delta!(0.818, jaro("testabctest", "testöঙ香test"), 0.001);
851         assert_delta!(0.818, jaro("testöঙ香test", "testabctest"), 0.001);
852     }
853 
854     #[test]
jaro_diff_short()855     fn jaro_diff_short() {
856         assert_delta!(0.767, jaro("dixon", "dicksonx"), 0.001);
857     }
858 
859     #[test]
jaro_diff_one_character()860     fn jaro_diff_one_character() {
861         assert_eq!(0.0, jaro("a", "b"));
862     }
863 
864     #[test]
jaro_same_one_character()865     fn jaro_same_one_character() {
866         assert_eq!(1.0, jaro("a", "a"));
867     }
868 
869     #[test]
generic_jaro_diff()870     fn generic_jaro_diff() {
871         assert_eq!(0.0, generic_jaro(&[1, 2], &[3, 4]));
872     }
873 
874     #[test]
jaro_diff_one_and_two()875     fn jaro_diff_one_and_two() {
876         assert_delta!(0.83, jaro("a", "ab"), 0.01);
877     }
878 
879     #[test]
jaro_diff_two_and_one()880     fn jaro_diff_two_and_one() {
881         assert_delta!(0.83, jaro("ab", "a"), 0.01);
882     }
883 
884     #[test]
jaro_diff_no_transposition()885     fn jaro_diff_no_transposition() {
886         assert_delta!(0.822, jaro("dwayne", "duane"), 0.001);
887     }
888 
889     #[test]
jaro_diff_with_transposition()890     fn jaro_diff_with_transposition() {
891         assert_delta!(0.944, jaro("martha", "marhta"), 0.001);
892         assert_delta!(0.6, jaro("a jke", "jane a k"), 0.001);
893     }
894 
895     #[test]
jaro_names()896     fn jaro_names() {
897         assert_delta!(
898             0.392,
899             jaro("Friedrich Nietzsche", "Jean-Paul Sartre"),
900             0.001
901         );
902     }
903 
904     #[test]
jaro_winkler_both_empty()905     fn jaro_winkler_both_empty() {
906         assert_eq!(1.0, jaro_winkler("", ""));
907     }
908 
909     #[test]
jaro_winkler_first_empty()910     fn jaro_winkler_first_empty() {
911         assert_eq!(0.0, jaro_winkler("", "jaro-winkler"));
912     }
913 
914     #[test]
jaro_winkler_second_empty()915     fn jaro_winkler_second_empty() {
916         assert_eq!(0.0, jaro_winkler("distance", ""));
917     }
918 
919     #[test]
jaro_winkler_same()920     fn jaro_winkler_same() {
921         assert_eq!(1.0, jaro_winkler("Jaro-Winkler", "Jaro-Winkler"));
922     }
923 
924     #[test]
jaro_winkler_multibyte()925     fn jaro_winkler_multibyte() {
926         assert_delta!(0.89, jaro_winkler("testabctest", "testöঙ香test"), 0.001);
927         assert_delta!(0.89, jaro_winkler("testöঙ香test", "testabctest"), 0.001);
928     }
929 
930     #[test]
jaro_winkler_diff_short()931     fn jaro_winkler_diff_short() {
932         assert_delta!(0.813, jaro_winkler("dixon", "dicksonx"), 0.001);
933         assert_delta!(0.813, jaro_winkler("dicksonx", "dixon"), 0.001);
934     }
935 
936     #[test]
jaro_winkler_diff_one_character()937     fn jaro_winkler_diff_one_character() {
938         assert_eq!(0.0, jaro_winkler("a", "b"));
939     }
940 
941     #[test]
jaro_winkler_same_one_character()942     fn jaro_winkler_same_one_character() {
943         assert_eq!(1.0, jaro_winkler("a", "a"));
944     }
945 
946     #[test]
jaro_winkler_diff_no_transposition()947     fn jaro_winkler_diff_no_transposition() {
948         assert_delta!(0.84, jaro_winkler("dwayne", "duane"), 0.001);
949     }
950 
951     #[test]
jaro_winkler_diff_with_transposition()952     fn jaro_winkler_diff_with_transposition() {
953         assert_delta!(0.961, jaro_winkler("martha", "marhta"), 0.001);
954         assert_delta!(0.6, jaro_winkler("a jke", "jane a k"), 0.001);
955     }
956 
957     #[test]
jaro_winkler_names()958     fn jaro_winkler_names() {
959         assert_delta!(
960             0.452,
961             jaro_winkler("Friedrich Nietzsche", "Fran-Paul Sartre"),
962             0.001
963         );
964     }
965 
966     #[test]
jaro_winkler_long_prefix()967     fn jaro_winkler_long_prefix() {
968         assert_delta!(0.866, jaro_winkler("cheeseburger", "cheese fries"), 0.001);
969     }
970 
971     #[test]
jaro_winkler_more_names()972     fn jaro_winkler_more_names() {
973         assert_delta!(0.868, jaro_winkler("Thorkel", "Thorgier"), 0.001);
974     }
975 
976     #[test]
jaro_winkler_length_of_one()977     fn jaro_winkler_length_of_one() {
978         assert_delta!(0.738, jaro_winkler("Dinsdale", "D"), 0.001);
979     }
980 
981     #[test]
jaro_winkler_very_long_prefix()982     fn jaro_winkler_very_long_prefix() {
983         assert_delta!(
984             0.98519,
985             jaro_winkler("thequickbrownfoxjumpedoverx", "thequickbrownfoxjumpedovery")
986         );
987     }
988 
989     #[test]
levenshtein_empty()990     fn levenshtein_empty() {
991         assert_eq!(0, levenshtein("", ""));
992     }
993 
994     #[test]
levenshtein_same()995     fn levenshtein_same() {
996         assert_eq!(0, levenshtein("levenshtein", "levenshtein"));
997     }
998 
999     #[test]
levenshtein_diff_short()1000     fn levenshtein_diff_short() {
1001         assert_eq!(3, levenshtein("kitten", "sitting"));
1002     }
1003 
1004     #[test]
levenshtein_diff_with_space()1005     fn levenshtein_diff_with_space() {
1006         assert_eq!(5, levenshtein("hello, world", "bye, world"));
1007     }
1008 
1009     #[test]
levenshtein_diff_multibyte()1010     fn levenshtein_diff_multibyte() {
1011         assert_eq!(3, levenshtein("öঙ香", "abc"));
1012         assert_eq!(3, levenshtein("abc", "öঙ香"));
1013     }
1014 
1015     #[test]
levenshtein_diff_longer()1016     fn levenshtein_diff_longer() {
1017         let a = "The quick brown fox jumped over the angry dog.";
1018         let b = "Lorem ipsum dolor sit amet, dicta latine an eam.";
1019         assert_eq!(37, levenshtein(a, b));
1020     }
1021 
1022     #[test]
levenshtein_first_empty()1023     fn levenshtein_first_empty() {
1024         assert_eq!(7, levenshtein("", "sitting"));
1025     }
1026 
1027     #[test]
levenshtein_second_empty()1028     fn levenshtein_second_empty() {
1029         assert_eq!(6, levenshtein("kitten", ""));
1030     }
1031 
1032     #[test]
normalized_levenshtein_diff_short()1033     fn normalized_levenshtein_diff_short() {
1034         assert_delta!(0.57142, normalized_levenshtein("kitten", "sitting"));
1035     }
1036 
1037     #[test]
normalized_levenshtein_for_empty_strings()1038     fn normalized_levenshtein_for_empty_strings() {
1039         assert_delta!(1.0, normalized_levenshtein("", ""));
1040     }
1041 
1042     #[test]
normalized_levenshtein_first_empty()1043     fn normalized_levenshtein_first_empty() {
1044         assert_delta!(0.0, normalized_levenshtein("", "second"));
1045     }
1046 
1047     #[test]
normalized_levenshtein_second_empty()1048     fn normalized_levenshtein_second_empty() {
1049         assert_delta!(0.0, normalized_levenshtein("first", ""));
1050     }
1051 
1052     #[test]
normalized_levenshtein_identical_strings()1053     fn normalized_levenshtein_identical_strings() {
1054         assert_delta!(1.0, normalized_levenshtein("identical", "identical"));
1055     }
1056 
1057     #[test]
osa_distance_empty()1058     fn osa_distance_empty() {
1059         assert_eq!(0, osa_distance("", ""));
1060     }
1061 
1062     #[test]
osa_distance_same()1063     fn osa_distance_same() {
1064         assert_eq!(0, osa_distance("damerau", "damerau"));
1065     }
1066 
1067     #[test]
osa_distance_first_empty()1068     fn osa_distance_first_empty() {
1069         assert_eq!(7, osa_distance("", "damerau"));
1070     }
1071 
1072     #[test]
osa_distance_second_empty()1073     fn osa_distance_second_empty() {
1074         assert_eq!(7, osa_distance("damerau", ""));
1075     }
1076 
1077     #[test]
osa_distance_diff()1078     fn osa_distance_diff() {
1079         assert_eq!(3, osa_distance("ca", "abc"));
1080     }
1081 
1082     #[test]
osa_distance_diff_short()1083     fn osa_distance_diff_short() {
1084         assert_eq!(3, osa_distance("damerau", "aderua"));
1085     }
1086 
1087     #[test]
osa_distance_diff_reversed()1088     fn osa_distance_diff_reversed() {
1089         assert_eq!(3, osa_distance("aderua", "damerau"));
1090     }
1091 
1092     #[test]
osa_distance_diff_multibyte()1093     fn osa_distance_diff_multibyte() {
1094         assert_eq!(3, osa_distance("öঙ香", "abc"));
1095         assert_eq!(3, osa_distance("abc", "öঙ香"));
1096     }
1097 
1098     #[test]
osa_distance_diff_unequal_length()1099     fn osa_distance_diff_unequal_length() {
1100         assert_eq!(6, osa_distance("damerau", "aderuaxyz"));
1101     }
1102 
1103     #[test]
osa_distance_diff_unequal_length_reversed()1104     fn osa_distance_diff_unequal_length_reversed() {
1105         assert_eq!(6, osa_distance("aderuaxyz", "damerau"));
1106     }
1107 
1108     #[test]
osa_distance_diff_comedians()1109     fn osa_distance_diff_comedians() {
1110         assert_eq!(5, osa_distance("Stewart", "Colbert"));
1111     }
1112 
1113     #[test]
osa_distance_many_transpositions()1114     fn osa_distance_many_transpositions() {
1115         assert_eq!(4, osa_distance("abcdefghijkl", "bacedfgihjlk"));
1116     }
1117 
1118     #[test]
osa_distance_diff_longer()1119     fn osa_distance_diff_longer() {
1120         let a = "The quick brown fox jumped over the angry dog.";
1121         let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
1122         assert_eq!(36, osa_distance(a, b));
1123     }
1124 
1125     #[test]
osa_distance_beginning_transposition()1126     fn osa_distance_beginning_transposition() {
1127         assert_eq!(1, osa_distance("foobar", "ofobar"));
1128     }
1129 
1130     #[test]
osa_distance_end_transposition()1131     fn osa_distance_end_transposition() {
1132         assert_eq!(1, osa_distance("specter", "spectre"));
1133     }
1134 
1135     #[test]
osa_distance_restricted_edit()1136     fn osa_distance_restricted_edit() {
1137         assert_eq!(4, osa_distance("a cat", "an abct"));
1138     }
1139 
1140     #[test]
damerau_levenshtein_empty()1141     fn damerau_levenshtein_empty() {
1142         assert_eq!(0, damerau_levenshtein("", ""));
1143     }
1144 
1145     #[test]
damerau_levenshtein_same()1146     fn damerau_levenshtein_same() {
1147         assert_eq!(0, damerau_levenshtein("damerau", "damerau"));
1148     }
1149 
1150     #[test]
damerau_levenshtein_first_empty()1151     fn damerau_levenshtein_first_empty() {
1152         assert_eq!(7, damerau_levenshtein("", "damerau"));
1153     }
1154 
1155     #[test]
damerau_levenshtein_second_empty()1156     fn damerau_levenshtein_second_empty() {
1157         assert_eq!(7, damerau_levenshtein("damerau", ""));
1158     }
1159 
1160     #[test]
damerau_levenshtein_diff()1161     fn damerau_levenshtein_diff() {
1162         assert_eq!(2, damerau_levenshtein("ca", "abc"));
1163     }
1164 
1165     #[test]
damerau_levenshtein_diff_short()1166     fn damerau_levenshtein_diff_short() {
1167         assert_eq!(3, damerau_levenshtein("damerau", "aderua"));
1168     }
1169 
1170     #[test]
damerau_levenshtein_diff_reversed()1171     fn damerau_levenshtein_diff_reversed() {
1172         assert_eq!(3, damerau_levenshtein("aderua", "damerau"));
1173     }
1174 
1175     #[test]
damerau_levenshtein_diff_multibyte()1176     fn damerau_levenshtein_diff_multibyte() {
1177         assert_eq!(3, damerau_levenshtein("öঙ香", "abc"));
1178         assert_eq!(3, damerau_levenshtein("abc", "öঙ香"));
1179     }
1180 
1181     #[test]
damerau_levenshtein_diff_unequal_length()1182     fn damerau_levenshtein_diff_unequal_length() {
1183         assert_eq!(6, damerau_levenshtein("damerau", "aderuaxyz"));
1184     }
1185 
1186     #[test]
damerau_levenshtein_diff_unequal_length_reversed()1187     fn damerau_levenshtein_diff_unequal_length_reversed() {
1188         assert_eq!(6, damerau_levenshtein("aderuaxyz", "damerau"));
1189     }
1190 
1191     #[test]
damerau_levenshtein_diff_comedians()1192     fn damerau_levenshtein_diff_comedians() {
1193         assert_eq!(5, damerau_levenshtein("Stewart", "Colbert"));
1194     }
1195 
1196     #[test]
damerau_levenshtein_many_transpositions()1197     fn damerau_levenshtein_many_transpositions() {
1198         assert_eq!(4, damerau_levenshtein("abcdefghijkl", "bacedfgihjlk"));
1199     }
1200 
1201     #[test]
damerau_levenshtein_diff_longer()1202     fn damerau_levenshtein_diff_longer() {
1203         let a = "The quick brown fox jumped over the angry dog.";
1204         let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
1205         assert_eq!(36, damerau_levenshtein(a, b));
1206     }
1207 
1208     #[test]
damerau_levenshtein_beginning_transposition()1209     fn damerau_levenshtein_beginning_transposition() {
1210         assert_eq!(1, damerau_levenshtein("foobar", "ofobar"));
1211     }
1212 
1213     #[test]
damerau_levenshtein_end_transposition()1214     fn damerau_levenshtein_end_transposition() {
1215         assert_eq!(1, damerau_levenshtein("specter", "spectre"));
1216     }
1217 
1218     #[test]
damerau_levenshtein_unrestricted_edit()1219     fn damerau_levenshtein_unrestricted_edit() {
1220         assert_eq!(3, damerau_levenshtein("a cat", "an abct"));
1221     }
1222 
1223     #[test]
normalized_damerau_levenshtein_diff_short()1224     fn normalized_damerau_levenshtein_diff_short() {
1225         assert_delta!(
1226             0.27272,
1227             normalized_damerau_levenshtein("levenshtein", "löwenbräu")
1228         );
1229     }
1230 
1231     #[test]
normalized_damerau_levenshtein_for_empty_strings()1232     fn normalized_damerau_levenshtein_for_empty_strings() {
1233         assert_delta!(1.0, normalized_damerau_levenshtein("", ""));
1234     }
1235 
1236     #[test]
normalized_damerau_levenshtein_first_empty()1237     fn normalized_damerau_levenshtein_first_empty() {
1238         assert_delta!(0.0, normalized_damerau_levenshtein("", "flower"));
1239     }
1240 
1241     #[test]
normalized_damerau_levenshtein_second_empty()1242     fn normalized_damerau_levenshtein_second_empty() {
1243         assert_delta!(0.0, normalized_damerau_levenshtein("tree", ""));
1244     }
1245 
1246     #[test]
normalized_damerau_levenshtein_identical_strings()1247     fn normalized_damerau_levenshtein_identical_strings() {
1248         assert_delta!(
1249             1.0,
1250             normalized_damerau_levenshtein("sunglasses", "sunglasses")
1251         );
1252     }
1253 
1254     #[test]
sorensen_dice_all()1255     fn sorensen_dice_all() {
1256         // test cases taken from
1257         // https://github.com/aceakash/string-similarity/blob/f83ba3cd7bae874c20c429774e911ae8cff8bced/src/spec/index.spec.js#L11
1258 
1259         assert_delta!(1.0, sorensen_dice("a", "a"));
1260         assert_delta!(0.0, sorensen_dice("a", "b"));
1261         assert_delta!(1.0, sorensen_dice("", ""));
1262         assert_delta!(0.0, sorensen_dice("a", ""));
1263         assert_delta!(0.0, sorensen_dice("", "a"));
1264         assert_delta!(1.0, sorensen_dice("apple event", "apple    event"));
1265         assert_delta!(0.90909, sorensen_dice("iphone", "iphone x"));
1266         assert_delta!(0.0, sorensen_dice("french", "quebec"));
1267         assert_delta!(1.0, sorensen_dice("france", "france"));
1268         assert_delta!(0.2, sorensen_dice("fRaNce", "france"));
1269         assert_delta!(0.8, sorensen_dice("healed", "sealed"));
1270         assert_delta!(
1271             0.78788,
1272             sorensen_dice("web applications", "applications of the web")
1273         );
1274         assert_delta!(
1275             0.92,
1276             sorensen_dice(
1277                 "this will have a typo somewhere",
1278                 "this will huve a typo somewhere"
1279             )
1280         );
1281         assert_delta!(
1282             0.60606,
1283             sorensen_dice(
1284                 "Olive-green table for sale, in extremely good condition.",
1285                 "For sale: table in very good  condition, olive green in colour."
1286             )
1287         );
1288         assert_delta!(
1289             0.25581,
1290             sorensen_dice(
1291                 "Olive-green table for sale, in extremely good condition.",
1292                 "For sale: green Subaru Impreza, 210,000 miles"
1293             )
1294         );
1295         assert_delta!(
1296             0.14118,
1297             sorensen_dice(
1298                 "Olive-green table for sale, in extremely good condition.",
1299                 "Wanted: mountain bike with at least 21 gears."
1300             )
1301         );
1302         assert_delta!(
1303             0.77419,
1304             sorensen_dice("this has one extra word", "this has one word")
1305         );
1306     }
1307 }
1308