1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/zucchini/binary_data_histogram.h"
6
7 #include <stddef.h>
8
9 #include <memory>
10 #include <vector>
11
12 #include "components/zucchini/buffer_view.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14
15 namespace zucchini {
16
TEST(OutlierDetectorTest,Basic)17 TEST(OutlierDetectorTest, Basic) {
18 auto make_detector = [](const std::vector<double>& values) {
19 auto detector = std::make_unique<OutlierDetector>();
20 for (double v : values)
21 detector->Add(v);
22 detector->Prepare();
23 return detector;
24 };
25
26 std::unique_ptr<OutlierDetector> detector;
27 // No data: Should at least not cause error.
28 detector = make_detector({});
29 EXPECT_EQ(0, detector->DecideOutlier(0.0));
30 // Single point: Trivially inert.
31 detector = make_detector({0.5});
32 EXPECT_EQ(0, detector->DecideOutlier(0.1));
33 EXPECT_EQ(0, detector->DecideOutlier(0.5));
34 EXPECT_EQ(0, detector->DecideOutlier(0.9));
35 // Two identical points: StdDev is 0, so falls back to built-in tolerance.
36 detector = make_detector({0.5, 0.5});
37 EXPECT_EQ(-1, detector->DecideOutlier(0.3));
38 EXPECT_EQ(0, detector->DecideOutlier(0.499));
39 EXPECT_EQ(0, detector->DecideOutlier(0.5));
40 EXPECT_EQ(0, detector->DecideOutlier(0.501));
41 EXPECT_EQ(1, detector->DecideOutlier(0.7));
42 // Two separate points: Outliner test is pretty lax.
43 detector = make_detector({0.4, 0.6});
44 EXPECT_EQ(-1, detector->DecideOutlier(0.2));
45 EXPECT_EQ(0, detector->DecideOutlier(0.3));
46 EXPECT_EQ(0, detector->DecideOutlier(0.5));
47 EXPECT_EQ(0, detector->DecideOutlier(0.7));
48 EXPECT_EQ(1, detector->DecideOutlier(0.8));
49 // Sharpen distribution by clustering toward norm: Now test is stricter.
50 detector = make_detector({0.4, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.6});
51 EXPECT_EQ(-1, detector->DecideOutlier(0.3));
52 EXPECT_EQ(0, detector->DecideOutlier(0.4));
53 EXPECT_EQ(0, detector->DecideOutlier(0.5));
54 EXPECT_EQ(0, detector->DecideOutlier(0.6));
55 EXPECT_EQ(1, detector->DecideOutlier(0.7));
56 // Shift numbers around: Mean is 0.3, and data order scrambled.
57 detector = make_detector({0.28, 0.2, 0.31, 0.4, 0.29, 0.32, 0.27, 0.30});
58 EXPECT_EQ(-1, detector->DecideOutlier(0.0));
59 EXPECT_EQ(-1, detector->DecideOutlier(0.1));
60 EXPECT_EQ(0, detector->DecideOutlier(0.2));
61 EXPECT_EQ(0, detector->DecideOutlier(0.3));
62 EXPECT_EQ(0, detector->DecideOutlier(0.4));
63 EXPECT_EQ(1, detector->DecideOutlier(0.5));
64 EXPECT_EQ(1, detector->DecideOutlier(1.0));
65 // Typical usage: Potential outlier would be part of original input data!
66 detector = make_detector({0.3, 0.29, 0.31, 0.0, 0.3, 0.32, 0.3, 0.29, 0.6});
67 EXPECT_EQ(-1, detector->DecideOutlier(0.0));
68 EXPECT_EQ(0, detector->DecideOutlier(0.28));
69 EXPECT_EQ(0, detector->DecideOutlier(0.29));
70 EXPECT_EQ(0, detector->DecideOutlier(0.3));
71 EXPECT_EQ(0, detector->DecideOutlier(0.31));
72 EXPECT_EQ(0, detector->DecideOutlier(0.32));
73 EXPECT_EQ(1, detector->DecideOutlier(0.6));
74 }
75
TEST(BinaryDataHistogramTest,Basic)76 TEST(BinaryDataHistogramTest, Basic) {
77 constexpr double kUninitScore = -1;
78
79 constexpr uint8_t kTestData[] = {2, 137, 42, 0, 0, 0, 7, 11, 1, 11, 255};
80 const size_t n = sizeof(kTestData);
81 ConstBufferView region(kTestData, n);
82
83 std::vector<BinaryDataHistogram> prefix_histograms(n + 1); // Short to long.
84 std::vector<BinaryDataHistogram> suffix_histograms(n + 1); // Long to short.
85
86 for (size_t i = 0; i <= n; ++i) {
87 ConstBufferView prefix(region.begin(), i);
88 ConstBufferView suffix(region.begin() + i, n - i);
89 // If regions are smaller than 2 bytes then it is invalid. Else valid.
90 EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].Compute(prefix));
91 EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].Compute(suffix));
92 // IsValid() returns the same results.
93 EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].IsValid());
94 EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].IsValid());
95 }
96
97 // Full-prefix = full-suffix = full data.
98 EXPECT_EQ(0.0, prefix_histograms[n].Distance(suffix_histograms[0]));
99 EXPECT_EQ(0.0, suffix_histograms[0].Distance(prefix_histograms[n]));
100
101 // Testing heuristics without overreliance on implementation details.
102
103 // Strict prefixes, in increasing size. Compare against full data.
104 double prev_prefix_score = kUninitScore;
105 for (size_t i = 2; i < n; ++i) {
106 double score = prefix_histograms[i].Distance(prefix_histograms[n]);
107 // Positivity.
108 EXPECT_GT(score, 0.0);
109 // Symmetry.
110 EXPECT_EQ(score, prefix_histograms[n].Distance(prefix_histograms[i]));
111 // Distance should decrease as prefix gets nearer to full data.
112 if (prev_prefix_score != kUninitScore)
113 EXPECT_LT(score, prev_prefix_score);
114 prev_prefix_score = score;
115 }
116
117 // Strict suffixes, in decreasing size. Compare against full data.
118 double prev_suffix_score = -1;
119 for (size_t i = 1; i <= n - 2; ++i) {
120 double score = suffix_histograms[i].Distance(suffix_histograms[0]);
121 // Positivity.
122 EXPECT_GT(score, 0.0);
123 // Symmetry.
124 EXPECT_EQ(score, suffix_histograms[0].Distance(suffix_histograms[i]));
125 // Distance should increase as suffix gets farther from full data.
126 if (prev_suffix_score != kUninitScore)
127 EXPECT_GT(score, prev_suffix_score);
128 prev_suffix_score = score;
129 }
130 }
131
132 } // namespace zucchini
133