xref: /aosp_15_r20/external/libtextclassifier/native/annotator/annotator.h (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16*993b0882SAndroid Build Coastguard Worker 
// Inference code for the text classification model.
18*993b0882SAndroid Build Coastguard Worker 
19*993b0882SAndroid Build Coastguard Worker #ifndef LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_H_
20*993b0882SAndroid Build Coastguard Worker #define LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_H_
21*993b0882SAndroid Build Coastguard Worker 
22*993b0882SAndroid Build Coastguard Worker #include <memory>
23*993b0882SAndroid Build Coastguard Worker #include <set>
24*993b0882SAndroid Build Coastguard Worker #include <string>
25*993b0882SAndroid Build Coastguard Worker #include <unordered_set>
26*993b0882SAndroid Build Coastguard Worker #include <vector>
27*993b0882SAndroid Build Coastguard Worker 
28*993b0882SAndroid Build Coastguard Worker #include "annotator/contact/contact-engine.h"
29*993b0882SAndroid Build Coastguard Worker #include "annotator/datetime/datetime-grounder.h"
30*993b0882SAndroid Build Coastguard Worker #include "annotator/datetime/parser.h"
31*993b0882SAndroid Build Coastguard Worker #include "annotator/duration/duration.h"
32*993b0882SAndroid Build Coastguard Worker #include "annotator/experimental/experimental.h"
33*993b0882SAndroid Build Coastguard Worker #include "annotator/feature-processor.h"
34*993b0882SAndroid Build Coastguard Worker #include "annotator/grammar/grammar-annotator.h"
35*993b0882SAndroid Build Coastguard Worker #include "annotator/installed_app/installed-app-engine.h"
36*993b0882SAndroid Build Coastguard Worker #include "annotator/knowledge/knowledge-engine.h"
37*993b0882SAndroid Build Coastguard Worker #include "annotator/model-executor.h"
38*993b0882SAndroid Build Coastguard Worker #include "annotator/model_generated.h"
39*993b0882SAndroid Build Coastguard Worker #include "annotator/number/number.h"
40*993b0882SAndroid Build Coastguard Worker #include "annotator/person_name/person-name-engine.h"
41*993b0882SAndroid Build Coastguard Worker #include "annotator/pod_ner/pod-ner.h"
42*993b0882SAndroid Build Coastguard Worker #include "annotator/strip-unpaired-brackets.h"
43*993b0882SAndroid Build Coastguard Worker #include "annotator/translate/translate.h"
44*993b0882SAndroid Build Coastguard Worker #include "annotator/types.h"
45*993b0882SAndroid Build Coastguard Worker #include "annotator/vocab/vocab-annotator.h"
46*993b0882SAndroid Build Coastguard Worker #include "annotator/zlib-utils.h"
47*993b0882SAndroid Build Coastguard Worker #include "utils/base/status.h"
48*993b0882SAndroid Build Coastguard Worker #include "utils/base/statusor.h"
49*993b0882SAndroid Build Coastguard Worker #include "utils/calendar/calendar.h"
50*993b0882SAndroid Build Coastguard Worker #include "utils/flatbuffers/flatbuffers.h"
51*993b0882SAndroid Build Coastguard Worker #include "utils/flatbuffers/mutable.h"
52*993b0882SAndroid Build Coastguard Worker #include "utils/i18n/locale.h"
53*993b0882SAndroid Build Coastguard Worker #include "utils/memory/mmap.h"
54*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unicodetext.h"
55*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unilib.h"
56*993b0882SAndroid Build Coastguard Worker #include "utils/zlib/zlib.h"
57*993b0882SAndroid Build Coastguard Worker #include "lang_id/lang-id.h"
58*993b0882SAndroid Build Coastguard Worker 
59*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
60*993b0882SAndroid Build Coastguard Worker 
61*993b0882SAndroid Build Coastguard Worker // Holds TFLite interpreters for selection and classification models.
62*993b0882SAndroid Build Coastguard Worker // NOTE: This class is not thread-safe, thus should NOT be re-used across
63*993b0882SAndroid Build Coastguard Worker // threads.
64*993b0882SAndroid Build Coastguard Worker class InterpreterManager {
65*993b0882SAndroid Build Coastguard Worker  public:
66*993b0882SAndroid Build Coastguard Worker   // The constructor can be called with nullptr for any of the executors, and is
67*993b0882SAndroid Build Coastguard Worker   // a defined behavior, as long as the corresponding *Interpreter() method is
68*993b0882SAndroid Build Coastguard Worker   // not called when the executor is null.
InterpreterManager(const ModelExecutor * selection_executor,const ModelExecutor * classification_executor)69*993b0882SAndroid Build Coastguard Worker   InterpreterManager(const ModelExecutor* selection_executor,
70*993b0882SAndroid Build Coastguard Worker                      const ModelExecutor* classification_executor)
71*993b0882SAndroid Build Coastguard Worker       : selection_executor_(selection_executor),
72*993b0882SAndroid Build Coastguard Worker         classification_executor_(classification_executor) {}
73*993b0882SAndroid Build Coastguard Worker 
74*993b0882SAndroid Build Coastguard Worker   // Gets or creates and caches an interpreter for the selection model.
75*993b0882SAndroid Build Coastguard Worker   tflite::Interpreter* SelectionInterpreter();
76*993b0882SAndroid Build Coastguard Worker 
77*993b0882SAndroid Build Coastguard Worker   // Gets or creates and caches an interpreter for the classification model.
78*993b0882SAndroid Build Coastguard Worker   tflite::Interpreter* ClassificationInterpreter();
79*993b0882SAndroid Build Coastguard Worker 
80*993b0882SAndroid Build Coastguard Worker  private:
81*993b0882SAndroid Build Coastguard Worker   const ModelExecutor* selection_executor_;
82*993b0882SAndroid Build Coastguard Worker   const ModelExecutor* classification_executor_;
83*993b0882SAndroid Build Coastguard Worker 
84*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<tflite::Interpreter> selection_interpreter_;
85*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<tflite::Interpreter> classification_interpreter_;
86*993b0882SAndroid Build Coastguard Worker };
87*993b0882SAndroid Build Coastguard Worker 
// Stores entity types enabled for annotation, and provides operator() for
// checking whether a given entity type is enabled.
//
// NOTE: Only a reference to the set is stored (no copy is made), so the set
// passed to the constructor must outlive this object.
class EnabledEntityTypes {
 public:
  // `entity_types` is the set of enabled entity type names. An empty set
  // means that every entity type is enabled.
  explicit EnabledEntityTypes(
      const std::unordered_set<std::string>& entity_types)
      : entity_types_(entity_types) {}

  // Returns true if `entity_type` is enabled: either the set is empty (no
  // restriction) or it contains `entity_type`.
  bool operator()(const std::string& entity_type) const {
    return entity_types_.empty() || entity_types_.count(entity_type) > 0;
  }

 private:
  // Borrowed, not owned; see the lifetime note on the class.
  const std::unordered_set<std::string>& entity_types_;
};
104*993b0882SAndroid Build Coastguard Worker 
// A text processing model that provides text classification, annotation,
// selection suggestion for various types.
// NOTE: This class is not thread-safe.
108*993b0882SAndroid Build Coastguard Worker class Annotator {
109*993b0882SAndroid Build Coastguard Worker  public:
110*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromUnownedBuffer(
111*993b0882SAndroid Build Coastguard Worker       const char* buffer, int size, const UniLib* unilib = nullptr,
112*993b0882SAndroid Build Coastguard Worker       const CalendarLib* calendarlib = nullptr);
113*993b0882SAndroid Build Coastguard Worker   // Copies the underlying model buffer string.
114*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromString(
115*993b0882SAndroid Build Coastguard Worker       const std::string& buffer, const UniLib* unilib = nullptr,
116*993b0882SAndroid Build Coastguard Worker       const CalendarLib* calendarlib = nullptr);
117*993b0882SAndroid Build Coastguard Worker   // Takes ownership of the mmap.
118*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromScopedMmap(
119*993b0882SAndroid Build Coastguard Worker       std::unique_ptr<ScopedMmap>* mmap, const UniLib* unilib = nullptr,
120*993b0882SAndroid Build Coastguard Worker       const CalendarLib* calendarlib = nullptr);
121*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromScopedMmap(
122*993b0882SAndroid Build Coastguard Worker       std::unique_ptr<ScopedMmap>* mmap, std::unique_ptr<UniLib> unilib,
123*993b0882SAndroid Build Coastguard Worker       std::unique_ptr<CalendarLib> calendarlib);
124*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromFileDescriptor(
125*993b0882SAndroid Build Coastguard Worker       int fd, int offset, int size, const UniLib* unilib = nullptr,
126*993b0882SAndroid Build Coastguard Worker       const CalendarLib* calendarlib = nullptr);
127*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromFileDescriptor(
128*993b0882SAndroid Build Coastguard Worker       int fd, int offset, int size, std::unique_ptr<UniLib> unilib,
129*993b0882SAndroid Build Coastguard Worker       std::unique_ptr<CalendarLib> calendarlib);
130*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromFileDescriptor(
131*993b0882SAndroid Build Coastguard Worker       int fd, const UniLib* unilib = nullptr,
132*993b0882SAndroid Build Coastguard Worker       const CalendarLib* calendarlib = nullptr);
133*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromFileDescriptor(
134*993b0882SAndroid Build Coastguard Worker       int fd, std::unique_ptr<UniLib> unilib,
135*993b0882SAndroid Build Coastguard Worker       std::unique_ptr<CalendarLib> calendarlib);
136*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromPath(
137*993b0882SAndroid Build Coastguard Worker       const std::string& path, const UniLib* unilib = nullptr,
138*993b0882SAndroid Build Coastguard Worker       const CalendarLib* calendarlib = nullptr);
139*993b0882SAndroid Build Coastguard Worker   static std::unique_ptr<Annotator> FromPath(
140*993b0882SAndroid Build Coastguard Worker       const std::string& path, std::unique_ptr<UniLib> unilib,
141*993b0882SAndroid Build Coastguard Worker       std::unique_ptr<CalendarLib> calendarlib);
142*993b0882SAndroid Build Coastguard Worker 
143*993b0882SAndroid Build Coastguard Worker   // Returns true if the model is ready for use.
IsInitialized()144*993b0882SAndroid Build Coastguard Worker   bool IsInitialized() { return initialized_; }
145*993b0882SAndroid Build Coastguard Worker 
// Initializes the knowledge engine with the given config.
// NOTE(review): the bool result presumably signals success - confirm.
bool InitializeKnowledgeEngine(const std::string& serialized_config);
148*993b0882SAndroid Build Coastguard Worker 
// Initializes the contact engine with the given config.
// NOTE(review): the bool result presumably signals success - confirm.
bool InitializeContactEngine(const std::string& serialized_config);
151*993b0882SAndroid Build Coastguard Worker 
// Cleans up the resources associated with the contact engine.
void CleanUpContactEngine();
154*993b0882SAndroid Build Coastguard Worker 
// Initializes the installed app engine with the given config.
// NOTE(review): the bool result presumably signals success - confirm.
bool InitializeInstalledAppEngine(const std::string& serialized_config);
157*993b0882SAndroid Build Coastguard Worker 
// Initializes the person name engine with the given person name model in the
// provided buffer of length `size`. The buffer needs to outlive the
// annotator.
bool InitializePersonNameEngineFromUnownedBuffer(const void* buffer,
                                                 int size);
162*993b0882SAndroid Build Coastguard Worker 
163*993b0882SAndroid Build Coastguard Worker   // Initializes the person name engine with the given person name model from
164*993b0882SAndroid Build Coastguard Worker   // the provided mmap.
165*993b0882SAndroid Build Coastguard Worker   bool InitializePersonNameEngineFromScopedMmap(const ScopedMmap& mmap);
166*993b0882SAndroid Build Coastguard Worker 
// Initializes the person name engine with the given person name model in the
// provided file path.
bool InitializePersonNameEngineFromPath(const std::string& path);
170*993b0882SAndroid Build Coastguard Worker 
// Initializes the person name engine with the given person name model in the
// provided file descriptor, using the region given by `offset` and `size`.
// NOTE(review): `offset`/`size` are presumably in bytes - confirm.
bool InitializePersonNameEngineFromFileDescriptor(int fd, int offset,
                                                  int size);
175*993b0882SAndroid Build Coastguard Worker 
// Initializes the experimental annotators if available.
// Returns true if there is an implementation of experimental annotators
// linked in.
bool InitializeExperimentalAnnotators();
180*993b0882SAndroid Build Coastguard Worker 
181*993b0882SAndroid Build Coastguard Worker   // Sets up the lang-id instance that should be used.
182*993b0882SAndroid Build Coastguard Worker   bool SetLangId(const libtextclassifier3::mobile::lang_id::LangId* lang_id);
183*993b0882SAndroid Build Coastguard Worker 
184*993b0882SAndroid Build Coastguard Worker   // Runs inference for given a context and current selection (i.e. index
185*993b0882SAndroid Build Coastguard Worker   // of the first and one past last selected characters (utf8 codepoint
186*993b0882SAndroid Build Coastguard Worker   // offsets)). Returns the indices (utf8 codepoint offsets) of the selection
187*993b0882SAndroid Build Coastguard Worker   // beginning character and one past selection end character.
188*993b0882SAndroid Build Coastguard Worker   // Returns the original click_indices if an error occurs.
189*993b0882SAndroid Build Coastguard Worker   // NOTE: The selection indices are passed in and returned in terms of
190*993b0882SAndroid Build Coastguard Worker   // UTF8 codepoints (not bytes).
191*993b0882SAndroid Build Coastguard Worker   // Requires that the model is a smart selection model.
192*993b0882SAndroid Build Coastguard Worker   CodepointSpan SuggestSelection(
193*993b0882SAndroid Build Coastguard Worker       const std::string& context, CodepointSpan click_indices,
194*993b0882SAndroid Build Coastguard Worker       const SelectionOptions& options = SelectionOptions()) const;
195*993b0882SAndroid Build Coastguard Worker 
196*993b0882SAndroid Build Coastguard Worker   // Classifies the selected text given the context string.
197*993b0882SAndroid Build Coastguard Worker   // Returns an empty result if an error occurs.
198*993b0882SAndroid Build Coastguard Worker   std::vector<ClassificationResult> ClassifyText(
199*993b0882SAndroid Build Coastguard Worker       const std::string& context, const CodepointSpan& selection_indices,
200*993b0882SAndroid Build Coastguard Worker       const ClassificationOptions& options = ClassificationOptions()) const;
201*993b0882SAndroid Build Coastguard Worker 
202*993b0882SAndroid Build Coastguard Worker   // Annotates the given structed input request. Models which handle the full
203*993b0882SAndroid Build Coastguard Worker   // context request will receive all the metadata they require. While models
204*993b0882SAndroid Build Coastguard Worker   // that don't use the extra context are called using only a string.
205*993b0882SAndroid Build Coastguard Worker   // For each fragment the annotations are sorted by their position in
206*993b0882SAndroid Build Coastguard Worker   // the fragment and exclude spans classified as 'other'.
207*993b0882SAndroid Build Coastguard Worker   //
208*993b0882SAndroid Build Coastguard Worker   // The number of vectors of annotated spans will match the number
209*993b0882SAndroid Build Coastguard Worker   // of input fragments. The order of annotation span vectors will match the
210*993b0882SAndroid Build Coastguard Worker   // order of input fragments. If annotation is not possible for any of the
211*993b0882SAndroid Build Coastguard Worker   // annotators, no annotation is returned.
212*993b0882SAndroid Build Coastguard Worker   StatusOr<Annotations> AnnotateStructuredInput(
213*993b0882SAndroid Build Coastguard Worker       const std::vector<InputFragment>& string_fragments,
214*993b0882SAndroid Build Coastguard Worker       const AnnotationOptions& options = AnnotationOptions()) const;
215*993b0882SAndroid Build Coastguard Worker 
216*993b0882SAndroid Build Coastguard Worker   // Annotates given input text. The annotations are sorted by their position
217*993b0882SAndroid Build Coastguard Worker   // in the context string and exclude spans classified as 'other'.
218*993b0882SAndroid Build Coastguard Worker   std::vector<AnnotatedSpan> Annotate(
219*993b0882SAndroid Build Coastguard Worker       const std::string& context,
220*993b0882SAndroid Build Coastguard Worker       const AnnotationOptions& options = AnnotationOptions()) const;
221*993b0882SAndroid Build Coastguard Worker 
222*993b0882SAndroid Build Coastguard Worker   // Looks up a knowledge entity by its id. Returns the serialized knowledge
223*993b0882SAndroid Build Coastguard Worker   // result.
224*993b0882SAndroid Build Coastguard Worker   StatusOr<std::string> LookUpKnowledgeEntity(const std::string& id) const;
225*993b0882SAndroid Build Coastguard Worker 
226*993b0882SAndroid Build Coastguard Worker   // Looks up an entity's property.
227*993b0882SAndroid Build Coastguard Worker   StatusOr<std::string> LookUpKnowledgeEntityProperty(
228*993b0882SAndroid Build Coastguard Worker       const std::string& mid_str, const std::string& property) const;
229*993b0882SAndroid Build Coastguard Worker 
230*993b0882SAndroid Build Coastguard Worker   const Model* model() const;
231*993b0882SAndroid Build Coastguard Worker   const reflection::Schema* entity_data_schema() const;
232*993b0882SAndroid Build Coastguard Worker 
233*993b0882SAndroid Build Coastguard Worker   // Exposes the feature processor for tests and evaluations.
234*993b0882SAndroid Build Coastguard Worker   const FeatureProcessor* SelectionFeatureProcessorForTests() const;
235*993b0882SAndroid Build Coastguard Worker   const FeatureProcessor* ClassificationFeatureProcessorForTests() const;
236*993b0882SAndroid Build Coastguard Worker 
237*993b0882SAndroid Build Coastguard Worker   // Exposes the date time parser for tests and evaluations.
238*993b0882SAndroid Build Coastguard Worker   const DatetimeParser* DatetimeParserForTests() const;
239*993b0882SAndroid Build Coastguard Worker 
240*993b0882SAndroid Build Coastguard Worker   static const std::string& kPhoneCollection;
241*993b0882SAndroid Build Coastguard Worker   static const std::string& kAddressCollection;
242*993b0882SAndroid Build Coastguard Worker   static const std::string& kDateCollection;
243*993b0882SAndroid Build Coastguard Worker   static const std::string& kUrlCollection;
244*993b0882SAndroid Build Coastguard Worker   static const std::string& kEmailCollection;
245*993b0882SAndroid Build Coastguard Worker 
246*993b0882SAndroid Build Coastguard Worker  protected:
247*993b0882SAndroid Build Coastguard Worker   struct ScoredChunk {
248*993b0882SAndroid Build Coastguard Worker     TokenSpan token_span;
249*993b0882SAndroid Build Coastguard Worker     float score;
250*993b0882SAndroid Build Coastguard Worker   };
251*993b0882SAndroid Build Coastguard Worker 
252*993b0882SAndroid Build Coastguard Worker   // NOTE: ValidateAndInitialize needs to be called before any other method.
Annotator()253*993b0882SAndroid Build Coastguard Worker   Annotator() : initialized_(false) {}
254*993b0882SAndroid Build Coastguard Worker 
255*993b0882SAndroid Build Coastguard Worker   // Checks that model contains all required fields, and initializes internal
256*993b0882SAndroid Build Coastguard Worker   // datastructures.
257*993b0882SAndroid Build Coastguard Worker   // Needs to be called before any other method is.
258*993b0882SAndroid Build Coastguard Worker   void ValidateAndInitialize(const Model* model, const UniLib* unilib,
259*993b0882SAndroid Build Coastguard Worker                              const CalendarLib* calendarlib);
260*993b0882SAndroid Build Coastguard Worker 
261*993b0882SAndroid Build Coastguard Worker   // Initializes regular expressions for the regex model.
262*993b0882SAndroid Build Coastguard Worker   bool InitializeRegexModel(ZlibDecompressor* decompressor);
263*993b0882SAndroid Build Coastguard Worker 
264*993b0882SAndroid Build Coastguard Worker   // Resolves conflicts in the list of candidates by removing some overlapping
265*993b0882SAndroid Build Coastguard Worker   // ones. Returns indices of the surviving ones.
266*993b0882SAndroid Build Coastguard Worker   // NOTE: Assumes that the candidates are sorted according to their position in
267*993b0882SAndroid Build Coastguard Worker   // the span.
268*993b0882SAndroid Build Coastguard Worker   bool ResolveConflicts(const std::vector<AnnotatedSpan>& candidates,
269*993b0882SAndroid Build Coastguard Worker                         const std::string& context,
270*993b0882SAndroid Build Coastguard Worker                         const std::vector<Token>& cached_tokens,
271*993b0882SAndroid Build Coastguard Worker                         const std::vector<Locale>& detected_text_language_tags,
272*993b0882SAndroid Build Coastguard Worker                         const BaseOptions& options,
273*993b0882SAndroid Build Coastguard Worker                         InterpreterManager* interpreter_manager,
274*993b0882SAndroid Build Coastguard Worker                         std::vector<int>* result) const;
275*993b0882SAndroid Build Coastguard Worker 
276*993b0882SAndroid Build Coastguard Worker   // Resolves one conflict between candidates on indices 'start_index'
277*993b0882SAndroid Build Coastguard Worker   // (inclusive) and 'end_index' (exclusive). Assigns the winning candidate
278*993b0882SAndroid Build Coastguard Worker   // indices to 'chosen_indices'. Returns false if a problem arises.
279*993b0882SAndroid Build Coastguard Worker   bool ResolveConflict(const std::string& context,
280*993b0882SAndroid Build Coastguard Worker                        const std::vector<Token>& cached_tokens,
281*993b0882SAndroid Build Coastguard Worker                        const std::vector<AnnotatedSpan>& candidates,
282*993b0882SAndroid Build Coastguard Worker                        const std::vector<Locale>& detected_text_language_tags,
283*993b0882SAndroid Build Coastguard Worker                        int start_index, int end_index,
284*993b0882SAndroid Build Coastguard Worker                        const BaseOptions& options,
285*993b0882SAndroid Build Coastguard Worker                        InterpreterManager* interpreter_manager,
286*993b0882SAndroid Build Coastguard Worker                        std::vector<int>* chosen_indices) const;
287*993b0882SAndroid Build Coastguard Worker 
288*993b0882SAndroid Build Coastguard Worker   // Gets selection candidates from the ML model.
289*993b0882SAndroid Build Coastguard Worker   // Provides the tokens produced during tokenization of the context string for
290*993b0882SAndroid Build Coastguard Worker   // reuse.
291*993b0882SAndroid Build Coastguard Worker   bool ModelSuggestSelection(
292*993b0882SAndroid Build Coastguard Worker       const UnicodeText& context_unicode, const CodepointSpan& click_indices,
293*993b0882SAndroid Build Coastguard Worker       const std::vector<Locale>& detected_text_language_tags,
294*993b0882SAndroid Build Coastguard Worker       InterpreterManager* interpreter_manager, std::vector<Token>* tokens,
295*993b0882SAndroid Build Coastguard Worker       std::vector<AnnotatedSpan>* result) const;
296*993b0882SAndroid Build Coastguard Worker 
297*993b0882SAndroid Build Coastguard Worker   // Classifies the selected text given the context string with the
298*993b0882SAndroid Build Coastguard Worker   // classification model.
299*993b0882SAndroid Build Coastguard Worker   // The following arguments are optional:
300*993b0882SAndroid Build Coastguard Worker   //   - cached_tokens - can be given as empty
301*993b0882SAndroid Build Coastguard Worker   //   - embedding_cache - can be given as nullptr
302*993b0882SAndroid Build Coastguard Worker   //   - tokens - can be given as nullptr
303*993b0882SAndroid Build Coastguard Worker   // Returns true if no error occurred.
304*993b0882SAndroid Build Coastguard Worker   bool ModelClassifyText(
305*993b0882SAndroid Build Coastguard Worker       const std::string& context, const std::vector<Token>& cached_tokens,
306*993b0882SAndroid Build Coastguard Worker       const std::vector<Locale>& detected_text_language_tags,
307*993b0882SAndroid Build Coastguard Worker       const CodepointSpan& selection_indices, const BaseOptions& options,
308*993b0882SAndroid Build Coastguard Worker       InterpreterManager* interpreter_manager,
309*993b0882SAndroid Build Coastguard Worker       FeatureProcessor::EmbeddingCache* embedding_cache,
310*993b0882SAndroid Build Coastguard Worker       std::vector<ClassificationResult>* classification_results,
311*993b0882SAndroid Build Coastguard Worker       std::vector<Token>* tokens) const;
312*993b0882SAndroid Build Coastguard Worker 
313*993b0882SAndroid Build Coastguard Worker   // Same as above, but (for optimization) takes the context as UnicodeText and
314*993b0882SAndroid Build Coastguard Worker   // takes the following extra arguments:
315*993b0882SAndroid Build Coastguard Worker   //   - span_begin, span_end - iterators in context_unicode corresponding to
316*993b0882SAndroid Build Coastguard Worker   //     selection_indices
317*993b0882SAndroid Build Coastguard Worker   //   - line - a UnicodeTextRange within context_unicode corresponding to the
318*993b0882SAndroid Build Coastguard Worker   //     line containing the selection - optional, can be given as nullptr
319*993b0882SAndroid Build Coastguard Worker   bool ModelClassifyText(
320*993b0882SAndroid Build Coastguard Worker       const UnicodeText& context_unicode,
321*993b0882SAndroid Build Coastguard Worker       const std::vector<Token>& cached_tokens,
322*993b0882SAndroid Build Coastguard Worker       const std::vector<Locale>& detected_text_language_tags,
323*993b0882SAndroid Build Coastguard Worker       const UnicodeText::const_iterator& span_begin,
324*993b0882SAndroid Build Coastguard Worker       const UnicodeText::const_iterator& span_end, const UnicodeTextRange* line,
325*993b0882SAndroid Build Coastguard Worker       const CodepointSpan& selection_indices, const BaseOptions& options,
326*993b0882SAndroid Build Coastguard Worker       InterpreterManager* interpreter_manager,
327*993b0882SAndroid Build Coastguard Worker       FeatureProcessor::EmbeddingCache* embedding_cache,
328*993b0882SAndroid Build Coastguard Worker       std::vector<ClassificationResult>* classification_results,
329*993b0882SAndroid Build Coastguard Worker       std::vector<Token>* tokens) const;
330*993b0882SAndroid Build Coastguard Worker 
331*993b0882SAndroid Build Coastguard Worker   // Returns a relative token span that represents how many tokens on the left
332*993b0882SAndroid Build Coastguard Worker   // from the selection and right from the selection are needed for the
333*993b0882SAndroid Build Coastguard Worker   // classifier input.
334*993b0882SAndroid Build Coastguard Worker   TokenSpan ClassifyTextUpperBoundNeededTokens() const;
335*993b0882SAndroid Build Coastguard Worker 
336*993b0882SAndroid Build Coastguard Worker   // Classifies the selected text with the regular expressions models.
337*993b0882SAndroid Build Coastguard Worker   // Returns true if no error happened, false otherwise.
338*993b0882SAndroid Build Coastguard Worker   bool RegexClassifyText(
339*993b0882SAndroid Build Coastguard Worker       const std::string& context, const CodepointSpan& selection_indices,
340*993b0882SAndroid Build Coastguard Worker       std::vector<ClassificationResult>* classification_result) const;
341*993b0882SAndroid Build Coastguard Worker 
342*993b0882SAndroid Build Coastguard Worker   // Classifies the selected text with the date time model.
343*993b0882SAndroid Build Coastguard Worker   // Returns true if no error happened, false otherwise.
344*993b0882SAndroid Build Coastguard Worker   bool DatetimeClassifyText(
345*993b0882SAndroid Build Coastguard Worker       const std::string& context, const CodepointSpan& selection_indices,
346*993b0882SAndroid Build Coastguard Worker       const ClassificationOptions& options,
347*993b0882SAndroid Build Coastguard Worker       std::vector<ClassificationResult>* classification_results) const;
348*993b0882SAndroid Build Coastguard Worker 
349*993b0882SAndroid Build Coastguard Worker   // Chunks given input text with the selection model and classifies the spans
350*993b0882SAndroid Build Coastguard Worker   // with the classification model.
351*993b0882SAndroid Build Coastguard Worker   // The annotations are sorted by their position in the context string and
352*993b0882SAndroid Build Coastguard Worker   // exclude spans classified as 'other'.
353*993b0882SAndroid Build Coastguard Worker   // Provides the tokens produced during tokenization of the context string for
354*993b0882SAndroid Build Coastguard Worker   // reuse.
  // The annotations are written through `result` and the tokens through
  // `tokens`; `interpreter_manager` is also taken as a mutable pointer.
  // NOTE(review): the meaning of the bool return is not documented here --
  // presumably true on success, like the sibling methods; confirm.
355*993b0882SAndroid Build Coastguard Worker   bool ModelAnnotate(const std::string& context,
356*993b0882SAndroid Build Coastguard Worker                      const std::vector<Locale>& detected_text_language_tags,
357*993b0882SAndroid Build Coastguard Worker                      const AnnotationOptions& options,
358*993b0882SAndroid Build Coastguard Worker                      InterpreterManager* interpreter_manager,
359*993b0882SAndroid Build Coastguard Worker                      std::vector<Token>* tokens,
360*993b0882SAndroid Build Coastguard Worker                      std::vector<AnnotatedSpan>* result) const;
361*993b0882SAndroid Build Coastguard Worker 
362*993b0882SAndroid Build Coastguard Worker   // Groups the tokens into chunks. A chunk is a token span that should be the
363*993b0882SAndroid Build Coastguard Worker   // suggested selection when any of its contained tokens is clicked. The chunks
364*993b0882SAndroid Build Coastguard Worker   // are non-overlapping and are sorted by their position in the context string.
365*993b0882SAndroid Build Coastguard Worker   // "num_tokens" is the total number of tokens available (as this method does
366*993b0882SAndroid Build Coastguard Worker   // not need the actual vector of tokens).
367*993b0882SAndroid Build Coastguard Worker   // "span_of_interest" is a span of all the tokens that could be clicked.
368*993b0882SAndroid Build Coastguard Worker   // The resulting chunks all have to overlap with it and they cover this span
369*993b0882SAndroid Build Coastguard Worker   // completely. The first and last chunk might extend beyond it.
370*993b0882SAndroid Build Coastguard Worker   // The chunks vector is cleared before filling.
  // NOTE(review): the meaning of the bool return is not documented here --
  // presumably true on success; confirm in the implementation.
  // NOTE(review): "selection_interpreter" precedes "cached_features" here but
  // follows it in the Model*ScoreChunks() helpers below; mind the argument
  // order when forwarding.
371*993b0882SAndroid Build Coastguard Worker   bool ModelChunk(int num_tokens, const TokenSpan& span_of_interest,
372*993b0882SAndroid Build Coastguard Worker                   tflite::Interpreter* selection_interpreter,
373*993b0882SAndroid Build Coastguard Worker                   const CachedFeatures& cached_features,
374*993b0882SAndroid Build Coastguard Worker                   std::vector<TokenSpan>* chunks) const;
375*993b0882SAndroid Build Coastguard Worker 
376*993b0882SAndroid Build Coastguard Worker   // A helper method for ModelChunk(). It generates scored chunk candidates for
377*993b0882SAndroid Build Coastguard Worker   // a click context model.
  // The candidates are written through `scored_chunks`.
378*993b0882SAndroid Build Coastguard Worker   // NOTE: The returned chunks can (and most likely do) overlap.
  // NOTE(review): the meaning of the bool return is not documented here --
  // presumably true on success; confirm in the implementation.
379*993b0882SAndroid Build Coastguard Worker   bool ModelClickContextScoreChunks(
380*993b0882SAndroid Build Coastguard Worker       int num_tokens, const TokenSpan& span_of_interest,
381*993b0882SAndroid Build Coastguard Worker       const CachedFeatures& cached_features,
382*993b0882SAndroid Build Coastguard Worker       tflite::Interpreter* selection_interpreter,
383*993b0882SAndroid Build Coastguard Worker       std::vector<ScoredChunk>* scored_chunks) const;
384*993b0882SAndroid Build Coastguard Worker 
385*993b0882SAndroid Build Coastguard Worker   // A helper method for ModelChunk(). It generates scored chunk candidates for
386*993b0882SAndroid Build Coastguard Worker   // a bounds-sensitive model.
  // Takes an `inference_span` in addition to the arguments of
  // ModelClickContextScoreChunks(). The candidates are written through
  // `scored_chunks`.
387*993b0882SAndroid Build Coastguard Worker   // NOTE: The returned chunks can (and most likely do) overlap.
  // NOTE(review): the meaning of the bool return is not documented here --
  // presumably true on success; confirm in the implementation.
388*993b0882SAndroid Build Coastguard Worker   bool ModelBoundsSensitiveScoreChunks(
389*993b0882SAndroid Build Coastguard Worker       int num_tokens, const TokenSpan& span_of_interest,
390*993b0882SAndroid Build Coastguard Worker       const TokenSpan& inference_span, const CachedFeatures& cached_features,
391*993b0882SAndroid Build Coastguard Worker       tflite::Interpreter* selection_interpreter,
392*993b0882SAndroid Build Coastguard Worker       std::vector<ScoredChunk>* scored_chunks) const;
393*993b0882SAndroid Build Coastguard Worker 
394*993b0882SAndroid Build Coastguard Worker   // Produces chunks isolated by a set of regular expressions.
  // `rules` is a list of ints -- presumably indices into regex_patterns_, like
  // the *_regex_patterns_ index vectors of this class; confirm against the
  // callers. The annotations are written through `result`.
  // NOTE(review): `annotation_usecase` is taken by const reference here but by
  // value in DatetimeChunk().
  // NOTE(review): the meaning of the bool return is not documented here --
  // presumably true on success; confirm in the implementation.
395*993b0882SAndroid Build Coastguard Worker   bool RegexChunk(const UnicodeText& context_unicode,
396*993b0882SAndroid Build Coastguard Worker                   const std::vector<int>& rules,
397*993b0882SAndroid Build Coastguard Worker                   bool is_serialized_entity_data_enabled,
398*993b0882SAndroid Build Coastguard Worker                   const EnabledEntityTypes& enabled_entity_types,
399*993b0882SAndroid Build Coastguard Worker                   const AnnotationUsecase& annotation_usecase,
400*993b0882SAndroid Build Coastguard Worker 
401*993b0882SAndroid Build Coastguard Worker                   std::vector<AnnotatedSpan>* result) const;
402*993b0882SAndroid Build Coastguard Worker 
403*993b0882SAndroid Build Coastguard Worker   // Produces chunks from the datetime parser.
  // `reference_time_ms_utc` (milliseconds, UTC, going by its name) and
  // `reference_timezone` are presumably the reference point used to resolve
  // relative expressions -- confirm in the implementation. `locales` is passed
  // as a single string; its format is not visible here.
  // The annotations are written through `result`.
  // NOTE(review): the meaning of the bool return is not documented here --
  // presumably true on success; confirm in the implementation.
404*993b0882SAndroid Build Coastguard Worker   bool DatetimeChunk(const UnicodeText& context_unicode,
405*993b0882SAndroid Build Coastguard Worker                      int64 reference_time_ms_utc,
406*993b0882SAndroid Build Coastguard Worker                      const std::string& reference_timezone,
407*993b0882SAndroid Build Coastguard Worker                      const std::string& locales, ModeFlag mode,
408*993b0882SAndroid Build Coastguard Worker                      AnnotationUsecase annotation_usecase,
409*993b0882SAndroid Build Coastguard Worker                      bool is_serialized_entity_data_enabled,
410*993b0882SAndroid Build Coastguard Worker                      std::vector<AnnotatedSpan>* result) const;
411*993b0882SAndroid Build Coastguard Worker 
412*993b0882SAndroid Build Coastguard Worker   // Returns whether a classification should be filtered.
  // There is one predicate per mode: the annotation and selection variants
  // take an AnnotatedSpan, the classification variant takes a single
  // ClassificationResult.
  // NOTE(review): presumably these consult the filtered_collections_*_ sets
  // declared further down -- confirm in the implementation.
413*993b0882SAndroid Build Coastguard Worker   bool FilteredForAnnotation(const AnnotatedSpan& span) const;
414*993b0882SAndroid Build Coastguard Worker   bool FilteredForClassification(
415*993b0882SAndroid Build Coastguard Worker       const ClassificationResult& classification) const;
416*993b0882SAndroid Build Coastguard Worker   bool FilteredForSelection(const AnnotatedSpan& span) const;
417*993b0882SAndroid Build Coastguard Worker 
418*993b0882SAndroid Build Coastguard Worker   // Computes the selection boundaries from a regular expression match.
  // The boundaries are returned as a CodepointSpan. Both `match` and `config`
  // are read-only pointers (presumably required to be non-null -- confirm in
  // the implementation).
419*993b0882SAndroid Build Coastguard Worker   CodepointSpan ComputeSelectionBoundaries(
420*993b0882SAndroid Build Coastguard Worker       const UniLib::RegexMatcher* match,
421*993b0882SAndroid Build Coastguard Worker       const RegexModel_::Pattern* config) const;
422*993b0882SAndroid Build Coastguard Worker 
423*993b0882SAndroid Build Coastguard Worker   // Returns whether a regex pattern provides entity data from a match.
  // NOTE(review): whether `pattern` may be null is not visible here -- confirm
  // in the implementation.
424*993b0882SAndroid Build Coastguard Worker   bool HasEntityData(const RegexModel_::Pattern* pattern) const;
425*993b0882SAndroid Build Coastguard Worker 
426*993b0882SAndroid Build Coastguard Worker   // Constructs and serializes entity data from regex matches.
  // The serialized data is written through `serialized_entity_data`. Note that
  // `matcher` is a non-const pointer here, unlike the const matcher pointers
  // taken by the neighbouring methods.
  // NOTE(review): the meaning of the bool return is not documented here --
  // presumably true on success; confirm in the implementation.
427*993b0882SAndroid Build Coastguard Worker   bool SerializedEntityDataFromRegexMatch(
428*993b0882SAndroid Build Coastguard Worker       const RegexModel_::Pattern* pattern, UniLib::RegexMatcher* matcher,
429*993b0882SAndroid Build Coastguard Worker       std::string* serialized_entity_data) const;
430*993b0882SAndroid Build Coastguard Worker 
431*993b0882SAndroid Build Coastguard Worker   // For knowledge candidates which have a ContactPointer, fill in the
432*993b0882SAndroid Build Coastguard Worker   // appropriate contact metadata, if possible.
  // `candidates` is updated through the pointer; nothing is returned, so a
  // failure to fill in metadata is not reported to the caller.
433*993b0882SAndroid Build Coastguard Worker   void AddContactMetadataToKnowledgeClassificationResults(
434*993b0882SAndroid Build Coastguard Worker       std::vector<AnnotatedSpan>* candidates) const;
435*993b0882SAndroid Build Coastguard Worker 
436*993b0882SAndroid Build Coastguard Worker   // Gets priority score from the list of classification results.
  // NOTE(review): how the score is derived when the list holds several
  // results, and what is returned for an empty list, is not visible here --
  // confirm in the implementation.
437*993b0882SAndroid Build Coastguard Worker   float GetPriorityScore(
438*993b0882SAndroid Build Coastguard Worker       const std::vector<ClassificationResult>& classification) const;
439*993b0882SAndroid Build Coastguard Worker 
440*993b0882SAndroid Build Coastguard Worker   // Verifies a regex match and returns true if verification was successful.
  // All arguments are read-only. `match` is passed as a string alongside the
  // `matcher` object.
  // NOTE(review): whether `verification_options` may be null (and what that
  // would mean) is not visible here -- confirm in the implementation.
441*993b0882SAndroid Build Coastguard Worker   bool VerifyRegexMatchCandidate(
442*993b0882SAndroid Build Coastguard Worker       const std::string& context,
443*993b0882SAndroid Build Coastguard Worker       const VerificationOptions* verification_options, const std::string& match,
444*993b0882SAndroid Build Coastguard Worker       const UniLib::RegexMatcher* matcher) const;
445*993b0882SAndroid Build Coastguard Worker 
  // The Model this annotator works with, held as a raw pointer.
  // NOTE(review): there is no in-class initializer here (unlike lang_id_
  // further down) -- confirm that every construction path sets it before use,
  // and which buffer (mmap_ or owned_buffer_?) backs it.
446*993b0882SAndroid Build Coastguard Worker   const Model* model_;
447*993b0882SAndroid Build Coastguard Worker 
  // Executors for the selection model, the classification model and the
  // embeddings; owned by this object.
448*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const ModelExecutor> selection_executor_;
449*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const ModelExecutor> classification_executor_;
450*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const EmbeddingExecutor> embedding_executor_;
451*993b0882SAndroid Build Coastguard Worker 
  // One feature processor each for selection and classification.
452*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const FeatureProcessor> selection_feature_processor_;
453*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const FeatureProcessor> classification_feature_processor_;
454*993b0882SAndroid Build Coastguard Worker 
  // Grammar and datetime components; owned by this object.
  // NOTE(review): presumably a null pointer means the component is not
  // configured for the loaded model -- confirm.
455*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const grammar::Analyzer> analyzer_;
456*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const DatetimeGrounder> datetime_grounder_;
457*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const DatetimeParser> datetime_parser_;
458*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const GrammarAnnotator> grammar_annotator_;
459*993b0882SAndroid Build Coastguard Worker 
  // Resources owned by this object.
  // NOTE(review): presumably owned_buffer_ holds the model bytes when the
  // model is not memory-mapped, and the owned_* libraries back unilib_ and
  // calendarlib_ when none are supplied from outside -- confirm.
460*993b0882SAndroid Build Coastguard Worker   std::string owned_buffer_;
461*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<UniLib> owned_unilib_;
462*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<CalendarLib> owned_calendarlib_;
463*993b0882SAndroid Build Coastguard Worker 
464*993b0882SAndroid Build Coastguard Worker  private:
  // Pairs a regex pattern configuration with a UniLib::RegexPattern object.
  // Member order matters for any brace-initialization of this struct.
465*993b0882SAndroid Build Coastguard Worker   struct CompiledRegexPattern {
    // Raw pointer to the pattern configuration.
    // NOTE(review): presumably a non-owning view into the model flatbuffer --
    // confirm its lifetime.
466*993b0882SAndroid Build Coastguard Worker     const RegexModel_::Pattern* config;
    // The pattern object, owned by this struct.
467*993b0882SAndroid Build Coastguard Worker     std::unique_ptr<UniLib::RegexPattern> pattern;
468*993b0882SAndroid Build Coastguard Worker   };
469*993b0882SAndroid Build Coastguard Worker 
470*993b0882SAndroid Build Coastguard Worker   // Removes annotations whose entity type is not in the set of enabled
471*993b0882SAndroid Build Coastguard Worker   // entity types.
  // `annotated_spans` is modified through the pointer; nothing is returned.
472*993b0882SAndroid Build Coastguard Worker   void RemoveNotEnabledEntityTypes(
473*993b0882SAndroid Build Coastguard Worker       const EnabledEntityTypes& is_entity_type_enabled,
474*993b0882SAndroid Build Coastguard Worker       std::vector<AnnotatedSpan>* annotated_spans) const;
475*993b0882SAndroid Build Coastguard Worker 
476*993b0882SAndroid Build Coastguard Worker   // Runs only annotators that do not support structured input. Does conflict
477*993b0882SAndroid Build Coastguard Worker   // resolution, removal of disallowed entities and sorting on both the newly
478*993b0882SAndroid Build Coastguard Worker   // generated candidates and the passed-in entities.
  // `candidates` is therefore an in/out parameter: it carries the passed-in
  // entities on entry and the combined result on return.
479*993b0882SAndroid Build Coastguard Worker   // Returns Status::Error if the annotation failed, in which case the vector of
480*993b0882SAndroid Build Coastguard Worker   // candidates should be ignored.
481*993b0882SAndroid Build Coastguard Worker   Status AnnotateSingleInput(const std::string& context,
482*993b0882SAndroid Build Coastguard Worker                              const AnnotationOptions& options,
483*993b0882SAndroid Build Coastguard Worker                              std::vector<AnnotatedSpan>* candidates) const;
484*993b0882SAndroid Build Coastguard Worker 
485*993b0882SAndroid Build Coastguard Worker   // Parses the money amount into whole and decimal part and fills in the
486*993b0882SAndroid Build Coastguard Worker   // entity data information.
  // `serialized_entity_data` is the only non-const argument and receives the
  // filled-in entity data.
  // NOTE(review): the meaning of the bool return is not documented here --
  // presumably false when the amount cannot be parsed; confirm.
487*993b0882SAndroid Build Coastguard Worker   bool ParseAndFillInMoneyAmount(std::string* serialized_entity_data,
488*993b0882SAndroid Build Coastguard Worker                                  const UniLib::RegexMatcher* match,
489*993b0882SAndroid Build Coastguard Worker                                  const RegexModel_::Pattern* config,
490*993b0882SAndroid Build Coastguard Worker                                  const UnicodeText& context_unicode) const;
491*993b0882SAndroid Build Coastguard Worker 
492*993b0882SAndroid Build Coastguard Worker   // Given the regex capturing groups, extracts the one representing the money
493*993b0882SAndroid Build Coastguard Worker   // quantity and fills in the actual string (`quantity`) and the power of 10
494*993b0882SAndroid Build Coastguard Worker   // the amount should be multiplied with (`exponent`).
  // NOTE(review): what the outputs hold when no capturing group represents a
  // quantity is not visible here -- confirm in the implementation.
495*993b0882SAndroid Build Coastguard Worker   void GetMoneyQuantityFromCapturingGroup(const UniLib::RegexMatcher* match,
496*993b0882SAndroid Build Coastguard Worker                                           const RegexModel_::Pattern* config,
497*993b0882SAndroid Build Coastguard Worker                                           const UnicodeText& context_unicode,
498*993b0882SAndroid Build Coastguard Worker                                           std::string* quantity,
499*993b0882SAndroid Build Coastguard Worker                                           int* exponent) const;
500*993b0882SAndroid Build Coastguard Worker 
501*993b0882SAndroid Build Coastguard Worker   // Returns true if any of the ff-model entity types is enabled.
  // ("ff" presumably stands for feed-forward -- confirm.) As in the two
  // sibling checks below, the argument is named `is_entity_type_enabled` but
  // its type is EnabledEntityTypes.
502*993b0882SAndroid Build Coastguard Worker   bool IsAnyModelEntityTypeEnabled(
503*993b0882SAndroid Build Coastguard Worker       const EnabledEntityTypes& is_entity_type_enabled) const;
504*993b0882SAndroid Build Coastguard Worker 
505*993b0882SAndroid Build Coastguard Worker   // Returns true if any of the regex entity types is enabled.
506*993b0882SAndroid Build Coastguard Worker   bool IsAnyRegexEntityTypeEnabled(
507*993b0882SAndroid Build Coastguard Worker       const EnabledEntityTypes& is_entity_type_enabled) const;
508*993b0882SAndroid Build Coastguard Worker 
509*993b0882SAndroid Build Coastguard Worker   // Returns true if any of the POD NER entity types is enabled.
510*993b0882SAndroid Build Coastguard Worker   bool IsAnyPodNerEntityTypeEnabled(
511*993b0882SAndroid Build Coastguard Worker       const EnabledEntityTypes& is_entity_type_enabled) const;
512*993b0882SAndroid Build Coastguard Worker 
  // Memory mapping owned by this object.
  // NOTE(review): presumably the mapping of the model file when the model is
  // loaded from a path or file descriptor -- confirm.
513*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<ScopedMmap> mmap_;
  // Initialization flag and per-mode enable flags; all default to false.
514*993b0882SAndroid Build Coastguard Worker   bool initialized_ = false;
515*993b0882SAndroid Build Coastguard Worker   bool enabled_for_annotation_ = false;
516*993b0882SAndroid Build Coastguard Worker   bool enabled_for_classification_ = false;
517*993b0882SAndroid Build Coastguard Worker   bool enabled_for_selection_ = false;
  // Per-mode sets of collection names.
  // NOTE(review): presumably the collections rejected by the Filtered*()
  // predicates -- confirm in the implementation.
518*993b0882SAndroid Build Coastguard Worker   std::unordered_set<std::string> filtered_collections_annotation_;
519*993b0882SAndroid Build Coastguard Worker   std::unordered_set<std::string> filtered_collections_classification_;
520*993b0882SAndroid Build Coastguard Worker   std::unordered_set<std::string> filtered_collections_selection_;
521*993b0882SAndroid Build Coastguard Worker 
  // The regex patterns, each stored together with its configuration.
522*993b0882SAndroid Build Coastguard Worker   std::vector<CompiledRegexPattern> regex_patterns_;
523*993b0882SAndroid Build Coastguard Worker 
524*993b0882SAndroid Build Coastguard Worker   // Indices into regex_patterns_ for the different modes.
525*993b0882SAndroid Build Coastguard Worker   std::vector<int> annotation_regex_patterns_, classification_regex_patterns_,
526*993b0882SAndroid Build Coastguard Worker       selection_regex_patterns_;
527*993b0882SAndroid Build Coastguard Worker 
  // Unicode and calendar libraries, held as raw pointers.
  // NOTE(review): neither has an in-class initializer (unlike lang_id_ further
  // down) -- confirm every construction path sets them before use. Presumably
  // they point either at externally supplied libraries or at
  // owned_unilib_/owned_calendarlib_ -- confirm.
528*993b0882SAndroid Build Coastguard Worker   const UniLib* unilib_;
529*993b0882SAndroid Build Coastguard Worker   const CalendarLib* calendarlib_;
530*993b0882SAndroid Build Coastguard Worker 
  // Engines and sub-annotators owned by this object.
  // NOTE(review): presumably a null pointer means the component is not
  // available for the loaded model -- confirm.
531*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const KnowledgeEngine> knowledge_engine_;
532*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const ContactEngine> contact_engine_;
533*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const InstalledAppEngine> installed_app_engine_;
534*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const NumberAnnotator> number_annotator_;
535*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const DurationAnnotator> duration_annotator_;
536*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const PersonNameEngine> person_name_engine_;
537*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const TranslateAnnotator> translate_annotator_;
538*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const PodNerAnnotator> pod_ner_annotator_;
539*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const ExperimentalAnnotator> experimental_annotator_;
540*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<const VocabAnnotator> vocab_annotator_;
541*993b0882SAndroid Build Coastguard Worker 
542*993b0882SAndroid Build Coastguard Worker   // Builder for creating extra data.
  // The schema is a raw pointer without an in-class initializer; the builder
  // is owned.
  // NOTE(review): confirm entity_data_schema_ is set (or nulled) on every
  // initialization path before it is read.
543*993b0882SAndroid Build Coastguard Worker   const reflection::Schema* entity_data_schema_;
544*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<MutableFlatbufferBuilder> entity_data_builder_;
545*993b0882SAndroid Build Coastguard Worker 
546*993b0882SAndroid Build Coastguard Worker   // Locales for which the entire model triggers.
547*993b0882SAndroid Build Coastguard Worker   std::vector<Locale> model_triggering_locales_;
548*993b0882SAndroid Build Coastguard Worker 
549*993b0882SAndroid Build Coastguard Worker   // Locales for which the ML model triggers.
550*993b0882SAndroid Build Coastguard Worker   std::vector<Locale> ml_model_triggering_locales_;
551*993b0882SAndroid Build Coastguard Worker 
552*993b0882SAndroid Build Coastguard Worker   // Locales that the dictionary classification supports.
553*993b0882SAndroid Build Coastguard Worker   std::vector<Locale> dictionary_locales_;
554*993b0882SAndroid Build Coastguard Worker 
555*993b0882SAndroid Build Coastguard Worker   // Decimal and thousands number separators, stored as char32 values.
556*993b0882SAndroid Build Coastguard Worker   std::unordered_set<char32> money_separators_;
557*993b0882SAndroid Build Coastguard Worker 
558*993b0882SAndroid Build Coastguard Worker   // Model for language identification. Raw pointer, null by default.
  // NOTE(review): presumably not owned by this object -- confirm who keeps
  // the LangId instance alive.
559*993b0882SAndroid Build Coastguard Worker   const libtextclassifier3::mobile::lang_id::LangId* lang_id_ = nullptr;
560*993b0882SAndroid Build Coastguard Worker 
561*993b0882SAndroid Build Coastguard Worker   // If true, will prioritize the longest annotation during conflict resolution.
  // Defaults to false.
562*993b0882SAndroid Build Coastguard Worker   bool prioritize_longest_annotation_ = false;
563*993b0882SAndroid Build Coastguard Worker 
564*993b0882SAndroid Build Coastguard Worker   // If true, the annotator will perform conflict resolution between the
565*993b0882SAndroid Build Coastguard Worker   // different sub-annotators also in the RAW mode. If false, no conflict
566*993b0882SAndroid Build Coastguard Worker   // resolution will be performed in RAW mode. Defaults to true.
567*993b0882SAndroid Build Coastguard Worker   bool do_conflict_resolution_in_raw_mode_ = true;
568*993b0882SAndroid Build Coastguard Worker };
569*993b0882SAndroid Build Coastguard Worker 
570*993b0882SAndroid Build Coastguard Worker namespace internal {
571*993b0882SAndroid Build Coastguard Worker 
572*993b0882SAndroid Build Coastguard Worker // Helper function which, if the initial 'span' contains only whitespace,
573*993b0882SAndroid Build Coastguard Worker // moves the selection to a single-codepoint selection on the left side
574*993b0882SAndroid Build Coastguard Worker // of this block of whitespace.
// NOTE(review): the result for a span that is not whitespace-only is not
// stated here -- presumably 'span' is returned unchanged; confirm in the
// implementation.
575*993b0882SAndroid Build Coastguard Worker CodepointSpan SnapLeftIfWhitespaceSelection(const CodepointSpan& span,
576*993b0882SAndroid Build Coastguard Worker                                             const UnicodeText& context_unicode,
577*993b0882SAndroid Build Coastguard Worker                                             const UniLib& unilib);
578*993b0882SAndroid Build Coastguard Worker 
579*993b0882SAndroid Build Coastguard Worker // Copies tokens from 'cached_tokens' that are
580*993b0882SAndroid Build Coastguard Worker // 'tokens_around_selection_to_copy' (on the left, and right) tokens distant
581*993b0882SAndroid Build Coastguard Worker // from the tokens that correspond to 'selection_indices'.
// The copied tokens are returned by value. 'tokens_around_selection_to_copy'
// is a TokenSpan; presumably its two components are the token counts to keep
// on the left and on the right -- confirm in the implementation.
582*993b0882SAndroid Build Coastguard Worker std::vector<Token> CopyCachedTokens(const std::vector<Token>& cached_tokens,
583*993b0882SAndroid Build Coastguard Worker                                     const CodepointSpan& selection_indices,
584*993b0882SAndroid Build Coastguard Worker                                     TokenSpan tokens_around_selection_to_copy);
585*993b0882SAndroid Build Coastguard Worker }  // namespace internal
586*993b0882SAndroid Build Coastguard Worker 
587*993b0882SAndroid Build Coastguard Worker // Interprets the buffer as a Model flatbuffer and returns it for reading.
// `size` is the length of `buffer` (an int; presumably in bytes).
// NOTE(review): whether nullptr is returned for a null or invalid buffer is
// not visible here -- confirm; VisitAnnotatorModel() below forwards the result
// to its callback without checking it.
588*993b0882SAndroid Build Coastguard Worker const Model* ViewModel(const void* buffer, int size);
589*993b0882SAndroid Build Coastguard Worker 
590*993b0882SAndroid Build Coastguard Worker // Opens model from given path and runs a function, passing the loaded Model
591*993b0882SAndroid Build Coastguard Worker // flatbuffer as an argument.
592*993b0882SAndroid Build Coastguard Worker //
593*993b0882SAndroid Build Coastguard Worker // This is mainly useful if we don't want to pay the cost for the model
594*993b0882SAndroid Build Coastguard Worker // initialization because we'll be only reading some flatbuffer values from the
595*993b0882SAndroid Build Coastguard Worker // file.
596*993b0882SAndroid Build Coastguard Worker template <typename ReturnType, typename Func>
VisitAnnotatorModel(const std::string & path,Func function)597*993b0882SAndroid Build Coastguard Worker ReturnType VisitAnnotatorModel(const std::string& path, Func function) {
598*993b0882SAndroid Build Coastguard Worker   ScopedMmap mmap(path);
599*993b0882SAndroid Build Coastguard Worker   if (!mmap.handle().ok()) {
600*993b0882SAndroid Build Coastguard Worker     function(/*model=*/nullptr);
601*993b0882SAndroid Build Coastguard Worker   }
602*993b0882SAndroid Build Coastguard Worker   const Model* model =
603*993b0882SAndroid Build Coastguard Worker       ViewModel(mmap.handle().start(), mmap.handle().num_bytes());
604*993b0882SAndroid Build Coastguard Worker   return function(model);
605*993b0882SAndroid Build Coastguard Worker }
606*993b0882SAndroid Build Coastguard Worker 
607*993b0882SAndroid Build Coastguard Worker }  // namespace libtextclassifier3
608*993b0882SAndroid Build Coastguard Worker 
609*993b0882SAndroid Build Coastguard Worker #endif  // LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_H_
610