xref: /aosp_15_r20/external/executorch/extension/llm/tokenizer/utils.py (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
6*523fa7a6SAndroid Build Coastguard Worker
7*523fa7a6SAndroid Build Coastguard Workerfrom executorch.examples.models.llama.tokenizer.tiktoken import Tokenizer as Tiktoken
8*523fa7a6SAndroid Build Coastguard Workerfrom executorch.extension.llm.tokenizer.tokenizer import (
9*523fa7a6SAndroid Build Coastguard Worker    Tokenizer as SentencePieceTokenizer,
10*523fa7a6SAndroid Build Coastguard Worker)
11*523fa7a6SAndroid Build Coastguard Worker
12*523fa7a6SAndroid Build Coastguard Worker
def get_tokenizer(tokenizer_path):
    """Construct a tokenizer for the model file at ``tokenizer_path``.

    A ``SentencePieceTokenizer`` is attempted first. If constructing it
    raises any ``Exception``, the message "Using Tiktokenizer" is printed
    and a ``Tiktoken`` tokenizer is constructed from the same path instead.

    Args:
        tokenizer_path: Location of the tokenizer model. It is converted
            with ``str()`` before being passed as ``model_path``.

    Returns:
        The ``SentencePieceTokenizer`` instance when its construction
        succeeds, otherwise the ``Tiktoken`` instance.

    Raises:
        Any exception raised by the ``Tiktoken`` constructor in the
        fallback path propagates to the caller.
    """
    try:
        return SentencePieceTokenizer(model_path=str(tokenizer_path))
    except Exception:
        # The fallback is built inside the handler so that a failure here
        # remains chained to the SentencePiece error that triggered it.
        print("Using Tiktokenizer")
        return Tiktoken(model_path=str(tokenizer_path))
20