1"""Extension API for adding custom tags and behavior."""
2import pprint
3import re
4from sys import version_info
5
6from markupsafe import Markup
7
8from . import nodes
9from .defaults import BLOCK_END_STRING
10from .defaults import BLOCK_START_STRING
11from .defaults import COMMENT_END_STRING
12from .defaults import COMMENT_START_STRING
13from .defaults import KEEP_TRAILING_NEWLINE
14from .defaults import LINE_COMMENT_PREFIX
15from .defaults import LINE_STATEMENT_PREFIX
16from .defaults import LSTRIP_BLOCKS
17from .defaults import NEWLINE_SEQUENCE
18from .defaults import TRIM_BLOCKS
19from .defaults import VARIABLE_END_STRING
20from .defaults import VARIABLE_START_STRING
21from .environment import Environment
22from .exceptions import TemplateAssertionError
23from .exceptions import TemplateSyntaxError
24from .nodes import ContextReference
25from .runtime import concat
26from .utils import contextfunction
27from .utils import import_string
28
# I18N functions available in Jinja templates. If the I18N library
# provides ugettext, it will be assigned to gettext.
# This tuple is the default set of function names that the extraction
# helpers in this module look for.
GETTEXT_FUNCTIONS = ("_", "gettext", "ngettext")
# Matches a line break together with any whitespace surrounding it.  The
# i18n extension substitutes a single space for each match when the
# contents of a ``trans`` block are trimmed.
_ws_re = re.compile(r"\s*\n\s*")
33
34
class ExtensionRegistry(type):
    """Metaclass that stamps every class it creates with a unique
    ``identifier`` of the form ``"<module>.<class name>"``.
    """

    def __new__(mcs, name, bases, d):
        cls = type.__new__(mcs, name, bases, d)
        module_name = cls.__module__
        class_name = cls.__name__
        cls.identifier = f"{module_name}.{class_name}"
        return cls
42
43
class Extension(metaclass=ExtensionRegistry):
    """Base class for Jinja extensions, which add functionality to the
    template system at the parser level.

    An extension instance is bound to one environment, yet it must not keep
    environment specific data on `self`: :meth:`bind` attaches an extension
    to another environment (for overlays) by copying the instance and
    replacing its `environment` attribute.

    Extensions are instantiated by the environment and therefore cannot
    accept configuration arguments.  A factory function is no way around
    this because extensions are identified by their import name.  Store
    configuration values on the environment instead.  Since the environment
    thereby becomes the central configuration storage, attribute names may
    clash, so choose names that are not too generic.  ``prefix`` is a
    terrible name; ``fragment_cache_prefix`` is a good one because it
    includes the name of the extension (fragment cache).
    """

    #: the tags this extension listens to, if it parses any.
    tags = set()

    #: the priority of the extension; a lower value means a higher
    #: priority.  Mostly relevant for extensions that preprocess values.
    #:
    #: .. versionadded:: 2.4
    priority = 100

    def __init__(self, environment):
        self.environment = environment

    def bind(self, environment):
        """Return a copy of this extension that is bound to `environment`."""
        # Bypass ``__init__``: the clone takes over the instance state as is
        # and only the environment reference is swapped.
        clone = object.__new__(self.__class__)
        vars(clone).update(vars(self))
        clone.environment = environment
        return clone

    def preprocess(self, source, name, filename=None):
        """Hook that runs before the actual lexing and may rewrite the
        template source.  `filename` is optional.  Whatever is returned is
        used as the preprocessed source; the default returns `source`
        unchanged.
        """
        return source

    def filter_stream(self, stream):
        """Hook that receives a :class:`~jinja2.lexer.TokenStream` and may
        filter the tokens in it.  The return value has to be an iterable of
        :class:`~jinja2.lexer.Token`\\s but does not need to be a
        :class:`~jinja2.lexer.TokenStream`.  The default returns `stream`
        unchanged.
        """
        return stream

    def parse(self, parser):
        """Called with the parser as first argument when one of the
        :attr:`tags` matched.  The parser stream points at the name token
        that matched.  Implementations return a single node or a list of
        nodes; the base implementation raises :exc:`NotImplementedError`.
        """
        raise NotImplementedError()

    def attr(self, name, lineno=None):
        """Build an attribute node that refers to `name` on this extension.
        Useful for handing constants on extensions to generated template
        code.

        ::

            self.attr('_my_attribute', lineno=lineno)
        """
        extension_id = self.identifier
        return nodes.ExtensionAttribute(extension_id, name, lineno=lineno)

    def call_method(
        self, name, args=None, kwargs=None, dyn_args=None, dyn_kwargs=None, lineno=None
    ):
        """Build a node that calls the extension method `name`.  Shortcut
        for :meth:`attr` + :class:`jinja2.nodes.Call`.  Omitted `args` and
        `kwargs` default to fresh empty lists.
        """
        positional = [] if args is None else args
        keywords = [] if kwargs is None else kwargs
        target = self.attr(name, lineno=lineno)
        return nodes.Call(
            target, positional, keywords, dyn_args, dyn_kwargs, lineno=lineno
        )
134
135
@contextfunction
def _gettext_alias(__context, *args, **kwargs):
    """Forward the call to whatever ``gettext`` resolves to in the
    current template context.
    """
    target = __context.resolve("gettext")
    return __context.call(target, *args, **kwargs)
139
140
def _make_new_gettext(func):
    """Wrap the gettext callable `func` in a context function that also
    performs the placeholder expansion on the translated string.
    """

    @contextfunction
    def gettext(__context, __string, **variables):
        translated = __context.call(func, __string)
        if __context.eval_ctx.autoescape:
            translated = Markup(translated)
        # The translation is always used as a %-format string, even when
        # no variables were passed.  That keeps translation strings
        # consistent and predictable, at the price that literal percent
        # signs have to be escaped in the message.
        return translated % variables

    return gettext
153
154
def _make_new_ngettext(func):
    """Wrap the ngettext callable `func` in a context function that also
    performs the placeholder expansion on the translated string.
    """

    @contextfunction
    def ngettext(__context, __singular, __plural, __num, **variables):
        # The count is available as ``num`` unless the caller passed an
        # explicit value under that name.
        variables.setdefault("num", __num)
        translated = __context.call(func, __singular, __plural, __num)
        if __context.eval_ctx.autoescape:
            translated = Markup(translated)
        # Always applied as a %-format string, exactly like the
        # single-message wrapper does.
        return translated % variables

    return ngettext
166
167
class InternationalizationExtension(Extension):
    """Extension that brings gettext support (the ``trans`` tag and the
    gettext install helpers on the environment) to Jinja.
    """

    tags = {"trans"}

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        Extension.__init__(self, environment)
        environment.globals["_"] = _gettext_alias
        # Attributes added to the environment: the install/uninstall/extract
        # helpers plus the flag selecting newstyle gettext calls.
        hooks = {
            "install_gettext_translations": self._install,
            "install_null_translations": self._install_null,
            "install_gettext_callables": self._install_callables,
            "uninstall_gettext_translations": self._uninstall,
            "extract_translations": self._extract,
            "newstyle_gettext": False,
        }
        environment.extend(**hooks)

    def _install(self, translations, newstyle=None):
        """Install the gettext/ngettext callables of a translations object."""
        # ugettext and ungettext win when present, in case the I18N library
        # provides compatibility with older Python versions.
        singular_func = getattr(translations, "ugettext", None)
        if singular_func is None:
            singular_func = translations.gettext
        plural_func = getattr(translations, "ungettext", None)
        if plural_func is None:
            plural_func = translations.ngettext
        self._install_callables(singular_func, plural_func, newstyle)

    def _install_null(self, newstyle=None):
        """Install callables that return the untranslated message."""
        self._install_callables(
            lambda x: x,
            lambda s, p, n: s if n == 1 else p,
            newstyle,
        )

    def _install_callables(self, gettext, ngettext, newstyle=None):
        """Register `gettext` and `ngettext` as environment globals,
        wrapping both when newstyle gettext is active.  A `newstyle` value
        other than ``None`` is stored on the environment first.
        """
        env = self.environment
        if newstyle is not None:
            env.newstyle_gettext = newstyle
        if env.newstyle_gettext:
            gettext = _make_new_gettext(gettext)
            ngettext = _make_new_ngettext(ngettext)
        env.globals.update(gettext=gettext, ngettext=ngettext)

    def _uninstall(self, translations):
        """Remove the installed callables from the environment globals.
        `translations` is accepted but not used.
        """
        env_globals = self.environment.globals
        env_globals.pop("gettext", None)
        env_globals.pop("ngettext", None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Extract translatable strings from `source`, which is either
        template source text or an already parsed template node.
        """
        if isinstance(source, str):
            tree = self.environment.parse(source)
        else:
            tree = source
        return extract_from_ast(tree, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag."""
        stream = parser.stream
        lineno = next(stream).lineno
        num_called_num = False

        # Collect the variables named in the tag itself.  Variables may
        # also be introduced by the body of the block; those are picked up
        # further down.
        plural_expr = None
        plural_expr_assignment = None
        variables = {}
        trimmed = None
        while stream.current.type != "block_end":
            if variables:
                stream.expect("comma")

            # skip colon for python compatibility
            if stream.skip_if("colon"):
                break

            token = stream.expect("name")
            var_name = token.value
            if var_name in variables:
                parser.fail(
                    f"translatable variable {var_name!r} defined twice.",
                    token.lineno,
                    exc=TemplateAssertionError,
                )

            if stream.current.type == "assign":
                # ``name = <expression>``
                next(stream)
                var = parser.parse_expression()
                variables[var_name] = var
            elif trimmed is None and var_name in ("trimmed", "notrimmed"):
                # the first bare trimmed/notrimmed is a modifier, not a
                # variable
                trimmed = var_name == "trimmed"
                continue
            else:
                # a bare name refers to the variable of the same name
                var = nodes.Name(var_name, "load")
                variables[var_name] = var

            # the first variable doubles as the default plural expression
            if plural_expr is None:
                if isinstance(var, nodes.Call):
                    # store the call result in ``_trans`` and refer to that
                    # name instead of repeating the call expression
                    plural_expr = nodes.Name("_trans", "load")
                    variables[var_name] = plural_expr
                    plural_expr_assignment = nodes.Assign(
                        nodes.Name("_trans", "store"), var
                    )
                else:
                    plural_expr = var
                num_called_num = var_name == "num"

        stream.expect("block_end")

        plural = None
        have_plural = False
        referenced = set()

        # read the body up to endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            if plural_expr is None:
                first_name = singular_names[0]
                plural_expr = nodes.Name(first_name, "load")
                num_called_num = first_name == "num"

        # a pluralize block, if present, is read as well
        if stream.current.test("name:pluralize"):
            have_plural = True
            next(stream)
            if stream.current.type != "block_end":
                token = stream.expect("name")
                if token.value not in variables:
                    parser.fail(
                        f"unknown variable {token.value!r} for pluralization",
                        token.lineno,
                        exc=TemplateAssertionError,
                    )
                plural_expr = variables[token.value]
                num_called_num = token.value == "num"
            stream.expect("block_end")
            plural_names, plural = self._parse_block(parser, False)
            next(stream)
            referenced.update(plural_names)
        else:
            next(stream)

        # names used in the body but never declared become plain lookups
        for free_name in referenced:
            if free_name not in variables:
                variables[free_name] = nodes.Name(free_name, "load")

        if have_plural:
            if plural_expr is None:
                parser.fail("pluralize without variables", lineno)
        else:
            plural_expr = None

        # without an explicit modifier the environment policy decides
        if trimmed is None:
            trimmed = self.environment.policies["ext.i18n.trimmed"]
        if trimmed:
            singular = self._trim_whitespace(singular)
            if plural:
                plural = self._trim_whitespace(plural)

        node = self._make_node(
            singular,
            plural,
            variables,
            plural_expr,
            bool(referenced),
            num_called_num and have_plural,
        )
        node.set_lineno(lineno)
        if plural_expr_assignment is None:
            return node
        return [plural_expr_assignment, node]

    def _trim_whitespace(self, string, _ws_re=_ws_re):
        """Strip `string` and replace every line break, together with the
        whitespace around it, by one space.
        """
        stripped = string.strip()
        return _ws_re.sub(" ", stripped)

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Returns the list of referenced variable names and the message
        text, in which variables appear as ``%(name)s`` placeholders and
        literal percent signs are doubled.
        """
        stream = parser.stream
        referenced = []
        buf = []
        while True:
            kind = stream.current.type
            if kind == "data":
                buf.append(stream.current.value.replace("%", "%%"))
                next(stream)
            elif kind == "variable_begin":
                next(stream)
                name = stream.expect("name").value
                referenced.append(name)
                buf.append(f"%({name})s")
                stream.expect("variable_end")
            elif kind == "block_begin":
                next(stream)
                if stream.current.test("name:endtrans"):
                    break
                if stream.current.test("name:pluralize"):
                    if allow_pluralize:
                        break
                    parser.fail(
                        "a translatable section can have only one pluralize section"
                    )
                parser.fail(
                    "control structures in translatable sections are not allowed"
                )
            elif stream.eos:
                parser.fail("unclosed translation block")
            else:
                raise RuntimeError("internal parser error")

        return referenced, concat(buf)

    def _make_node(
        self, singular, plural, variables, plural_expr, vars_referenced, num_called_num
    ):
        """Generates a useful node from the data provided."""
        newstyle = self.environment.newstyle_gettext

        # Old style calls without referenced variables do not need the
        # percent escaping, so it is undone here.
        # NOTE(review): `variables` can be non-empty while nothing is
        # referenced in the body; the old style branch below then still
        # applies ``%`` to the unescaped message - confirm this is intended.
        if not (vars_referenced or newstyle):
            singular = singular.replace("%%", "%")
            if plural:
                plural = plural.replace("%%", "%")

        if plural_expr is None:
            # singular only
            func = nodes.Name("gettext", "load")
            call_args = [nodes.Const(singular)]
        else:
            # singular and plural
            func = nodes.Name("ngettext", "load")
            call_args = [nodes.Const(singular), nodes.Const(plural), plural_expr]
        node = nodes.Call(func, call_args, [], None, None)

        if newstyle:
            # The newstyle callables handle variable expansion and
            # autoescaping themselves; the variables are passed as keyword
            # arguments.  ``num`` is skipped when the count is called num,
            # since the function adds it later anyway.
            node.kwargs.extend(
                nodes.Keyword(key, value)
                for key, value in variables.items()
                if not (num_called_num and key == "num")
            )
        else:
            # Old style: mark the result as safe when autoescaping is on
            # and expand the variables right here.
            node = nodes.MarkSafeIfAutoescape(node)
            if variables:
                pairs = [
                    nodes.Pair(nodes.Const(key), value)
                    for key, value in variables.items()
                ]
                node = nodes.Mod(node, nodes.Dict(pairs))
        return nodes.Output([node])
431
432
class ExprStmtExtension(Extension):
    """Provides the `do` tag, which evaluates an expression like the
    print statement does but discards the return value instead of
    printing it.
    """

    tags = {"do"}

    def parse(self, parser):
        """Consume the tag token and parse the expression that follows."""
        tag_token = next(parser.stream)
        statement = nodes.ExprStmt(lineno=tag_token.lineno)
        statement.node = parser.parse_tuple()
        return statement
444
445
class LoopControlExtension(Extension):
    """Provides the ``break`` and ``continue`` tags for loops."""

    tags = {"break", "continue"}

    def parse(self, parser):
        """Turn the matched tag into a ``Break`` or ``Continue`` node."""
        token = next(parser.stream)
        node_type = nodes.Break if token.value == "break" else nodes.Continue
        return node_type(lineno=token.lineno)
456
457
class WithExtension(Extension):
    """Empty extension: it registers no tags and overrides nothing from
    :class:`Extension`.

    NOTE(review): presumably kept only so that existing references to this
    extension keep working - confirm before removing.
    """

    pass
460
461
class AutoEscapeExtension(Extension):
    """Empty extension: it registers no tags and overrides nothing from
    :class:`Extension`.

    NOTE(review): presumably kept only so that existing references to this
    extension keep working - confirm before removing.
    """

    pass
464
465
class DebugExtension(Extension):
    """A ``{% debug %}`` tag that dumps the available variables,
    filters, and tests.

    .. code-block:: html+jinja

        <pre>{% debug %}</pre>

    .. code-block:: text

        {'context': {'cycler': <class 'jinja2.utils.Cycler'>,
                     ...,
                     'namespace': <class 'jinja2.utils.Namespace'>},
         'filters': ['abs', 'attr', 'batch', 'capitalize', 'center', 'count', 'd',
                     ..., 'urlencode', 'urlize', 'wordcount', 'wordwrap', 'xmlattr'],
         'tests': ['!=', '<', '<=', '==', '>', '>=', 'callable', 'defined',
                   ..., 'odd', 'sameas', 'sequence', 'string', 'undefined', 'upper']}

    .. versionadded:: 2.11.0
    """

    tags = {"debug"}

    def parse(self, parser):
        """Replace the ``debug`` tag with an output node that calls
        :meth:`_render` with a reference to the template context.
        """
        lineno = parser.stream.expect("name:debug").lineno
        context = ContextReference()
        result = self.call_method("_render", [context], lineno=lineno)
        return nodes.Output([result], lineno=lineno)

    def _render(self, context):
        """Return a pretty-printed dump of the context variables and the
        sorted names of the environment's filters and tests.
        """
        result = {
            "context": context.get_all(),
            "filters": sorted(self.environment.filters.keys()),
            "tests": sorted(self.environment.tests.keys()),
        }

        # Set the depth since the intent is to show the top few names.
        # ``compact`` needs Python 3.4; this module already requires a
        # newer interpreter (it uses f-strings), so no version check is
        # needed.
        return pprint.pformat(result, depth=3, compact=True)
507
508
def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS, babel_style=True):
    """Yield the localizable strings found in the given template node.

    By default matches are reported in babel style: parameters that are
    not strings, as well as keyword arguments, show up as `None`.  That
    lets Babel figure out what was really meant when gettext functions are
    used that accept keyword arguments for placeholder expansion.  With
    `babel_style` set to `False` only strings are reported and the
    parameters are always stored in tuples; calls that contain no string
    parameter at all are then skipped.

    This example explains the behavior:

    >>> from jinja2 import Environment
    >>> env = Environment()
    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    >>> list(extract_from_ast(node))
    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    >>> list(extract_from_ast(node, babel_style=False))
    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]

    Every match is yielded as a ``(lineno, function, message)`` tuple,
    where:

    * ``lineno`` is the line number of the call node,
    * ``function`` is the name of the ``gettext`` function that was
      called, and
    * ``message`` is the string, or a tuple of strings for functions
      with multiple string arguments.

    Because this extraction works on the AST it cannot extract any
    comments.  For comment support use the babel extraction interface or
    extract the comments yourself.
    """
    for call in node.find_all(nodes.Call):
        target = call.node
        # only direct calls to one of the known gettext names count
        if not (isinstance(target, nodes.Name) and target.name in gettext_functions):
            continue

        # string constants are kept; any other positional argument is
        # represented by None
        strings = [
            arg.value
            if isinstance(arg, nodes.Const) and isinstance(arg.value, str)
            else None
            for arg in call.args
        ]

        # one None per keyword argument, plus one each for *args / **kwargs
        strings.extend(None for _ in call.kwargs)
        if call.dyn_args is not None:
            strings.append(None)
        if call.dyn_kwargs is not None:
            strings.append(None)

        if babel_style:
            # a lone entry is reported bare, anything else as a tuple
            message = strings[0] if len(strings) == 1 else tuple(strings)
        else:
            message = tuple(x for x in strings if x is not None)
            if not message:
                continue
        yield call.lineno, target.name, message
575
576
577class _CommentFinder:
578    """Helper class to find comments in a token stream.  Can only
579    find comments for gettext calls forwards.  Once the comment
580    from line 4 is found, a comment for line 1 will not return a
581    usable value.
582    """
583
584    def __init__(self, tokens, comment_tags):
585        self.tokens = tokens
586        self.comment_tags = comment_tags
587        self.offset = 0
588        self.last_lineno = 0
589
590    def find_backwards(self, offset):
591        try:
592            for _, token_type, token_value in reversed(
593                self.tokens[self.offset : offset]
594            ):
595                if token_type in ("comment", "linecomment"):
596                    try:
597                        prefix, comment = token_value.split(None, 1)
598                    except ValueError:
599                        continue
600                    if prefix in self.comment_tags:
601                        return [comment.rstrip()]
602            return []
603        finally:
604            self.offset = offset
605
606    def find_comments(self, lineno):
607        if not self.comment_tags or self.last_lineno > lineno:
608            return []
609        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset :]):
610            if token_lineno > lineno:
611                return self.find_backwards(self.offset + idx)
612        return self.find_backwards(len(self.tokens))
613
614
def babel_extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    .. versionchanged:: 2.5.1
       The `newstyle_gettext` flag can be set to `True` to enable newstyle
       gettext calls.

    .. versionchanged:: 2.7
       A `silent` option can now be provided.  If set to `False` template
       syntax errors are propagated instead of being ignored.

    :param fileobj: the file-like object the messages should be extracted
                    from; its content is read as bytes and decoded with
                    the ``encoding`` option (``utf-8`` by default)
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options, all given as strings
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples, where ``comments`` is the list of translator comments
             found for that line
    """
    # Import every extension named in the comma separated option; the i18n
    # extension is always part of the set.
    extensions = set()
    for raw_name in options.get("extensions", "").split(","):
        import_name = raw_name.strip()
        if import_name:
            extensions.add(import_string(import_name))
    extensions.add(InternationalizationExtension)

    def getbool(options, key, default=False):
        # option values are strings; only these spellings count as true
        value = options.get(key, str(default)).lower()
        return value in ("1", "on", "yes", "true")

    silent = getbool(options, "silent", True)
    environment = Environment(
        options.get("block_start_string", BLOCK_START_STRING),
        options.get("block_end_string", BLOCK_END_STRING),
        options.get("variable_start_string", VARIABLE_START_STRING),
        options.get("variable_end_string", VARIABLE_END_STRING),
        options.get("comment_start_string", COMMENT_START_STRING),
        options.get("comment_end_string", COMMENT_END_STRING),
        options.get("line_statement_prefix") or LINE_STATEMENT_PREFIX,
        options.get("line_comment_prefix") or LINE_COMMENT_PREFIX,
        getbool(options, "trim_blocks", TRIM_BLOCKS),
        getbool(options, "lstrip_blocks", LSTRIP_BLOCKS),
        NEWLINE_SEQUENCE,
        getbool(options, "keep_trailing_newline", KEEP_TRAILING_NEWLINE),
        frozenset(extensions),
        cache_size=0,
        auto_reload=False,
    )

    if getbool(options, "trimmed"):
        environment.policies["ext.i18n.trimmed"] = True
    if getbool(options, "newstyle_gettext"):
        environment.newstyle_gettext = True

    raw = fileobj.read()
    source = raw.decode(options.get("encoding", "utf-8"))
    try:
        node = environment.parse(source)
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        if silent:
            # skip templates with syntax errors
            return
        raise

    finder = _CommentFinder(tokens, comment_tags)
    for lineno, funcname, message in extract_from_ast(node, keywords):
        comments = finder.find_comments(lineno)
        yield lineno, funcname, message, comments
692
693
#: nicer import names: short module-level aliases for the extension
#: classes defined in this module.
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension
debug = DebugExtension
701