"""Extension API for adding custom tags and behavior."""
import pprint
import re
from sys import version_info

from markupsafe import Markup

from . import nodes
from .defaults import BLOCK_END_STRING
from .defaults import BLOCK_START_STRING
from .defaults import COMMENT_END_STRING
from .defaults import COMMENT_START_STRING
from .defaults import KEEP_TRAILING_NEWLINE
from .defaults import LINE_COMMENT_PREFIX
from .defaults import LINE_STATEMENT_PREFIX
from .defaults import LSTRIP_BLOCKS
from .defaults import NEWLINE_SEQUENCE
from .defaults import TRIM_BLOCKS
from .defaults import VARIABLE_END_STRING
from .defaults import VARIABLE_START_STRING
from .environment import Environment
from .exceptions import TemplateAssertionError
from .exceptions import TemplateSyntaxError
from .nodes import ContextReference
from .runtime import concat
from .utils import contextfunction
from .utils import import_string

# I18N functions available in Jinja templates. If the I18N library
# provides ugettext, it will be assigned to gettext.
GETTEXT_FUNCTIONS = ("_", "gettext", "ngettext")

# A newline together with any whitespace around it; used to collapse
# multi-line translatable text into a single line.
_ws_re = re.compile(r"\s*\n\s*")


class ExtensionRegistry(type):
    """Gives the extension a unique identifier."""

    def __new__(mcs, name, bases, d):
        rv = type.__new__(mcs, name, bases, d)
        # The identifier is the dotted import path of the class.
        rv.identifier = f"{rv.__module__}.{rv.__name__}"
        return rv


class Extension(metaclass=ExtensionRegistry):
    """Extensions can be used to add extra functionality to the Jinja template
    system at the parser level.  Custom extensions are bound to an environment
    but may not store environment specific data on `self`.  The reason for
    this is that an extension can be bound to another environment (for
    overlays) by creating a copy and reassigning the `environment` attribute.

    As extensions are created by the environment they cannot accept any
    arguments for configuration.  One may want to work around that by using
    a factory function, but that is not possible as extensions are identified
    by their import name.  The correct way to configure the extension is
    storing the configuration values on the environment.  Because this way the
    environment ends up acting as central configuration storage the
    attributes may clash which is why extensions have to ensure that the names
    they choose for configuration are not too generic.  ``prefix`` for example
    is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
    name as it includes the name of the extension (fragment cache).
    """

    #: if this extension parses this is the list of tags it's listening to.
    tags = set()

    #: the priority of that extension.  This is especially useful for
    #: extensions that preprocess values.  A lower value means higher
    #: priority.
    #:
    #: .. versionadded:: 2.4
    priority = 100

    def __init__(self, environment):
        self.environment = environment

    def bind(self, environment):
        """Create a copy of this extension bound to another environment."""
        # Bypass ``__init__`` so subclass constructors are not re-run; the
        # instance state is copied over and only the environment is swapped.
        rv = object.__new__(self.__class__)
        rv.__dict__.update(self.__dict__)
        rv.environment = environment
        return rv

    def preprocess(self, source, name, filename=None):
        """This method is called before the actual lexing and can be used to
        preprocess the source.  The `filename` is optional.  The return value
        must be the preprocessed source.
        """
        return source

    def filter_stream(self, stream):
        """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
        to filter tokens returned.  This method has to return an iterable of
        :class:`~jinja2.lexer.Token`\\s, but it doesn't have to return a
        :class:`~jinja2.lexer.TokenStream`.
        """
        return stream

    def parse(self, parser):
        """If any of the :attr:`tags` matched this method is called with the
        parser as first argument.  The token the parser stream is pointing at
        is the name token that matched.  This method has to return one or a
        list of multiple nodes.
        """
        raise NotImplementedError()

    def attr(self, name, lineno=None):
        """Return an attribute node for the current extension.  This is useful
        to pass constants on extensions to generated template code.

        ::

            self.attr('_my_attribute', lineno=lineno)
        """
        return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)

    def call_method(
        self, name, args=None, kwargs=None, dyn_args=None, dyn_kwargs=None, lineno=None
    ):
        """Call a method of the extension.  This is a shortcut for
        :meth:`attr` + :class:`jinja2.nodes.Call`.

        `args` and `kwargs` default to fresh empty lists when omitted.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = []
        return nodes.Call(
            self.attr(name, lineno=lineno),
            args,
            kwargs,
            dyn_args,
            dyn_kwargs,
            lineno=lineno,
        )


@contextfunction
def _gettext_alias(__context, *args, **kwargs):
    """Resolve ``gettext`` in the given context and call it with the
    arguments passed.  Installed as the ``_`` global by the i18n extension.
    """
    return __context.call(__context.resolve("gettext"), *args, **kwargs)


def _make_new_gettext(func):
    """Wrap a ``gettext`` callable so that the translated string is marked
    safe when autoescaping is active and is then ``%``-formatted with the
    keyword arguments passed to the wrapper.
    """

    @contextfunction
    def gettext(__context, __string, **variables):
        rv = __context.call(func, __string)
        if __context.eval_ctx.autoescape:
            rv = Markup(rv)
        # Always treat as a format string, even if there are no
        # variables. This makes translation strings more consistent
        # and predictable. This requires literal percent signs in the
        # message to be escaped as ``%%``.
        return rv % variables

    return gettext


def _make_new_ngettext(func):
    """Wrap an ``ngettext`` callable like :func:`_make_new_gettext` does for
    ``gettext``.  The count is additionally made available to the format
    string as ``num`` unless the caller passed its own ``num``.
    """

    @contextfunction
    def ngettext(__context, __singular, __plural, __num, **variables):
        variables.setdefault("num", __num)
        rv = __context.call(func, __singular, __plural, __num)
        if __context.eval_ctx.autoescape:
            rv = Markup(rv)
        # Always treat as a format string, see gettext comment above.
        return rv % variables

    return ngettext


class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja."""

    tags = {"trans"}

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        super().__init__(environment)
        environment.globals["_"] = _gettext_alias
        # Expose the install/uninstall/extract helpers and the
        # ``newstyle_gettext`` flag as attributes of the environment.
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            install_gettext_callables=self._install_callables,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract,
            newstyle_gettext=False,
        )

    def _install(self, translations, newstyle=None):
        """Install the ``gettext`` and ``ngettext`` callables of a
        translations object as environment globals.
        """
        # ugettext and ungettext are preferred in case the I18N library
        # is providing compatibility with older Python versions.
        gettext = getattr(translations, "ugettext", None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, "ungettext", None)
        if ngettext is None:
            ngettext = translations.ngettext
        self._install_callables(gettext, ngettext, newstyle)

    def _install_null(self, newstyle=None):
        """Install pass-through callables: ``gettext`` returns its argument
        unchanged and ``ngettext`` returns the singular form when the count
        is ``1`` and the plural form otherwise.
        """
        self._install_callables(
            lambda x: x, lambda s, p, n: s if n == 1 else p, newstyle
        )

    def _install_callables(self, gettext, ngettext, newstyle=None):
        """Register the given callables as the ``gettext`` and ``ngettext``
        globals.  If `newstyle` is not ``None`` it first overrides the
        environment's ``newstyle_gettext`` flag; when that flag is set the
        callables are wrapped in the new-style helpers.
        """
        if newstyle is not None:
            self.environment.newstyle_gettext = newstyle
        if self.environment.newstyle_gettext:
            gettext = _make_new_gettext(gettext)
            ngettext = _make_new_ngettext(ngettext)
        self.environment.globals.update(gettext=gettext, ngettext=ngettext)

    def _uninstall(self, translations):
        """Remove the ``gettext`` and ``ngettext`` globals if present.  The
        `translations` argument is accepted but not used.
        """
        for key in "gettext", "ngettext":
            self.environment.globals.pop(key, None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Extract translatable strings from `source`, which is either
        template source as a string (parsed first) or an already parsed
        node.  See :func:`extract_from_ast` for the yielded values.
        """
        if isinstance(source, str):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag."""
        lineno = next(parser.stream).lineno
        num_called_num = False

        # find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later stage.
        plural_expr = None
        plural_expr_assignment = None
        variables = {}
        trimmed = None
        while parser.stream.current.type != "block_end":
            if variables:
                parser.stream.expect("comma")

            # skip colon for python compatibility
            if parser.stream.skip_if("colon"):
                break

            name = parser.stream.expect("name")
            if name.value in variables:
                parser.fail(
                    f"translatable variable {name.value!r} defined twice.",
                    name.lineno,
                    exc=TemplateAssertionError,
                )

            # expressions
            if parser.stream.current.type == "assign":
                next(parser.stream)
                variables[name.value] = var = parser.parse_expression()
            elif trimmed is None and name.value in ("trimmed", "notrimmed"):
                # a bare ``trimmed`` / ``notrimmed`` is a modifier, not a
                # variable; only the first one is interpreted this way.
                trimmed = name.value == "trimmed"
                continue
            else:
                variables[name.value] = var = nodes.Name(name.value, "load")

            # the first variable is the default pluralization expression
            if plural_expr is None:
                if isinstance(var, nodes.Call):
                    # store the call result in ``_trans`` and refer to that
                    # name instead of repeating the call expression.
                    plural_expr = nodes.Name("_trans", "load")
                    variables[name.value] = plural_expr
                    plural_expr_assignment = nodes.Assign(
                        nodes.Name("_trans", "store"), var
                    )
                else:
                    plural_expr = var
                num_called_num = name.value == "num"

        parser.stream.expect("block_end")

        plural = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], "load")
                num_called_num = singular_names[0] == "num"

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test("name:pluralize"):
            have_plural = True
            next(parser.stream)
            if parser.stream.current.type != "block_end":
                name = parser.stream.expect("name")
                if name.value not in variables:
                    parser.fail(
                        f"unknown variable {name.value!r} for pluralization",
                        name.lineno,
                        exc=TemplateAssertionError,
                    )
                plural_expr = variables[name.value]
                num_called_num = name.value == "num"
            parser.stream.expect("block_end")
            plural_names, plural = self._parse_block(parser, False)
            next(parser.stream)
            referenced.update(plural_names)
        else:
            next(parser.stream)

        # register free names as simple name expressions
        for var in referenced:
            if var not in variables:
                variables[var] = nodes.Name(var, "load")

        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail("pluralize without variables", lineno)

        # without an explicit modifier on the tag, fall back to the policy
        if trimmed is None:
            trimmed = self.environment.policies["ext.i18n.trimmed"]
        if trimmed:
            singular = self._trim_whitespace(singular)
            if plural:
                plural = self._trim_whitespace(plural)

        node = self._make_node(
            singular,
            plural,
            variables,
            plural_expr,
            bool(referenced),
            num_called_num and have_plural,
        )
        node.set_lineno(lineno)
        if plural_expr_assignment is not None:
            return [plural_expr_assignment, node]
        else:
            return node

    def _trim_whitespace(self, string, _ws_re=_ws_re):
        """Strip `string` and replace every newline, together with the
        whitespace around it, by a single space.
        """
        return _ws_re.sub(" ", string.strip())

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Returns a ``(referenced, text)`` tuple: the list of variable names
        used in ``{{ ... }}`` expressions, in order of appearance, and the
        block's text with those expressions replaced by ``%(name)s``
        placeholders and literal percent signs doubled.  Parsing stops at
        ``endtrans``, or at ``pluralize`` if `allow_pluralize` is true.
        """
        referenced = []
        buf = []
        while True:
            if parser.stream.current.type == "data":
                buf.append(parser.stream.current.value.replace("%", "%%"))
                next(parser.stream)
            elif parser.stream.current.type == "variable_begin":
                next(parser.stream)
                name = parser.stream.expect("name").value
                referenced.append(name)
                buf.append(f"%({name})s")
                parser.stream.expect("variable_end")
            elif parser.stream.current.type == "block_begin":
                next(parser.stream)
                if parser.stream.current.test("name:endtrans"):
                    break
                elif parser.stream.current.test("name:pluralize"):
                    if allow_pluralize:
                        break
                    parser.fail(
                        "a translatable section can have only one pluralize section"
                    )
                parser.fail(
                    "control structures in translatable sections are not allowed"
                )
            elif parser.stream.eos:
                parser.fail("unclosed translation block")
            else:
                raise RuntimeError("internal parser error")

        return referenced, concat(buf)

    def _make_node(
        self, singular, plural, variables, plural_expr, vars_referenced, num_called_num
    ):
        """Generates a useful node from the data provided."""
        # old style gettext without any variable referenced in the body:
        # undo the doubling of percent signs done by _parse_block.
        # NOTE(review): the ``%`` formatting below is applied whenever
        # `variables` is non-empty, even if none of them is referenced in
        # the body; in that case a literal ``%`` un-escaped here looks like
        # it would be fed to the ``%`` operator -- confirm.
        if not vars_referenced and not self.environment.newstyle_gettext:
            singular = singular.replace("%%", "%")
            if plural:
                plural = plural.replace("%%", "%")

        # singular only:
        if plural_expr is None:
            gettext = nodes.Name("gettext", "load")
            node = nodes.Call(gettext, [nodes.Const(singular)], [], None, None)

        # singular and plural
        else:
            ngettext = nodes.Name("ngettext", "load")
            node = nodes.Call(
                ngettext,
                [nodes.Const(singular), nodes.Const(plural), plural_expr],
                [],
                None,
                None,
            )

        # in case newstyle gettext is used, the method is powerful
        # enough to handle the variable expansion and autoescape
        # handling itself
        if self.environment.newstyle_gettext:
            for key, value in variables.items():
                # the function adds that later anyways in case num was
                # called num, so just skip it.
                if num_called_num and key == "num":
                    continue
                node.kwargs.append(nodes.Keyword(key, value))

        # otherwise do that here
        else:
            # mark the return value as safe if we are in an
            # environment with autoescaping turned on
            node = nodes.MarkSafeIfAutoescape(node)
            if variables:
                node = nodes.Mod(
                    node,
                    nodes.Dict(
                        [
                            nodes.Pair(nodes.Const(key), value)
                            for key, value in variables.items()
                        ]
                    ),
                )
        return nodes.Output([node])


class ExprStmtExtension(Extension):
    """Adds a `do` tag to Jinja that works like the print statement just
    that it doesn't print the return value.
    """

    tags = {"do"}

    def parse(self, parser):
        node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
        node.node = parser.parse_tuple()
        return node


class LoopControlExtension(Extension):
    """Adds break and continue to the template engine."""

    tags = {"break", "continue"}

    def parse(self, parser):
        token = next(parser.stream)
        if token.value == "break":
            return nodes.Break(lineno=token.lineno)
        return nodes.Continue(lineno=token.lineno)


class WithExtension(Extension):
    # Registers no tags and overrides nothing from the base class.
    # NOTE(review): presumably kept only so the name stays importable --
    # confirm.
    pass


class AutoEscapeExtension(Extension):
    # Registers no tags and overrides nothing from the base class.
    # NOTE(review): presumably kept only so the name stays importable --
    # confirm.
    pass


class DebugExtension(Extension):
    """A ``{% debug %}`` tag that dumps the available variables,
    filters, and tests.

    .. code-block:: html+jinja

        <pre>{% debug %}</pre>

    .. code-block:: text

        {'context': {'cycler': <class 'jinja2.utils.Cycler'>,
                     ...,
                     'namespace': <class 'jinja2.utils.Namespace'>},
         'filters': ['abs', 'attr', 'batch', 'capitalize', 'center', 'count', 'd',
                     ..., 'urlencode', 'urlize', 'wordcount', 'wordwrap', 'xmlattr'],
         'tests': ['!=', '<', '<=', '==', '>', '>=', 'callable', 'defined',
                   ..., 'odd', 'sameas', 'sequence', 'string', 'undefined', 'upper']}

    .. versionadded:: 2.11.0
    """

    tags = {"debug"}

    def parse(self, parser):
        lineno = parser.stream.expect("name:debug").lineno
        context = ContextReference()
        # Emit a call to ``self._render(context)`` into the template output.
        result = self.call_method("_render", [context], lineno=lineno)
        return nodes.Output([result], lineno=lineno)

    def _render(self, context):
        """Return a pretty-printed dump of the context variables and the
        sorted names of the environment's filters and tests.
        """
        result = {
            "context": context.get_all(),
            "filters": sorted(self.environment.filters.keys()),
            "tests": sorted(self.environment.tests.keys()),
        }

        # Set the depth since the intent is to show the top few names.
        if version_info[:2] >= (3, 4):
            return pprint.pformat(result, depth=3, compact=True)
        else:
            return pprint.pformat(result, depth=3)


def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS, babel_style=True):
    """Extract localizable strings from the given template node.  Per
    default this function returns matches in babel style that means non string
    parameters as well as keyword arguments are returned as `None`.  This
    allows Babel to figure out what you really meant if you are using
    gettext functions that allow keyword arguments for placeholder expansion.
    If you don't want that behavior set the `babel_style` parameter to `False`
    which causes only strings to be returned and parameters are always stored
    in tuples.  As a consequence invalid gettext calls (calls without a single
    string parameter or string parameters after non-string parameters) are
    skipped.

    This example explains the behavior:

    >>> from jinja2 import Environment
    >>> env = Environment()
    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    >>> list(extract_from_ast(node))
    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    >>> list(extract_from_ast(node, babel_style=False))
    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]

    For every string found this function yields a ``(lineno, function,
    message)`` tuple, where:

    * ``lineno`` is the number of the line on which the string was found,
    * ``function`` is the name of the ``gettext`` function used (if the
      string was extracted from embedded Python code), and
    * ``message`` is the string, or a tuple of strings for functions
      with multiple string arguments.

    This extraction function operates on the AST and is because of that unable
    to extract any comments.  For comment support you have to use the babel
    extraction interface or extract comments yourself.
    """
    for call in node.find_all(nodes.Call):
        # only direct calls to one of the known gettext names are considered
        if (
            not isinstance(call.node, nodes.Name)
            or call.node.name not in gettext_functions
        ):
            continue

        strings = []
        for arg in call.args:
            if isinstance(arg, nodes.Const) and isinstance(arg.value, str):
                strings.append(arg.value)
            else:
                strings.append(None)

        # keyword and dynamic arguments are reported as ``None`` placeholders
        for _ in call.kwargs:
            strings.append(None)
        if call.dyn_args is not None:
            strings.append(None)
        if call.dyn_kwargs is not None:
            strings.append(None)

        if not babel_style:
            strings = tuple(x for x in strings if x is not None)
            if not strings:
                continue
        else:
            if len(strings) == 1:
                strings = strings[0]
            else:
                strings = tuple(strings)
        yield call.lineno, call.node.name, strings


class _CommentFinder:
    """Helper class to find comments in a token stream.  Can only
    find comments for gettext calls forwards.  Once the comment
    from line 4 is found, a comment for line 1 will not return a
    usable value.
    """

    def __init__(self, tokens, comment_tags):
        self.tokens = tokens
        self.comment_tags = comment_tags
        # index into ``tokens`` up to which earlier searches have looked
        self.offset = 0
        # highest line number a search has been run for so far
        self.last_lineno = 0

    def find_backwards(self, offset):
        """Search the tokens between the current offset and `offset`, newest
        first, for a comment whose first word is one of the comment tags.
        Returns a one-element list with the rest of that comment, or an
        empty list.  The current offset is moved to `offset` in either case.
        """
        try:
            for _, token_type, token_value in reversed(
                self.tokens[self.offset : offset]
            ):
                if token_type in ("comment", "linecomment"):
                    try:
                        prefix, comment = token_value.split(None, 1)
                    except ValueError:
                        # comment consists of a single word (or is empty)
                        continue
                    if prefix in self.comment_tags:
                        return [comment.rstrip()]
            return []
        finally:
            self.offset = offset

    def find_comments(self, lineno):
        """Return the tagged comment preceding `lineno`, as a list with at
        most one entry.  Line numbers have to be queried in ascending order;
        a query for an earlier line than a previous one returns an empty
        list.
        """
        if not self.comment_tags or self.last_lineno > lineno:
            return []
        # Remember how far we have searched so the guard above can reject
        # queries for earlier lines without rescanning.
        self.last_lineno = lineno
        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset :]):
            if token_lineno > lineno:
                return self.find_backwards(self.offset + idx)
        return self.find_backwards(len(self.tokens))


def babel_extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    .. versionchanged:: 2.5.1
       The `newstyle_gettext` flag can be set to `True` to enable newstyle
       gettext calls.

    .. versionchanged:: 2.7
       A `silent` option can now be provided.  If set to `False` template
       syntax errors are propagated instead of being ignored.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
             (comments will be empty currently)
    """
    extensions = set()
    for extension in options.get("extensions", "").split(","):
        extension = extension.strip()
        if not extension:
            continue
        extensions.add(import_string(extension))
    # the i18n extension is always needed to parse ``trans`` blocks
    extensions.add(InternationalizationExtension)

    def getbool(options, key, default=False):
        # option values are strings; accept the usual spellings of "true"
        return options.get(key, str(default)).lower() in ("1", "on", "yes", "true")

    silent = getbool(options, "silent", True)
    environment = Environment(
        options.get("block_start_string", BLOCK_START_STRING),
        options.get("block_end_string", BLOCK_END_STRING),
        options.get("variable_start_string", VARIABLE_START_STRING),
        options.get("variable_end_string", VARIABLE_END_STRING),
        options.get("comment_start_string", COMMENT_START_STRING),
        options.get("comment_end_string", COMMENT_END_STRING),
        options.get("line_statement_prefix") or LINE_STATEMENT_PREFIX,
        options.get("line_comment_prefix") or LINE_COMMENT_PREFIX,
        getbool(options, "trim_blocks", TRIM_BLOCKS),
        getbool(options, "lstrip_blocks", LSTRIP_BLOCKS),
        NEWLINE_SEQUENCE,
        getbool(options, "keep_trailing_newline", KEEP_TRAILING_NEWLINE),
        frozenset(extensions),
        cache_size=0,
        auto_reload=False,
    )

    if getbool(options, "trimmed"):
        environment.policies["ext.i18n.trimmed"] = True
    if getbool(options, "newstyle_gettext"):
        environment.newstyle_gettext = True

    source = fileobj.read().decode(options.get("encoding", "utf-8"))
    try:
        node = environment.parse(source)
        # the token list is only needed to look up translator comments
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        if not silent:
            raise
        # skip templates with syntax errors
        return

    finder = _CommentFinder(tokens, comment_tags)
    for lineno, func, message in extract_from_ast(node, keywords):
        yield lineno, func, message, finder.find_comments(lineno)


#: nicer import names
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension
debug = DebugExtension