D7net
Home
Console
Upload
information
Create File
Create Folder
About
Tools
:
/
proc
/
thread-self
/
root
/
opt
/
imunify360
/
venv
/
lib
/
python3.11
/
site-packages
/
babel
/
messages
/
Filename :
jslexer.py
back
Copy
"""
    babel.messages.jslexer
    ~~~~~~~~~~~~~~~~~~~~~~

    A simple JavaScript 1.5 lexer which is used for the JavaScript
    extractor.

    :copyright: (c) 2013-2023 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""
from __future__ import annotations

import re
from collections.abc import Generator
from typing import NamedTuple

# Longest operators first so the alternation built from this list prefers
# e.g. ``>>>=`` over ``>``.  ``/`` is deliberately absent: it is ambiguous
# with regexp literals and is resolved separately inside `tokenize`.
operators: list[str] = sorted([
    '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
    '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
    '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
    '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
], key=len, reverse=True)

# Single-character escapes understood by `unquote_string`.
escapes: dict[str, str] = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)
dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE)
division_re = re.compile(r'/=?')
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
line_re = re.compile(r'(\r\n|\n|\r)')
line_join_re = re.compile(r'\\' + line_re.pattern)
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
hex_escape_re = re.compile(r'[a-fA-F0-9]{1,2}')


class Token(NamedTuple):
    """A single lexed token: its type, its raw source text and its line."""

    type: str
    value: str
    lineno: int


# Ordered tokenization rules; the first pattern matching at the current
# position wins.  A token type of ``None`` means "consume but do not emit".
_rules: list[tuple[str | None, re.Pattern[str]]] = [
    (None, re.compile(r'\s+', re.UNICODE)),
    (None, re.compile(r'<!--.*')),
    ('linecomment', re.compile(r'//.*')),
    ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
    ('dotted_name', dotted_name_re),
    ('name', name_re),
    ('number', re.compile(r'''(
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)? |
        (0x[a-fA-F0-9]+)
    )''', re.VERBOSE)),
    ('jsx_tag', re.compile(r'(?:</?[^>\s]+|/>)', re.I)),  # May be mangled in `get_rules`
    ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
    ('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)),
    ('string', re.compile(r'''(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)'  |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )''', re.VERBOSE | re.DOTALL))
]


def get_rules(jsx: bool, dotted: bool, template_string: bool) -> list[tuple[str | None, re.Pattern[str]]]:
    """
    Get a tokenization rule list given the passed syntax options.

    Internal to this module.

    :param jsx: keep the rules whose type name contains ``jsx``.
    :param dotted: keep the dotted-name rule; its tokens are reported
                   with type ``name``.
    :param template_string: keep the template string rule.
    :return: the filtered ``(token_type, pattern)`` pairs, in `_rules` order.
    """
    rules = []
    for token_type, rule in _rules:
        if not jsx and token_type and 'jsx' in token_type:
            continue
        if not template_string and token_type == 'template_string':
            continue
        if token_type == 'dotted_name':
            if not dotted:
                continue
            # Callers only ever see plain 'name' tokens.
            token_type = 'name'
        rules.append((token_type, rule))
    return rules


def indicates_division(token: Token) -> bool:
    """A helper function that helps the tokenizer to decide if the current
    token may be followed by a division operator.

    :param token: the most recently emitted token.
    :return: ``True`` if a following ``/`` should be read as a division
             operator rather than as the start of a regexp literal.
    """
    if token.type == 'operator':
        return token.value in (')', ']', '}', '++', '--')
    # A template literal is a value just like a quoted string, so a slash
    # following it is a division as well.
    return token.type in ('name', 'number', 'string', 'template_string', 'regexp')


def unquote_string(string: str) -> str:
    """Unquote a string with JavaScript rules.  The string has to start with
    string delimiters (``'``, ``"`` or the back-tick/grave accent (for template strings).)

    A backslash immediately followed by a line break is replaced by the line
    break itself; unknown escapes lose their backslash; a lone backslash at
    the very end of the content is kept literally.

    :param string: the delimited string literal, delimiters included.
    :return: the content of the literal with escapes resolved.
    """
    assert string and string[0] == string[-1] and string[0] in '"\'`', \
        'string provided is not properly delimited'
    string = line_join_re.sub('\\1', string[1:-1])
    result = []
    add = result.append
    pos = 0

    while True:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # A backslash as the very last character has nothing to escape.
        # Keep it as-is instead of indexing past the end of the string.
        if escape_pos + 1 >= len(string):
            add('\\')
            pos = len(string)
            break

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes.  try to consume up to four characters of
        # hexadecimal characters and try to interpret them as unicode
        # character point.  If there is no such character point, put
        # all the consumed characters into the string.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
                        add(chr(int(escaped_value, 16)))
                    except ValueError:
                        pass
                    else:
                        pos = escape_pos + 6
                        continue
                add(next_char + escaped_value)
                pos = escaped.end()
                continue
            else:
                add(next_char)

        # hex escapes. conversion from 2-digits hex to char is infallible
        elif next_char in 'xX':
            escaped = hex_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                add(chr(int(escaped_value, 16)))
                pos = escape_pos + 2 + len(escaped_value)
                continue
            else:
                add(next_char)

        # bogus escape.  Just remove the backslash.
        else:
            add(next_char)
        pos = escape_pos + 2

    if pos < len(string):
        add(string[pos:])

    return ''.join(result)


def tokenize(source: str, jsx: bool = True, dotted: bool = True, template_string: bool = True, lineno: int = 1) -> Generator[Token, None, None]:
    """
    Tokenize JavaScript/JSX source.  Returns a generator of tokens.

    :param source: the source text to tokenize.
    :param jsx: Enable (limited) JSX parsing.
    :param dotted: Read dotted names as single name token.
    :param template_string: Support ES6 template strings
    :param lineno: starting line number (optional)
    """
    # Comments are emitted as tokens but, like whitespace, they must not
    # influence whether a following slash is a division or a regexp.
    comment_types = ('linecomment', 'multilinecomment')
    may_divide = False
    pos = 0
    end = len(source)
    rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)

    while pos < end:
        # handle regular rules first
        for token_type, rule in rules:  # noqa: B007
            match = rule.match(source, pos)
            if match is not None:
                break
        # if we don't have a match we don't give up yet, but check for
        # division operators or regular expression literals, based on
        # the status of `may_divide` which is determined by the last
        # processed non-whitespace, non-comment token using
        # `indicates_division`.
        else:
            if may_divide:
                match = division_re.match(source, pos)
                token_type = 'operator'
            else:
                match = regex_re.match(source, pos)
                token_type = 'regexp'
            if match is None:
                # woops. invalid syntax. jump one char ahead and try again.
                pos += 1
                continue

        token_value = match.group()
        if token_type is not None:
            token = Token(token_type, token_value, lineno)
            if token_type not in comment_types:
                may_divide = indicates_division(token)
            yield token
        # Tokens (and skipped whitespace) may span several lines.
        lineno += len(line_re.findall(token_value))
        pos = match.end()