|
| 1 | +"""Generates a width table for Unicode characters. |
| 2 | +
|
| 3 | +This script generates a width table for Unicode characters that are not |
| 4 | +narrow (width 1). The table is written to src/black/_width_table.py (note |
| 5 | +that although this file is generated, it is checked into Git) and is used |
| 6 | +by the char_width() function in src/black/strings.py. |
| 7 | +
|
| 8 | +You should run this script when you upgrade wcwidth, which is expected to |
| 9 | +happen when a new Unicode version is released. The generated table contains |
| 10 | +the version of wcwidth and Unicode that it was generated for. |
| 11 | +
|
| 12 | +In order to run this script, you need to install the latest version of wcwidth. |
| 13 | +You can do this by running: |
| 14 | +
|
| 15 | + pip install -U wcwidth |
| 16 | +
|
| 17 | +""" |
| 18 | +import sys |
| 19 | +from os.path import basename, dirname, join |
| 20 | +from typing import Iterable, Tuple |
| 21 | + |
| 22 | +import wcwidth |
| 23 | + |
| 24 | + |
def make_width_table() -> Iterable[Tuple[int, int, int]]:
    """Yield ``(first, last, width)`` runs of code points wider than one cell.

    Every code point from 0 through ``sys.maxunicode`` is measured with
    ``wcwidth.wcwidth``. Consecutive code points that report the same width
    are merged into a single inclusive ``first``..``last`` range, and the
    ranges are produced in ascending code point order.
    """
    # The run currently being accumulated, or None before the first wide
    # character has been seen.
    pending = None
    for codepoint in range(sys.maxunicode + 1):
        width = wcwidth.wcwidth(chr(codepoint))
        if width <= 1:
            # Narrow and zero-width characters are left out of the table so
            # that they are treated as single-width. Treating zero-width
            # characters as single-width is consistent with the heuristics
            # built on top of str.isascii() in the str_width() function in
            # strings.py.
            continue
        if pending is not None:
            first, last, run_width = pending
            if width == run_width and codepoint == last + 1:
                # Same width and directly adjacent: grow the current run.
                pending = (first, codepoint, run_width)
                continue
            # A gap or a width change closes the current run.
            yield pending
        pending = (codepoint, codepoint, width)
    if pending is not None:
        # Flush the run that was still open when the scan ended.
        yield pending
| 47 | + |
| 48 | + |
def main() -> None:
    """Regenerate ``src/black/_width_table.py`` next to this script's parent.

    The output path is resolved relative to this file
    (``../src/black/_width_table.py``). The generated module starts with a
    header recording this script's name, ``wcwidth.__version__`` and the last
    entry of ``wcwidth.list_versions()``, followed by a ``WIDTH_TABLE`` list
    holding one ``(first, last, width)`` triple per line as produced by
    ``make_width_table()``. Any existing file at that path is overwritten.
    """
    table_path = join(dirname(__file__), "..", "src", "black", "_width_table.py")
    # The generated module is checked into Git, so pin the encoding and the
    # line endings rather than inheriting platform defaults; this keeps the
    # output byte-for-byte identical no matter where the script is run.
    with open(table_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(
            f"""# Generated by {basename(__file__)}
# wcwidth {wcwidth.__version__}
# Unicode {wcwidth.list_versions()[-1]}
import sys
from typing import List, Tuple

if sys.version_info < (3, 8):
    from typing_extensions import Final
else:
    from typing import Final

WIDTH_TABLE: Final[List[Tuple[int, int, int]]] = [
"""
        )
        # One table row per line, indented as a regular list element.
        f.writelines(f"    {triple!r},\n" for triple in make_width_table())
        f.write("]\n")
| 70 | + |
| 71 | + |
if __name__ == "__main__":
    # Only regenerate the table when run as a script, never on import.
    main()
0 commit comments