123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351 |
- #!/usr/bin/env python3
- #
- # Version 1.0.2 - Build 43
- #############################################################################
- # Change log:
- # 7 May 2019, ondratu , Initial
- # 13 June 2019, 3d-gussner, Fix length false positives
- # 14 Sep. 2019, 3d-gussner, Prepare adding new language
- # 18 Sep. 2020, 3d-gussner, Fix execution of lang-check.py
- # 2 Apr. 2021, 3d-gussner, Fix and improve text wrap
- # 22 Apr. 2021, DRracer , add English source to output
- # 23 Apr. 2021, wavexx , improve
- # 24 Apr. 2021, wavexx , improve
- # 26 Apr. 2021, wavexx , add character ruler
- # 21 Dec. 2021, 3d-gussner, Prepare more community languages
- # Swedish
- # Danish
- # Slovenian
- # Hungarian
- # Luxembourgian
- # Croatian
- # 3 Jan. 2022, 3d-gussner, Prepare Lithuanian
- # 7 Jan. 2022, 3d-gussner, Check for Syntax errors and exit with error
- # , add Build number 'git rev-list --count HEAD lang-check.py'
- # 30 Jan. 2022, 3d-gussner, Add arguments. Requested by @AttilaSVK
- # --information == output all source and translated messages
- # --import-check == used by `lang-import.sh` to verify
- # newly import `lang_en_??.txt` files
- # 14 Mar. 2022, 3d-gussner, Check if translation isn't equal to origin
- #############################################################################
- """Check PO files for formatting errors."""
- from argparse import ArgumentParser
- from sys import stdout, stderr, exit
- import codecs
- import polib
- import textwrap
- import re
- import os
- from lib import charset as cs
- from lib.io import load_map
# Column count used as the default and as the upper bound for a message's
# "c=" display definition.
LCD_WIDTH = 20

# Emit ANSI colour codes when NO_COLOR is exactly "0", or when stdout is a
# terminal whose TERM is set to something other than "dumb".
COLORIZE = os.getenv('NO_COLOR') == "0" or (
    stdout.isatty() and os.getenv("TERM", "dumb") != "dumb"
)
def color_maybe(color_attr, text):
    """Wrap `text` in an ANSI SGR colour sequence when colouring is enabled.

    Args:
        color_attr: SGR attribute number (converted with ``str``).
        text: String to colourise.

    Returns:
        `text` surrounded by the escape sequences if ``COLORIZE`` is true,
        otherwise `text` unchanged.
    """
    if not COLORIZE:
        return text
    return ''.join(('\033[0;', str(color_attr), 'm', text, '\033[0m'))
def red(text):
    """Return `text` coloured with SGR attribute 31 (red) via color_maybe."""
    return color_maybe(31, text)


def green(text):
    """Return `text` coloured with SGR attribute 32 (green) via color_maybe."""
    return color_maybe(32, text)


def yellow(text):
    """Return `text` coloured with SGR attribute 33 (yellow) via color_maybe."""
    return color_maybe(33, text)


def cyan(text):
    """Return `text` coloured with SGR attribute 36 (cyan) via color_maybe."""
    return color_maybe(36, text)
def print_wrapped(wrapped_text, rows, cols):
    """Print wrapped lines, numbered and framed to `cols` characters.

    Args:
        wrapped_text: A list of already-wrapped lines, or a single string
            which is treated as a one-line list.
        rows: Number of rows allowed; the numbers of lines beyond this count
            are printed in red.
        cols: Minimum width each line is left-padded to inside the frame.
    """
    if isinstance(wrapped_text, str):
        wrapped_text = [wrapped_text]
    for index, line in enumerate(wrapped_text):
        label = str(index + 1).rjust(3)
        if index >= rows:
            # this line does not fit in the defined number of rows
            label = red(label)
        print(' {} |{:{}s}|'.format(label, line, cols))
def print_truncated(text, cols):
    """Print `text` in a frame of `cols` characters.

    The first `cols` characters go inside the frame (padded with spaces when
    shorter); anything beyond is printed in red after the closing bar.
    """
    inside = text[:cols].ljust(cols)
    overflow = text[cols:]
    outside = red(overflow) if len(text) > cols else ''
    print(' |' + inside + '|' + outside)
def print_ruler(spc, cols):
    """Print a cyan ruler of subscript digits, indented by `spc` spaces.

    The digit sequence repeats four times (40 characters) and is cut to
    `cols` characters.
    """
    indent = ' ' * spc
    digits = '₀₁₂₃₄₅₆₇₈₉' * 4
    print(indent + cyan(digits[:cols]))
def print_source_translation(source, translation, wrapped_source, wrapped_translation, rows, cols):
    """Print the source and the translated text below a character ruler.

    Single-row messages (`rows` == 1) are shown with print_truncated using
    the raw strings; all others are shown with print_wrapped using the
    pre-wrapped line lists. A blank line is printed at the end.
    """
    single_row = rows == 1
    sections = (
        (' source text:', source, wrapped_source),
        (' translated text:', translation, wrapped_translation),
    )
    for heading, plain, wrapped in sections:
        print(heading)
        if single_row:
            print_ruler(4, cols)
            print_truncated(plain, cols)
        else:
            print_ruler(6, cols)
            print_wrapped(wrapped, rows, cols)
    print()
def highlight_trailing_white(text):
    """Replace one trailing space with '·' so it is visible in the output.

    Args:
        text: Either a string, or a list of strings (wrapped lines).

    Returns:
        For a string, the string with a single space at the end replaced by
        '·'. For a list, a shallow copy in which only the last element has
        been processed; the input list is not modified. An empty list is
        returned as an empty copy.
    """
    if isinstance(text, str):
        return re.sub(r' $', '·', text)
    ret = text[:]
    if ret:
        # only the last wrapped line can carry the trailing whitespace
        ret[-1] = highlight_trailing_white(ret[-1])
    return ret
def wrap_text(text, cols):
    """Wrap `text` to `cols` columns, handling each input line separately.

    Args:
        text: The message; embedded '\\n' characters start a new row.
        cols: Maximum width of a wrapped row.

    Returns:
        A list of wrapped rows. Input lines that wrap to nothing (empty or
        whitespace-only) contribute no rows. For every input line after the
        first, the trailing whitespace of the whole `text` is appended to
        that line's last row.
    """
    wrapper = textwrap.TextWrapper(width=cols)
    trailing = ' ' * (len(text) - len(text.rstrip()))
    ret = []
    for line in text.split('\n'):
        # wrap each input line in text individually
        tmp = wrapper.wrap(line)
        if ret and tmp:
            # add back trailing whitespace; `tmp` is empty for blank lines,
            # in which case there is no row to append it to
            tmp[-1] += trailing
        ret.extend(tmp)
    return ret
def ign_char_first(c):
    """Return True if `c` is alphanumeric or one of '%' and '?'.

    Used to decide whether a differing first character should be ignored.
    """
    if c.isalnum():
        return True
    return c in ('%', '?')
def ign_char_last(c):
    """Return True if `c` is alphanumeric or one of '.' and "'".

    Used to decide whether a differing last character should be ignored.
    """
    if c.isalnum():
        return True
    return c in ('.', "'")
def check_translation(entry, msgids, is_pot, no_warning, no_suggest, warn_empty, warn_same, information, shorter):
    """Check one PO entry against its display definition and print findings.

    The first line of the entry comment is expected to look like
    ``MSG_ID c=<cols> r=<rows>``; source and translation are then checked
    against that definition.

    Args:
        entry: PO entry providing ``obsolete``, ``comment``, ``msgid``,
            ``msgstr`` and ``linenum``.
        msgids: Set of message ids considered in use, or None to treat every
            entry as in use.
        is_pot: When true, stop after the source-text checks.
        no_warning: Suppress the "no display definition" and trailing
            whitespace warnings.
        no_suggest: Suppress the [S] suggestions about punctuation,
            identical and short translations.
        warn_empty: Also check comment syntax and empty translations for
            entries that are not in `msgids`.
        warn_same: Also report one-word translations identical to the source.
        information: Print every entry that produced no error.
        shorter: Report translations using fewer rows than defined.

    Returns:
        True when no error or warning was counted for the entry (obsolete
        entries always return True), False otherwise.
    """
    # do not check obsolete/deleted entries
    if entry.obsolete:
        return True

    # fetch/decode entry for easy access
    meta = entry.comment.split('\n', 1)[0]
    source = entry.msgid
    translation = entry.msgstr
    line = entry.linenum
    known_msgid = msgids is None or source in msgids
    errors = 0

    # Check comment syntax (non-empty and include a MSG id)
    if known_msgid or warn_empty:
        if len(meta) == 0:
            print(red("[E]: Translation doesn't contain any comment metadata on line %d" % line))
            return False
        if not meta.startswith('MSG'):
            print(red("[E]: Critical syntax error: comment doesn't start with MSG on line %d" % line))
            print(red(" comment: " + meta))
            return False

    # Check if columns and rows are defined
    tokens = meta.split(' ')
    cols = None
    rows = None
    for item in tokens[1:]:
        try:
            # a token without exactly one '=' fails the unpacking with ValueError
            key, val = item.split('=')
            if key == 'c':
                cols = int(val)
            elif key == 'r':
                rows = int(val)
            else:
                raise ValueError
        except ValueError:
            print(red("[E]: Invalid display definition on line %d" % line))
            print(red(" definition: " + meta))
            return False

    if not cols:
        if not no_warning and known_msgid and not rows:
            errors += 1
            print(yellow("[W]: No usable display definition on line %d" % line))
        # probably fullscreen, guess from the message length to continue checking
        cols = LCD_WIDTH
    if cols > LCD_WIDTH:
        errors += 1
        print(yellow("[W]: Invalid column count on line %d" % line))
    if not rows:
        rows = 1
    elif rows > 1 and cols != LCD_WIDTH:
        errors += 1
        print(yellow("[W]: Multiple rows with odd number of columns on line %d" % line))

    # Check if translation contains unsupported characters
    invalid_char = cs.translation_check(cs.unicode_to_source(translation))
    if invalid_char is not None:
        print(red('[E]: Critical syntax: Unhandled char %s found on line %d' % (repr(invalid_char), line)))
        print(red(' translation: ' + translation))
        return False

    # Pre-process the translation to translated characters for a correct preview and length check
    translation = cs.trans_replace(translation)

    wrapped_source = wrap_text(source, cols)
    rows_count_source = len(wrapped_source)
    wrapped_translation = wrap_text(translation, cols)
    rows_count_translation = len(wrapped_translation)

    # Incorrect number of rows/cols on the definition
    if rows == 1 and (len(source) > cols or rows_count_source > rows):
        errors += 1
        print(yellow('[W]: Source text longer than %d cols as defined on line %d:' % (cols, line)))
        print_ruler(4, cols)
        print_truncated(source, cols)
        print()
    elif rows_count_source > rows:
        errors += 1
        print(yellow('[W]: Wrapped source text longer than %d rows as defined on line %d:' % (rows, line)))
        print_ruler(6, cols)
        print_wrapped(wrapped_source, rows, cols)
        print()

    # All further checks are against the translation
    if is_pot:
        return (errors == 0)

    # Missing translation
    if len(translation) == 0 and (known_msgid or warn_empty):
        errors += 1
        if rows == 1:
            print(yellow("[W]: Empty translation for \"%s\" on line %d" % (source, line)))
        else:
            print(yellow("[W]: Empty translation on line %d" % line))
            print_ruler(6, cols)
            print_wrapped(wrapped_source, rows, cols)
            print()

    # Check for translation length too long
    if (rows_count_translation > rows) or (rows == 1 and len(translation) > cols):
        errors += 1
        print(red('[E]: Text is longer than definition on line %d: cols=%d rows=%d (rows diff=%d)'
                  % (line, cols, rows, rows_count_translation-rows)))
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # Check for translation length shorter (a suggestion only, not counted as an error)
    if shorter and (rows_count_translation < rows-1):
        print(yellow('[S]: Text is shorter than definition on line %d: cols=%d rows=%d (rows diff=%d)'
                     % (line, cols, rows, rows_count_translation-rows)))
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # Different count of % sequences
    if source.count('%') != translation.count('%') and len(translation) > 0:
        errors += 1
        print(red('[E]: Unequal count of %% escapes on line %d:' % (line)))
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # Different first/last character
    if not no_suggest and len(source) > 0 and len(translation) > 0:
        source_stripped = source.rstrip()
        translation_stripped = translation.rstrip()
        start_diff = not (ign_char_first(source[0]) and ign_char_first(translation[0])) and source[0] != translation[0]
        if source_stripped and translation_stripped:
            # compare the last non-whitespace characters
            source_end = source_stripped[-1]
            translation_end = translation_stripped[-1]
            end_diff = not (ign_char_last(source_end) and ign_char_last(translation_end)) and source_end != translation_end
        else:
            # whitespace-only text has no last character to compare
            end_diff = False
        if start_diff or end_diff:
            if start_diff:
                print(yellow('[S]: Differing first punctuation character (%s => %s) on line %d:' % (source[0], translation[0], line)))
            if end_diff:
                # report the characters that were actually compared, not trailing whitespace
                print(yellow('[S]: Differing last punctuation character (%s => %s) on line %d:' % (source_end, translation_end, line)))
            print_source_translation(source, translation,
                                     wrapped_source, wrapped_translation,
                                     rows, cols)

    # Translation identical to the source (one-word messages only with warn_same)
    if not no_suggest and source == translation and (warn_same or len(source.split(' ', 1)) > 1):
        print(yellow('[S]: Translation same as original on line %d:' %line))
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # Short translation
    if not no_suggest and len(source) > 0 and len(translation) > 0:
        if len(translation.rstrip()) < len(source.rstrip()) / 2:
            print(yellow('[S]: Short translation on line %d:' % (line)))
            print_source_translation(source, translation,
                                     wrapped_source, wrapped_translation,
                                     rows, cols)

    # Incorrect trailing whitespace in translation
    if not no_warning and len(translation) > 0 and \
       (source.rstrip() == source or (rows == 1 and len(source) == cols)) and \
       translation.rstrip() != translation and \
       (rows > 1 or len(translation) != len(source)):
        errors += 1
        print(yellow('[W]: Incorrect trailing whitespace for translation on line %d:' % (line)))
        source = highlight_trailing_white(source)
        translation = highlight_trailing_white(translation)
        wrapped_translation = highlight_trailing_white(wrapped_translation)
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)

    # show the information
    if information and errors == 0:
        print(green('[I]: %s' % (meta)))
        print_source_translation(source, translation,
                                 wrapped_source, wrapped_translation,
                                 rows, cols)
    return (errors == 0)
def main():
    """Parse the command line and check every entry of the given PO file.

    Returns:
        0 when all entries pass, 1 when the PO file is not a regular file or
        at least one entry fails its checks.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument("po", help="PO file to check")
    parser.add_argument(
        "--no-warning", action="store_true",
        help="Disable warnings")
    parser.add_argument(
        "--no-suggest", action="store_true",
        help="Disable suggestions")
    parser.add_argument(
        "--pot", action="store_true",
        help="Do not check translations")
    parser.add_argument(
        "--information", action="store_true",
        help="Output all translations")
    parser.add_argument("--map",
        help="Provide a map file to suppress warnings about unused translations")
    parser.add_argument(
        "--warn-empty", action="store_true",
        help="Warn about empty definitions and translations even if unused")
    parser.add_argument(
        "--warn-same", action="store_true",
        help="Warn about one-word translations which are identical to the source")
    parser.add_argument(
        "--shorter", action="store_true",
        help="Show message if it is shorter than expected.")

    # load the translations
    args = parser.parse_args()
    if not os.path.isfile(args.po):
        print("{}: file does not exist or is not a regular file".format(args.po), file=stderr)
        return 1

    # load the symbol map to suppress empty (but unused) translation warnings
    msgids = None
    if args.map:
        # only symbols whose data is a byte string are decoded into message ids
        msgids = {
            cs.source_to_unicode(codecs.decode(sym['data'], 'unicode_escape', 'strict'))
            for sym in load_map(args.map)
            if isinstance(sym['data'], bytes)
        }

    # check each translation in turn; every entry is checked even after a failure
    status = True
    for entry in polib.pofile(args.po):
        status &= check_translation(entry, msgids, args.pot, args.no_warning, args.no_suggest,
                                    args.warn_empty, args.warn_same, args.information, args.shorter)
    return 0 if status else 1
if __name__ == "__main__":
    # hand main()'s return value to the shell as the process exit status
    exit_status = main()
    exit(exit_status)
|