lang-check.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. #!/usr/bin/env python3
  2. #
  3. # Version 1.0.2 - Build 43
  4. #############################################################################
  5. # Change log:
  6. # 7 May 2019, ondratu , Initial
  7. # 13 June 2019, 3d-gussner, Fix length false positives
  8. # 14 Sep. 2019, 3d-gussner, Prepare adding new language
  9. # 18 Sep. 2020, 3d-gussner, Fix execution of lang-check.py
  10. # 2 Apr. 2021, 3d-gussner, Fix and improve text wrap
  11. # 22 Apr. 2021, DRracer , add English source to output
  12. # 23 Apr. 2021, wavexx , improve
  13. # 24 Apr. 2021, wavexx , improve
  14. # 26 Apr. 2021, wavexx , add character ruler
  15. # 21 Dec. 2021, 3d-gussner, Prepare more community languages
  16. # Swedish
  17. # Danish
  18. # Slovenian
  19. # Hungarian
  20. # Luxembourgian
  21. # Croatian
  22. # 3 Jan. 2022, 3d-gussner, Prepare Lithuanian
  23. # 7 Jan. 2022, 3d-gussner, Check for Syntax errors and exit with error
  24. # , add Build number 'git rev-list --count HEAD lang-check.py'
  25. # 30 Jan. 2022, 3d-gussner, Add arguments. Requested by @AttilaSVK
  26. # --information == output all source and translated messages
  27. # --import-check == used by `lang-import.sh` to verify
  28. # newly import `lang_en_??.txt` files
  29. # 14 Mar. 2022, 3d-gussner, Check if translation isn't equal to origin
  30. #############################################################################
  31. """Check PO files for formatting errors."""
  32. from argparse import ArgumentParser
  33. from sys import stdout, stderr, exit
  34. import codecs
  35. import polib
  36. import textwrap
  37. import re
  38. import os
  39. from lib import charset as cs
  40. from lib.io import load_map
  # Column count assumed when a display definition gives none; also the
  # largest column count accepted without a warning.
  LCD_WIDTH = 20

  # Colour output is enabled on an interactive terminal whose TERM is not
  # "dumb", or unconditionally when the environment sets NO_COLOR to "0".
  _ON_COLOR_TERMINAL = stdout.isatty() and os.getenv("TERM", "dumb") != "dumb"
  COLORIZE = _ON_COLOR_TERMINAL or os.getenv('NO_COLOR') == "0"
  def color_maybe(color_attr, text):
      """Return *text* wrapped in an ANSI colour sequence when colour is enabled.

      ``color_attr`` is the numeric attribute placed in the escape sequence.
      When the module-level ``COLORIZE`` flag is false, *text* is returned
      unchanged.
      """
      if not COLORIZE:
          return text
      return '\033[0;' + str(color_attr) + 'm' + text + '\033[0m'
  def red(text):
      """Return *text* coloured with attribute 31 when colour is enabled."""
      return color_maybe(31, text)


  def green(text):
      """Return *text* coloured with attribute 32 when colour is enabled."""
      return color_maybe(32, text)


  def yellow(text):
      """Return *text* coloured with attribute 33 when colour is enabled."""
      return color_maybe(33, text)


  def cyan(text):
      """Return *text* coloured with attribute 36 when colour is enabled."""
      return color_maybe(36, text)
  def print_wrapped(wrapped_text, rows, cols):
      """Print wrapped lines inside a numbered frame padded to ``cols`` characters.

      A plain string is treated as a single line.  The row number of every
      line beyond the first ``rows`` lines is passed through ``red`` so that
      overflowing rows stand out.
      """
      lines = [wrapped_text] if type(wrapped_text) is str else wrapped_text
      row_format = ' {} |{:' + str(cols) + 's}|'
      for index, content in enumerate(lines):
          label = str(index + 1).rjust(3)
          if index >= rows:
              label = red(label)
          print(row_format.format(label, content))
  def print_truncated(text, cols):
      """Print a single-row preview of *text* inside a ``cols``-wide frame.

      The first ``cols`` characters are shown between two ``|`` bars, padded
      with blanks when the text is shorter.  Anything beyond ``cols`` is
      printed after the closing bar, passed through ``red`` to mark it as
      overflow.

      The opening bar is placed so that the text starts at column 4, which is
      the offset used for the ruler printed above single-row previews.
      """
      visible = text[:cols].ljust(cols)
      overflow = text[cols:]
      # only colour when there is something to flag
      suffix = red(overflow) if overflow else ''
      print('   |' + visible + '|' + suffix)
  def print_ruler(spc, cols):
      """Print a ruler of subscript digits to mark character columns.

      ``spc`` is the number of blanks printed before the ruler and ``cols``
      the number of marks.  The ten-digit pattern is repeated four times, so
      at most 40 marks are produced.  The marks are passed through ``cyan``.
      """
      indent = ' ' * spc
      marks = ('₀₁₂₃₄₅₆₇₈₉' * 4)[:cols]
      print(indent + cyan(marks))
  def print_source_translation(source, translation, wrapped_source, wrapped_translation, rows, cols):
      """Print the source text and its translation, each under a column ruler.

      Single-row definitions (``rows == 1``) show the raw strings truncated to
      ``cols``; all other definitions show the pre-wrapped line lists.  A blank
      line is printed at the end.
      """
      single_row = rows == 1
      # the ruler offset matches the frame drawn by the respective printer
      ruler_offset = 4 if single_row else 6
      sections = (
          (' source text:', source, wrapped_source),
          (' translated text:', translation, wrapped_translation),
      )
      for heading, raw, wrapped in sections:
          print(heading)
          print_ruler(ruler_offset, cols)
          if single_row:
              print_truncated(raw, cols)
          else:
              print_wrapped(wrapped, rows, cols)
      print()
  def highlight_trailing_white(text):
      """Make one trailing blank visible by replacing it with ``·``.

      For a string, a single space at the end of the string is replaced.
      For a sequence of lines, a new list is returned in which only the last
      line has been processed; the input sequence is left untouched.  An empty
      sequence yields an empty list instead of raising ``IndexError``.
      """
      if isinstance(text, str):
          return re.sub(r' $', '·', text)
      marked = list(text)
      if marked:
          # only the final line can carry the trailing whitespace of the text
          marked[-1] = highlight_trailing_white(marked[-1])
      return marked
  def wrap_text(text, cols):
      """Wrap *text* to ``cols`` columns and return the resulting list of lines.

      Every newline-separated input line is wrapped on its own with
      ``textwrap.TextWrapper``.  Blank input lines wrap to nothing and therefore
      contribute no output line.  The wrapper drops trailing whitespace, so the
      trailing whitespace of the whole text (as many blanks as ``rstrip()``
      removes) is appended again to the last chunk of every input line after
      the first one.
      """
      wrapper = textwrap.TextWrapper(width=cols)
      trailing = ' ' * (len(text) - len(text.rstrip()))
      wrapped = []
      for line in text.split('\n'):
          # wrap each input line in text individually
          chunk = wrapper.wrap(line)
          # a blank line yields an empty chunk: there is no last element to
          # pad, so skip it instead of failing with IndexError
          if wrapped and chunk:
              # add back trailing whitespace
              chunk[-1] += trailing
          wrapped.extend(chunk)
      return wrapped
  def ign_char_first(c):
      """Tell whether *c* is ignored when comparing first characters.

      Alphanumeric characters, ``%`` and ``?`` are ignored.
      """
      if c.isalnum():
          return True
      return c in ('%', '?')
  def ign_char_last(c):
      """Tell whether *c* is ignored when comparing last characters.

      Alphanumeric characters, ``.`` and ``'`` are ignored.
      """
      if c.isalnum():
          return True
      return c in ('.', "'")
  def check_translation(entry, msgids, is_pot, no_warning, no_suggest, warn_empty, warn_same, information, shorter):
      """Check one PO entry against its display definition and report problems.

      The first line of the entry comment carries the metadata: a token that
      starts with ``MSG`` followed by optional ``c=<cols>`` and ``r=<rows>``
      items.  Source and translation are wrapped to that definition and
      compared against it; findings are printed as ``[E]`` errors, ``[W]``
      warnings, ``[S]`` suggestions or ``[I]`` information.

      Parameters:
        entry: PO entry providing ``obsolete``, ``comment``, ``msgid``,
          ``msgstr`` and ``linenum``.
        msgids: set of msgids known to be in use, or None to treat every
          entry as known.
        is_pot: stop after the checks on the source text.
        no_warning: skip the optional warnings (missing definition, empty
          translation of a known msgid, trailing whitespace).
        no_suggest: skip all ``[S]`` suggestions except the ``shorter`` one.
        warn_empty: check comment metadata and empty translations even for
          unknown msgids.
        warn_same: also report one-word translations identical to the source.
        information: print every entry for which no error was counted.
        shorter: report translations that use at least two rows fewer than
          the definition allows.

      Returns:
        True for obsolete entries and when no error or counted warning was
        found, False otherwise.
      """
      # do not check obsolete/deleted entries
      if entry.obsolete:
          return True

      # fetch/decode entry for easy access
      meta = entry.comment.split('\n', 1)[0]
      source = entry.msgid
      translation = entry.msgstr
      line = entry.linenum
      known_msgid = msgids is None or source in msgids
      errors = 0

      # Check comment syntax (non-empty and include a MSG id)
      if known_msgid or warn_empty:
          if len(meta) == 0:
              print(red("[E]: Translation doesn't contain any comment metadata on line %d" % line))
              return False
          if not meta.startswith('MSG'):
              print(red("[E]: Critical syntax error: comment doesn't start with MSG on line %d" % line))
              print(red(" comment: " + meta))
              return False

      # Check if columns and rows are defined
      tokens = meta.split(' ')
      cols = None
      rows = None
      for item in tokens[1:]:
          try:
              key, val = item.split('=')
              if key == 'c':
                  cols = int(val)
              elif key == 'r':
                  rows = int(val)
              else:
                  raise ValueError
          except ValueError:
              # covers a missing/extra '=', a non-numeric value and unknown keys
              print(red("[E]: Invalid display definition on line %d" % line))
              print(red(" definition: " + meta))
              return False

      if not cols:
          if not no_warning and known_msgid and not rows:
              errors += 1
              print(yellow("[W]: No usable display definition on line %d" % line))
          # probably fullscreen, guess from the message length to continue checking
          cols = LCD_WIDTH
      if cols > LCD_WIDTH:
          errors += 1
          print(yellow("[W]: Invalid column count on line %d" % line))
      if not rows:
          rows = 1
      elif rows > 1 and cols != LCD_WIDTH:
          errors += 1
          print(yellow("[W]: Multiple rows with odd number of columns on line %d" % line))

      # Check if translation contains unsupported characters
      invalid_char = cs.translation_check(cs.unicode_to_source(translation))
      if invalid_char is not None:
          print(red('[E]: Critical syntax: Unhandled char %s found on line %d' % (repr(invalid_char), line)))
          print(red(' translation: ' + translation))
          return False

      # Pre-process the translation to translated characters for a correct preview and length check
      translation = cs.trans_replace(translation)

      wrapped_source = wrap_text(source, cols)
      rows_count_source = len(wrapped_source)
      wrapped_translation = wrap_text(translation, cols)
      rows_count_translation = len(wrapped_translation)

      # Incorrect number of rows/cols on the definition
      if rows == 1 and (len(source) > cols or rows_count_source > rows):
          errors += 1
          print(yellow('[W]: Source text longer than %d cols as defined on line %d:' % (cols, line)))
          print_ruler(4, cols)
          print_truncated(source, cols)
          print()
      elif rows_count_source > rows:
          errors += 1
          print(yellow('[W]: Wrapped source text longer than %d rows as defined on line %d:' % (rows, line)))
          print_ruler(6, cols)
          print_wrapped(wrapped_source, rows, cols)
          print()

      # All further checks are against the translation
      if is_pot:
          return (errors == 0)

      # Missing translation
      if len(translation) == 0 and (warn_empty or (not no_warning and known_msgid)):
          errors += 1
          if rows == 1:
              print(yellow("[W]: Empty translation for \"%s\" on line %d" % (source, line)))
          else:
              print(yellow("[W]: Empty translation on line %d" % line))
              print_ruler(6, cols)
              print_wrapped(wrapped_source, rows, cols)
              print()

      # Check for translation length too long
      if (rows_count_translation > rows) or (rows == 1 and len(translation) > cols):
          errors += 1
          print(red('[E]: Text is longer than definition on line %d: cols=%d rows=%d (rows diff=%d)'
                    % (line, cols, rows, rows_count_translation-rows)))
          print_source_translation(source, translation,
                                   wrapped_source, wrapped_translation,
                                   rows, cols)

      # Check for translation length shorter
      if shorter and (rows_count_translation < rows-1):
          print(yellow('[S]: Text is shorter than definition on line %d: cols=%d rows=%d (rows diff=%d)'
                       % (line, cols, rows, rows_count_translation-rows)))
          print_source_translation(source, translation,
                                   wrapped_source, wrapped_translation,
                                   rows, cols)

      # Different count of % sequences
      if source.count('%') != translation.count('%') and len(translation) > 0:
          errors += 1
          print(red('[E]: Unequal count of %% escapes on line %d:' % (line)))
          print_source_translation(source, translation,
                                   wrapped_source, wrapped_translation,
                                   rows, cols)

      # Different first/last character
      if not no_suggest and len(source) > 0 and len(translation) > 0:
          source_stripped = source.rstrip()
          translation_stripped = translation.rstrip()
          start_diff = not (ign_char_first(source[0]) and ign_char_first(translation[0])) and source[0] != translation[0]
          # whitespace-only text has no last visible character to compare
          end_diff = False
          if source_stripped and translation_stripped:
              source_end = source_stripped[-1]
              translation_end = translation_stripped[-1]
              end_diff = not (ign_char_last(source_end) and ign_char_last(translation_end)) and source_end != translation_end
          if start_diff or end_diff:
              if start_diff:
                  print(yellow('[S]: Differing first punctuation character (%s => %s) on line %d:' % (source[0], translation[0], line)))
              if end_diff:
                  # report the characters that were actually compared, not trailing blanks
                  print(yellow('[S]: Differing last punctuation character (%s => %s) on line %d:' % (source_end, translation_end, line)))
              print_source_translation(source, translation,
                                       wrapped_source, wrapped_translation,
                                       rows, cols)

      # Translation identical to the source (one-word texts only with warn_same)
      if not no_suggest and source == translation and (warn_same or len(source.split(' ', 1)) > 1):
          print(yellow('[S]: Translation same as original on line %d:' %line))
          print_source_translation(source, translation,
                                   wrapped_source, wrapped_translation,
                                   rows, cols)

      # Short translation
      if not no_suggest and len(source) > 0 and len(translation) > 0:
          if len(translation.rstrip()) < len(source.rstrip()) / 2:
              print(yellow('[S]: Short translation on line %d:' % (line)))
              print_source_translation(source, translation,
                                       wrapped_source, wrapped_translation,
                                       rows, cols)

      # Incorrect trailing whitespace in translation
      if not no_warning and len(translation) > 0 and \
         (source.rstrip() == source or (rows == 1 and len(source) == cols)) and \
         translation.rstrip() != translation and \
         (rows > 1 or len(translation) != len(source)):
          errors += 1
          print(yellow('[W]: Incorrect trailing whitespace for translation on line %d:' % (line)))
          source = highlight_trailing_white(source)
          translation = highlight_trailing_white(translation)
          wrapped_translation = highlight_trailing_white(wrapped_translation)
          print_source_translation(source, translation,
                                   wrapped_source, wrapped_translation,
                                   rows, cols)

      # show the information
      if information and errors == 0:
          print(green('[I]: %s' % (meta)))
          print_source_translation(source, translation,
                                   wrapped_source, wrapped_translation,
                                   rows, cols)
      return (errors == 0)
  def main():
      """Parse the command line and check every entry of the given PO file.

      Returns 0 when all entries pass, and 1 when the PO file does not exist
      or at least one entry fails its checks.
      """
      parser = ArgumentParser(description=__doc__)
      parser.add_argument("po", help="PO file to check")
      leading_flags = (
          ("--no-warning", "Disable warnings"),
          ("--no-suggest", "Disable suggestions"),
          ("--errors-only", "Only check errors"),
          ("--pot", "Do not check translations"),
          ("--information", "Output all translations"),
      )
      for flag, description in leading_flags:
          parser.add_argument(flag, action="store_true", help=description)
      parser.add_argument("--map",
                          help="Provide a map file to suppress warnings about unused translations")
      trailing_flags = (
          ("--warn-empty", "Warn about empty definitions and translations even if unused"),
          ("--warn-same", "Warn about one-word translations which are identical to the source"),
          ("--shorter", "Show message if it is shorter than expected."),
      )
      for flag, description in trailing_flags:
          parser.add_argument(flag, action="store_true", help=description)

      # load the translations
      args = parser.parse_args()
      if not os.path.isfile(args.po):
          print("{}: file does not exist or is not a regular file".format(args.po), file=stderr)
          return 1

      # --errors-only silences both warnings and suggestions
      if args.errors_only:
          args.no_warning = args.no_suggest = True

      # load the symbol map to suppress empty (but unused) translation warnings
      msgids = None
      if args.map:
          msgids = {
              cs.source_to_unicode(codecs.decode(sym['data'], 'unicode_escape', 'strict'))
              for sym in load_map(args.map)
              if type(sym['data']) is bytes
          }

      # check each translation in turn; every entry is checked even after a failure
      all_ok = True
      for entry in polib.pofile(args.po):
          entry_ok = check_translation(entry, msgids, args.pot, args.no_warning, args.no_suggest,
                                       args.warn_empty, args.warn_same, args.information, args.shorter)
          all_ok = all_ok and entry_ok
      return 0 if all_ok else 1
  # Run the checker when executed as a script; the value returned by main()
  # becomes the process exit status.
  if __name__ == "__main__":
      raise SystemExit(main())