lang-check.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. #!/usr/bin/env python3
  2. #
  3. # Version 1.0.2 - Build 37
  4. #############################################################################
  5. # Change log:
  6. # 7 May 2019, Ondrej Tuma, Initial
  7. # 9 June 2020, 3d-gussner, Added version and Change log
  8. # 9 June 2020, 3d-gussner, Wrap text to 20 char and rows
  9. # 9 June 2020, 3d-gussner, colored output
  10. # 2 Apr. 2021, 3d-gussner, Fix and improve text wrap
  11. # 22 Apr. 2021, DRracer , add English source to output
  12. # 23 Apr. 2021, wavexx , improve
  13. # 24 Apr. 2021, wavexx , improve
  14. # 26 Apr. 2021, 3d-gussner, add character ruler
  15. # 07 Jan. 2022, 3d-gussner, Check for Syntax errors and exit with error
  16. # , add Build number 'git rev-list --count HEAD lang-check.py'
  17. #############################################################################
  18. #
  19. # Expected syntax of the files, which other scripts depend on
  20. # 'lang_en.txt'
  21. # 1st line: '#MSG_'<some text>' c='<max chars in a column>' r='<max rows> ; '#MSG' is mandatory while 'c=' and 'r=' aren't but should be there
  22. # 2nd line: '"'<original message used in the source code>'"' ; '"' double quotes at the beginning and end of message are mandatory
  23. # 3rd line: LF ; Line feed is mandatory between messages
  24. #
  25. # 'lang_en_??.txt'
  26. # 1st line: '#MSG_'<some text>' c='<max chars in a column>' r='<max rows> ; '#MSG' is mandatory while 'c=' and 'r=' aren't but should be there
  27. # 2nd line: '"'<original message used in the source code>'"' ; '"' double quotes at the beginning and end of message are mandatory
  28. # 3rd line: '"'<translated message>'"' ; '"' double quotes at the beginning and end of message are mandatory
  29. # 4th line: LF ; Line feed is mandatory between messages
  30. #
  31. """Check lang files."""
  32. from argparse import ArgumentParser
  33. from traceback import print_exc
  34. from sys import stdout, stderr, exit
  35. import textwrap
  36. import re
  37. def color_maybe(color_attr, text):
  38. if stdout.isatty():
  39. return '\033[0;' + str(color_attr) + 'm' + text + '\033[0m'
  40. else:
  41. return text
  42. red = lambda text: color_maybe(31, text)
  43. green = lambda text: color_maybe(32, text)
  44. yellow = lambda text: color_maybe(33, text)
  45. cyan = lambda text: color_maybe(36, text)
  46. def print_wrapped(wrapped_text, rows, cols):
  47. if type(wrapped_text) == str:
  48. wrapped_text = [wrapped_text]
  49. for r, line in enumerate(wrapped_text):
  50. r_ = str(r + 1).rjust(3)
  51. if r >= rows:
  52. r_ = red(r_)
  53. print((' {} |{:' + str(cols) + 's}|').format(r_, line))
  54. def print_truncated(text, cols):
  55. if len(text) <= cols:
  56. prefix = text.ljust(cols)
  57. suffix = ''
  58. else:
  59. prefix = text[0:cols]
  60. suffix = red(text[cols:])
  61. print(' |' + prefix + '|' + suffix)
  62. def print_ruler(spc, cols):
  63. print(' ' * spc + cyan(('₀₁₂₃₄₅₆₇₈₉'*4)[:cols]))
  64. def print_source_translation(source, translation, wrapped_source, wrapped_translation, rows, cols):
  65. if rows == 1:
  66. print(' source text:')
  67. print_ruler(4, cols);
  68. print_truncated(source, cols)
  69. print(' translated text:')
  70. print_ruler(4, cols);
  71. print_truncated(translation, cols)
  72. else:
  73. print(' source text:')
  74. print_ruler(6, cols);
  75. print_wrapped(wrapped_source, rows, cols)
  76. print(' translated text:')
  77. print_ruler(6, cols);
  78. print_wrapped(wrapped_translation, rows, cols)
  79. print()
  80. def highlight_trailing_white(text):
  81. if type(text) == str:
  82. return re.sub(r' $', '·', text)
  83. else:
  84. ret = text[:]
  85. ret[-1] = highlight_trailing_white(ret[-1])
  86. return ret
  87. def wrap_text(text, cols):
  88. # wrap text
  89. ret = list(textwrap.TextWrapper(width=cols).wrap(text))
  90. if len(ret):
  91. # add back trailing whitespace
  92. ret[-1] += ' ' * (len(text) - len(text.rstrip()))
  93. return ret
  94. def unescape(text):
  95. if '\\' not in text:
  96. return text
  97. return text.encode('ascii').decode('unicode_escape')
  98. def ign_char_first(c):
  99. return c.isalnum() or c in {'%', '?'}
  100. def ign_char_last(c):
  101. return c.isalnum() or c in {'.', "'"}
  102. def parse_txt(lang, no_warning, warn_empty):
  103. """Parse txt file and check strings to display definition."""
  104. if lang == "en":
  105. file_path = "lang_en.txt"
  106. else:
  107. file_path = "lang_en_%s.txt" % lang
  108. print(green("Start %s lang-check" % lang))
  109. lines = 1
  110. with open(file_path) as src:
  111. while True:
  112. message = src.readline()
  113. #print(message) #Debug
  114. #check syntax 1st line starts with `#MSG`
  115. if (message[0:4] != '#MSG'):
  116. print(red("[E]: Critical syntax error: 1st line doesn't start with #MSG on line %d" % lines))
  117. print(red(message))
  118. exit(1)
  119. #Check if columns and rows are defined
  120. comment = message.split(' ')
  121. #Check if columns and rows are defined
  122. cols = None
  123. rows = None
  124. for item in comment[1:]:
  125. key, val = item.split('=')
  126. if key == 'c':
  127. cols = int(val)
  128. #print ("c=",cols) #Debug
  129. elif key == 'r':
  130. rows = int(val)
  131. #print ("r=",rows) #Debug
  132. else:
  133. raise RuntimeError(
  134. "Unknown display definition %s on line %d" %
  135. (' '.join(comment), lines))
  136. if cols is None and rows is None:
  137. if not no_warning:
  138. print(yellow("[W]: No display definition on line %d" % lines))
  139. cols = len(translation) # propably fullscreen
  140. if rows is None:
  141. rows = 1
  142. elif rows > 1 and cols != 20:
  143. print(yellow("[W]: Multiple rows with odd number of columns on line %d" % lines))
  144. #Wrap text to 20 chars and rows
  145. source = src.readline()[:-1] #read whole line
  146. #check if 2nd line of origin message beginns and ends with " double quote
  147. if (source[0]!="\""):
  148. print(red('[E]: Critical syntax error: Missing " at beginning of message in source on line %d' % lines))
  149. print(red(source))
  150. exit(1)
  151. if (source[-1]=="\""):
  152. source = source.strip('"') #remove " double quotes from message
  153. else:
  154. print(red('[E]: Critical syntax error: Missing " at end of message in source on line %d' % lines))
  155. print(red(source))
  156. exit(1)
  157. #print (source) #Debug
  158. translation = src.readline()[:-1]#read whole line
  159. #check if 3rd line of translation message beginns and ends with " double quote
  160. if (translation[0]!="\""):
  161. print(red('[E]: Critical syntax error: Missing " at beginning of message in translation on line %d' % lines))
  162. print(red(translation))
  163. exit(1)
  164. if (translation[-1]=="\""):
  165. #print ("End ok")
  166. translation = translation.strip('"') #remove " double quote from message
  167. else:
  168. print(red('[E]: Critical syntax error: Missing " at end of message in translation on line %d' % lines))
  169. print(red(translation))
  170. exit(1)
  171. #print (translation)
  172. if translation == '\\x00':
  173. # crude hack to handle intentionally-empty translations
  174. translation = ''
  175. #check if source is ascii only
  176. if source.isascii() == False:
  177. print(red('[E]: Critical syntax: Non ascii chars found on line %d' % lines))
  178. print(red(source))
  179. exit(1)
  180. #check if translation is ascii only
  181. if translation.isascii() == False:
  182. print(red('[E]: Critical syntax: Non ascii chars found on line %d' % lines))
  183. print(red(translation))
  184. exit(1)
  185. # handle backslash sequences
  186. source = unescape(source)
  187. translation = unescape(translation)
  188. #print (translation) #Debug
  189. wrapped_source = wrap_text(source, cols)
  190. rows_count_source = len(wrapped_source)
  191. wrapped_translation = wrap_text(translation, cols)
  192. rows_count_translation = len(wrapped_translation)
  193. # Check for potential errors in the definition
  194. if not no_warning:
  195. # Incorrect number of rows/cols on the definition
  196. if rows == 1 and (len(source) > cols or rows_count_source > rows):
  197. print(yellow('[W]: Source text longer than %d cols as defined on line %d:' % (cols, lines)))
  198. print_ruler(4, cols);
  199. print_truncated(source, cols)
  200. print()
  201. elif rows_count_source > rows:
  202. print(yellow('[W]: Wrapped source text longer than %d rows as defined on line %d:' % (rows, lines)))
  203. print_ruler(6, cols);
  204. print_wrapped(wrapped_source, rows, cols)
  205. print()
  206. # Missing translation
  207. if len(translation) == 0 and (warn_empty or rows > 1):
  208. if rows == 1:
  209. print(yellow("[W]: Empty translation for \"%s\" on line %d" % (source, lines)))
  210. else:
  211. print(yellow("[W]: Empty translation on line %d" % lines))
  212. print_ruler(6, cols);
  213. print_wrapped(wrapped_source, rows, cols)
  214. print()
  215. # Check for translation lenght
  216. if (rows_count_translation > rows) or (rows == 1 and len(translation) > cols):
  217. print(red('[E]: Text is longer than definition on line %d: cols=%d rows=%d (rows diff=%d)'
  218. % (lines, cols, rows, rows_count_translation-rows)))
  219. print_source_translation(source, translation,
  220. wrapped_source, wrapped_translation,
  221. rows, cols)
  222. # Different count of % sequences
  223. if source.count('%') != translation.count('%') and len(translation) > 0:
  224. print(red('[E]: Unequal count of %% escapes on line %d:' % (lines)))
  225. print_source_translation(source, translation,
  226. wrapped_source, wrapped_translation,
  227. rows, cols)
  228. # Different first/last character
  229. if not no_warning and len(source) > 0 and len(translation) > 0:
  230. source_end = source.rstrip()[-1]
  231. translation_end = translation.rstrip()[-1]
  232. start_diff = not (ign_char_first(source[0]) and ign_char_first(translation[0])) and source[0] != translation[0]
  233. end_diff = not (ign_char_last(source_end) and ign_char_last(translation_end)) and source_end != translation_end
  234. if start_diff or end_diff:
  235. if start_diff:
  236. print(yellow('[W]: Differing first punctuation character (%s => %s) on line %d:' % (source[0], translation[0], lines)))
  237. if end_diff:
  238. print(yellow('[W]: Differing last punctuation character (%s => %s) on line %d:' % (source[-1], translation[-1], lines)))
  239. print_source_translation(source, translation,
  240. wrapped_source, wrapped_translation,
  241. rows, cols)
  242. # Short translation
  243. if not no_warning and len(source) > 0 and len(translation) > 0:
  244. if len(translation.rstrip()) < len(source.rstrip()) / 2:
  245. print(yellow('[W]: Short translation on line %d:' % (lines)))
  246. print_source_translation(source, translation,
  247. wrapped_source, wrapped_translation,
  248. rows, cols)
  249. # Incorrect trailing whitespace in translation
  250. if not no_warning and len(translation) > 0 and \
  251. (source.rstrip() == source or (rows == 1 and len(source) == cols)) and \
  252. translation.rstrip() != translation and \
  253. (rows > 1 or len(translation) != len(source)):
  254. print(yellow('[W]: Incorrect trailing whitespace for translation on line %d:' % (lines)))
  255. source = highlight_trailing_white(source)
  256. translation = highlight_trailing_white(translation)
  257. wrapped_translation = highlight_trailing_white(wrapped_translation)
  258. print_source_translation(source, translation,
  259. wrapped_source, wrapped_translation,
  260. rows, cols)
  261. if len(src.readline()) != 1: # empty line
  262. print(red('[E]: Critical Syntax error: Missing empty line between messages between lines: %d and %d' % (lines+3,lines+4)))
  263. break
  264. lines += 4
  265. print(green("End %s lang-check" % lang))
  266. def main():
  267. """Main function."""
  268. parser = ArgumentParser(
  269. description=__doc__,
  270. usage="%(prog)s lang")
  271. parser.add_argument(
  272. "lang", nargs='?', default="en", type=str,
  273. help="Check lang file (en|cs|da|de|es|fr|hr|hu|lb|lt|nl|it|pl|ro|sl|sv)")
  274. parser.add_argument(
  275. "--no-warning", action="store_true",
  276. help="Disable warnings")
  277. parser.add_argument(
  278. "--warn-empty", action="store_true",
  279. help="Warn about empty translations")
  280. args = parser.parse_args()
  281. try:
  282. parse_txt(args.lang, args.no_warning, args.warn_empty)
  283. return 0
  284. except Exception as exc:
  285. print_exc()
  286. parser.error("%s" % exc)
  287. return 1
  288. if __name__ == "__main__":
  289. exit(main())