lang-check.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. #!/usr/bin/env python3
  2. #
  3. # Version 1.0.2 - Build 43
  4. #############################################################################
  5. # Change log:
  6. # 7 May 2019, ondratu , Initial
  7. # 13 June 2019, 3d-gussner, Fix length false positives
  8. # 14 Sep. 2019, 3d-gussner, Prepare adding new language
  9. # 18 Sep. 2020, 3d-gussner, Fix execution of lang-check.py
  10. # 2 Apr. 2021, 3d-gussner, Fix and improve text warp
  11. # 22 Apr. 2021, DRracer , add English source to output
  12. # 23 Apr. 2021, wavexx , improve
  13. # 24 Apr. 2021, wavexx , improve
  14. # 26 Apr. 2021, wavexx , add character ruler
  15. # 21 Dec. 2021, 3d-gussner, Prepare more community languages
  16. # Swedish
  17. # Danish
  18. # Slovanian
  19. # Hungarian
  20. # Luxembourgian
  21. # Croatian
  22. # 3 Jan. 2022, 3d-gussner, Prepare Lithuanian
  23. # 7 Jan. 2022, 3d-gussner, Check for Syntax errors and exit with error
  24. # , add Build number 'git rev-list --count HEAD lang-check.py'
  25. # 30 Jan. 2022, 3d-gussner, Add arguments. Requested by @AttilaSVK
  26. # --information == output all source and translated messages
  27. # --import-check == used by `lang-import.sh`to verify
  28. # newly import `lang_en_??.txt` files
  29. # 14 Mar. 2022, 3d-gussner, Check if translation isn't equal to origin
  30. #############################################################################
  31. #
  32. # Expected syntax of the files, which other scripts depend on
  33. # 'lang_en.txt'
# 1st line: '#MSG_'<some text>' c='<max chars in a column>' r='<max rows> ; '#MSG' is mandatory while 'c=' and 'r=' aren't but should be there
# 2nd line: '"'<original message used in the source code>'"' ; '"' double quotes at the beginning and end of message are mandatory
# 3rd line: LF ; Line feed is mandatory between messages
  37. #
  38. # 'lang_en_??.txt'
# 1st line: '#MSG_'<some text>' c='<max chars in a column>' r='<max rows> ; '#MSG' is mandatory while 'c=' and 'r=' aren't but should be there
# 2nd line: '"'<original message used in the source code>'"' ; '"' double quotes at the beginning and end of message are mandatory
# 3rd line: '"'<translated message>'"' ; '"' double quotes at the beginning and end of message are mandatory
# 4th line: LF ; Line feed is mandatory between messages
  43. #
  44. """Check lang files."""
  45. from argparse import ArgumentParser
  46. from traceback import print_exc
  47. from sys import stdout, stderr, exit
  48. import textwrap
  49. import re
  50. def color_maybe(color_attr, text):
  51. if stdout.isatty():
  52. return '\033[0;' + str(color_attr) + 'm' + text + '\033[0m'
  53. else:
  54. return text
  55. red = lambda text: color_maybe(31, text)
  56. green = lambda text: color_maybe(32, text)
  57. yellow = lambda text: color_maybe(33, text)
  58. cyan = lambda text: color_maybe(36, text)
  59. def print_wrapped(wrapped_text, rows, cols):
  60. if type(wrapped_text) == str:
  61. wrapped_text = [wrapped_text]
  62. for r, line in enumerate(wrapped_text):
  63. r_ = str(r + 1).rjust(3)
  64. if r >= rows:
  65. r_ = red(r_)
  66. print((' {} |{:' + str(cols) + 's}|').format(r_, line))
  67. def print_truncated(text, cols):
  68. if len(text) <= cols:
  69. prefix = text.ljust(cols)
  70. suffix = ''
  71. else:
  72. prefix = text[0:cols]
  73. suffix = red(text[cols:])
  74. print(' |' + prefix + '|' + suffix)
  75. def print_ruler(spc, cols):
  76. print(' ' * spc + cyan(('₀₁₂₃₄₅₆₇₈₉'*4)[:cols]))
  77. def print_source_translation(source, translation, wrapped_source, wrapped_translation, rows, cols):
  78. if rows == 1:
  79. print(' source text:')
  80. print_ruler(4, cols);
  81. print_truncated(source, cols)
  82. print(' translated text:')
  83. print_ruler(4, cols);
  84. print_truncated(translation, cols)
  85. else:
  86. print(' source text:')
  87. print_ruler(6, cols);
  88. print_wrapped(wrapped_source, rows, cols)
  89. print(' translated text:')
  90. print_ruler(6, cols);
  91. print_wrapped(wrapped_translation, rows, cols)
  92. print()
  93. def highlight_trailing_white(text):
  94. if type(text) == str:
  95. return re.sub(r' $', '·', text)
  96. else:
  97. ret = text[:]
  98. ret[-1] = highlight_trailing_white(ret[-1])
  99. return ret
  100. def wrap_text(text, cols):
  101. # wrap text
  102. ret = list(textwrap.TextWrapper(width=cols).wrap(text))
  103. if len(ret):
  104. # add back trailing whitespace
  105. ret[-1] += ' ' * (len(text) - len(text.rstrip()))
  106. return ret
  107. def unescape(text):
  108. if '\\' not in text:
  109. return text
  110. return text.encode('ascii').decode('unicode_escape')
  111. def ign_char_first(c):
  112. return c.isalnum() or c in {'%', '?'}
  113. def ign_char_last(c):
  114. return c.isalnum() or c in {'.', "'"}
  115. def parse_txt(lang, no_warning, warn_empty, information, import_check):
  116. """Parse txt file and check strings to display definition."""
  117. if lang == "en":
  118. file_path = "lang_en.txt"
  119. else:
  120. if import_check:
  121. file_path = "po/new/lang_en_%s.txt" % lang
  122. else:
  123. file_path = "lang_en_%s.txt" % lang
  124. print(green("Start %s lang-check" % lang))
  125. lines = 0
  126. with open(file_path) as src:
  127. while True:
  128. message = src.readline()
  129. lines += 1
  130. #print(message) #Debug
  131. #check syntax 1st line starts with `#MSG`
  132. if (message[0:4] != '#MSG'):
  133. print(red("[E]: Critical syntax error: 1st line doesn't start with #MSG on line %d" % lines))
  134. print(red(message))
  135. exit(1)
  136. #Check if columns and rows are defined
  137. comment = message.split(' ')
  138. #Check if columns and rows are defined
  139. cols = None
  140. rows = None
  141. for item in comment[1:]:
  142. key, val = item.split('=')
  143. if key == 'c':
  144. cols = int(val)
  145. #print ("c=",cols) #Debug
  146. elif key == 'r':
  147. rows = int(val)
  148. #print ("r=",rows) #Debug
  149. else:
  150. raise RuntimeError(
  151. "Unknown display definition %s on line %d" %
  152. (' '.join(comment), lines))
  153. if cols is None and rows is None:
  154. if not no_warning:
  155. print(yellow("[W]: No display definition on line %d" % lines))
  156. cols = len(source) # propably fullscreen
  157. if rows is None:
  158. rows = 1
  159. elif rows > 1 and cols != 20:
  160. print(yellow("[W]: Multiple rows with odd number of columns on line %d" % lines))
  161. #Wrap text to 20 chars and rows
  162. source = src.readline()[:-1] #read whole line
  163. lines += 1
  164. #check if 2nd line of origin message beginns and ends with " double quote
  165. if (source[0]!="\""):
  166. print(red('[E]: Critical syntax error: Missing " double quotes at beginning of message in source on line %d' % lines))
  167. print(red(source))
  168. exit(1)
  169. if (source[-1]=="\""):
  170. source = source.strip('"') #remove " double quotes from message
  171. else:
  172. print(red('[E]: Critical syntax error: Missing " double quotes at end of message in source on line %d' % lines))
  173. print(red(source))
  174. exit(1)
  175. #print(source) #Debug
  176. if lang != "en":
  177. translation = src.readline()[:-1]#read whole line
  178. lines += 1
  179. #check if 3rd line of translation message beginns and ends with " double quote
  180. if (translation[0]!="\""):
  181. print(red('[E]: Critical syntax error: Missing " double quotes at beginning of message in translation on line %d' % lines))
  182. print(red(translation))
  183. exit(1)
  184. if (translation[-1]=="\""):
  185. #print ("End ok")
  186. translation = translation.strip('"') #remove " double quote from message
  187. else:
  188. print(red('[E]: Critical syntax error: Missing " double quotes at end of message in translation on line %d' % lines))
  189. print(red(translation))
  190. exit(1)
  191. #print(translation) #Debug
  192. if translation == '\\x00':
  193. # crude hack to handle intentionally-empty translations
  194. translation = ''
  195. #check if source is ascii only
  196. if source.isascii() == False:
  197. print(red('[E]: Critical syntax: Non ascii chars found on line %d' % lines))
  198. print(red(source))
  199. exit(1)
  200. #check if translation is ascii only
  201. if lang != "en":
  202. if translation.isascii() == False:
  203. print(red('[E]: Critical syntax: Non ascii chars found on line %d' % lines))
  204. print(red(translation))
  205. exit(1)
  206. # handle backslash sequences
  207. source = unescape(source)
  208. if lang != "en":
  209. translation = unescape(translation)
  210. #print (translation) #Debug
  211. wrapped_source = wrap_text(source, cols)
  212. rows_count_source = len(wrapped_source)
  213. if lang != "en":
  214. wrapped_translation = wrap_text(translation, cols)
  215. rows_count_translation = len(wrapped_translation)
  216. # Check for potential errors in the definition
  217. if not no_warning:
  218. # Incorrect number of rows/cols on the definition
  219. if rows == 1 and (len(source) > cols or rows_count_source > rows):
  220. print(yellow('[W]: Source text longer than %d cols as defined on line %d:' % (cols, lines)))
  221. print_ruler(4, cols);
  222. print_truncated(source, cols)
  223. print()
  224. elif rows_count_source > rows:
  225. print(yellow('[W]: Wrapped source text longer than %d rows as defined on line %d:' % (rows, lines)))
  226. print_ruler(6, cols);
  227. print_wrapped(wrapped_source, rows, cols)
  228. print()
  229. # Missing translation
  230. if lang != "en":
  231. if len(translation) == 0 and (warn_empty or rows > 1):
  232. if rows == 1:
  233. print(yellow("[W]: Empty translation for \"%s\" on line %d" % (source, lines)))
  234. else:
  235. print(yellow("[W]: Empty translation on line %d" % lines))
  236. print_ruler(6, cols);
  237. print_wrapped(wrapped_source, rows, cols)
  238. print()
  239. # Check for translation lenght
  240. if (rows_count_translation > rows) or (rows == 1 and len(translation) > cols):
  241. print(red('[E]: Text is longer than definition on line %d: cols=%d rows=%d (rows diff=%d)'
  242. % (lines, cols, rows, rows_count_translation-rows)))
  243. print_source_translation(source, translation,
  244. wrapped_source, wrapped_translation,
  245. rows, cols)
  246. # Different count of % sequences
  247. if source.count('%') != translation.count('%') and len(translation) > 0:
  248. print(red('[E]: Unequal count of %% escapes on line %d:' % (lines)))
  249. print_source_translation(source, translation,
  250. wrapped_source, wrapped_translation,
  251. rows, cols)
  252. # Different first/last character
  253. if not no_warning and len(source) > 0 and len(translation) > 0:
  254. source_end = source.rstrip()[-1]
  255. translation_end = translation.rstrip()[-1]
  256. start_diff = not (ign_char_first(source[0]) and ign_char_first(translation[0])) and source[0] != translation[0]
  257. end_diff = not (ign_char_last(source_end) and ign_char_last(translation_end)) and source_end != translation_end
  258. if start_diff or end_diff:
  259. if start_diff:
  260. print(yellow('[W]: Differing first punctuation character (%s => %s) on line %d:' % (source[0], translation[0], lines)))
  261. if end_diff:
  262. print(yellow('[W]: Differing last punctuation character (%s => %s) on line %d:' % (source[-1], translation[-1], lines)))
  263. print_source_translation(source, translation,
  264. wrapped_source, wrapped_translation,
  265. rows, cols)
  266. if not no_warning and source == translation:
  267. print(yellow('[W]: Translation same as origin on line %d:' %lines))
  268. print_source_translation(source, translation,
  269. wrapped_source, wrapped_translation,
  270. rows, cols)
  271. #elif information:
  272. # print(green('[I]: %s' % (message)))
  273. # print_source_translation(source, translation,
  274. # wrapped_source, wrapped_translation,
  275. # rows, cols)
  276. # Short translation
  277. if not no_warning and len(source) > 0 and len(translation) > 0:
  278. if len(translation.rstrip()) < len(source.rstrip()) / 2:
  279. print(yellow('[W]: Short translation on line %d:' % (lines)))
  280. print_source_translation(source, translation,
  281. wrapped_source, wrapped_translation,
  282. rows, cols)
  283. #elif information:
  284. # print(green('[I]: %s' % (message)))
  285. # print_source_translation(source, translation,
  286. # wrapped_source, wrapped_translation,
  287. # rows, cols)
  288. # Incorrect trailing whitespace in translation
  289. if not no_warning and len(translation) > 0 and \
  290. (source.rstrip() == source or (rows == 1 and len(source) == cols)) and \
  291. translation.rstrip() != translation and \
  292. (rows > 1 or len(translation) != len(source)):
  293. print(yellow('[W]: Incorrect trailing whitespace for translation on line %d:' % (lines)))
  294. source = highlight_trailing_white(source)
  295. translation = highlight_trailing_white(translation)
  296. wrapped_translation = highlight_trailing_white(wrapped_translation)
  297. print_source_translation(source, translation,
  298. wrapped_source, wrapped_translation,
  299. rows, cols)
  300. elif information:
  301. print(green('[I]: %s' % (message)))
  302. print_source_translation(source, translation,
  303. wrapped_source, wrapped_translation,
  304. rows, cols)
  305. delimiter = src.readline()
  306. lines += 1
  307. if ("" == delimiter):
  308. break
  309. elif len(delimiter) != 1: # empty line
  310. print(red('[E]: Critical Syntax error: Missing empty line between messages between lines: %d and %d' % (lines-1,lines)))
  311. break
  312. print(green("End %s lang-check" % lang))
  313. def main():
  314. """Main function."""
  315. parser = ArgumentParser(
  316. description=__doc__,
  317. usage="%(prog)s lang")
  318. parser.add_argument(
  319. "lang", nargs='?', default="en", type=str,
  320. help="Check lang file (en|cs|da|de|es|fr|hr|hu|it|lb|lt|nl|no|pl|ro|sk|sl|sv)")
  321. parser.add_argument(
  322. "--no-warning", action="store_true",
  323. help="Disable warnings")
  324. parser.add_argument(
  325. "--warn-empty", action="store_true",
  326. help="Warn about empty translations")
  327. parser.add_argument(
  328. "--information", action="store_true",
  329. help="Output all translations")
  330. parser.add_argument(
  331. "--import-check", action="store_true",
  332. help="Check import file and save informational to file")
  333. args = parser.parse_args()
  334. try:
  335. parse_txt(args.lang, args.no_warning, args.warn_empty, args.information, args.import_check)
  336. return 0
  337. except Exception as exc:
  338. print_exc()
  339. parser.error("%s" % exc)
  340. return 1
  341. if __name__ == "__main__":
  342. exit(main())