| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305 | 
							- #!/usr/bin/env python3
 
- import argparse
 
- import bisect
 
- import codecs
 
- import polib
 
- import regex
 
- import sys
 
- import lib.charset as cs
 
def line_warning(path, line, msg):
    """Print a per-line diagnostic in ``path:line: msg`` form to stderr."""
    print(f'{path}:{line}: {msg}', file=sys.stderr)
 
def line_error(path, line, msg):
    """Print a per-line error in ``path:line: error: msg`` form to stderr.

    Fix: the original body was byte-identical to line_warning, making errors
    indistinguishable from warnings in the tool's output; the explicit
    ``error:`` marker restores the distinction (compiler-style diagnostics).
    """
    print(f'{path}:{line}: error: {msg}', file=sys.stderr)
 
def entry_warning_locs(entries):
    """Print each entry's source text and its occurrence locations to stderr.

    ``entries`` is an iterable of ``(msgid, data)`` pairs where
    ``data['occurrences']`` holds ``(path, line)`` tuples.
    """
    for msgid, data in entries:
        print('   text: ' + repr(msgid), file=sys.stderr)
        locations = ', '.join(f'{path}:{line}' for path, line in data['occurrences'])
        print('     in: ' + locations, file=sys.stderr)
 
def entries_warning(entries, msg):
    """Emit one warning covering a group of entries, then list their locations."""
    print(f'warning: {msg}', file=sys.stderr)
    entry_warning_locs(entries)
 
def entry_warning(entry, msg):
    """Convenience wrapper: warn about a single ``(msgid, data)`` entry."""
    entries_warning([entry], msg)
 
def newline_positions(source):
    """Return the indices of all line ends in *source*, in ascending order.

    Each '\\n' index is included; if the text does not end with a newline,
    the index of its final character is appended so every line has an end
    marker. An empty string yields an empty list.
    """
    positions = [i for i, ch in enumerate(source) if ch == '\n']
    last = len(source) - 1
    if source and (not positions or positions[-1] != last):
        positions.append(last)
    return positions
 
def index_to_line(index, lines):
    """Map a character offset to a 1-based line number.

    ``lines`` is the sorted list of line-end offsets produced by
    ``newline_positions``; a binary search finds the first line whose end
    is at or past *index*.
    """
    return 1 + bisect.bisect_left(lines, index)
 
def extract_file(path, catalog, warn_skipped=False):
    """Scan one source file for ``_i("...")``/``ISTR("...")`` strings.

    Each extracted string becomes (or extends) an entry in *catalog*, a dict
    keyed by the unescaped message text. Entry values are dicts with keys:
    ``occurrences`` (set of (path, line)), ``data`` (set of ``////`` metadata
    strings), ``cat_name`` (set of MSG_* identifiers), ``comments`` (set of
    trailing ``//`` comments) and ``ref_type`` (set of 'def'/'ref').

    :param path: source file to scan
    :param catalog: dict updated in place
    :param warn_skipped: when True, warn about explicitly IGNOREd strings
    """
    # Fix: close the file deterministically instead of leaking the handle
    # from a bare open(path).read().
    with open(path) as fh:
        source = fh.read()
    newlines = newline_positions(source)

    # match internationalized quoted strings
    RE_START = r'\b (_[iI]|ISTR) \s* \('
    RE_META = r'//// \s* ([^\n]*)$'
    RE_I = fr'''
        (?<!(?:/[/*]|^\s*\#) [^\n]*)  # not on a comment or preprocessor
        {RE_START}                    # $1 ref type _i( or ISTR(
        (?:
          \s*
          ("(?:[^"\\]|\\.)*")         # $2 quoted string (chunk)
          (?:\s* {RE_META} )?         # $3 inline metadata
        )+
        \s* \)                        # )
        (?:
          (?:[^\n] (?!{RE_START}))*   # anything except another entry
          {RE_META}                   # $5 final metadata
        )?
    '''
    # capture-group indices within RE_I
    r_ref_type = 1
    r_quoted_chunk = 2
    r_inline_data = 3
    r_eol_data = 5

    for m in regex.finditer(RE_I, source, regex.M|regex.X):
        # parse the text: concatenate all adjacent quoted chunks
        line = index_to_line(m.start(0), newlines)
        text = ""
        for block in m.captures(r_quoted_chunk):
            # remove quotes and unescape C-style escapes, then map the
            # firmware's custom source charset to unicode
            block = block[1:-1]
            block = codecs.decode(block, 'unicode-escape', 'strict')
            block = cs.source_to_unicode(block)
            text += block

        # check if text is non-empty
        if len(text) == 0:
            line_warning(path, line, 'empty source text, ignored')
            continue

        # collect metadata (before '//') and free-form comments (after '//')
        data = set()
        comments = set()
        for n in [r_inline_data, r_eol_data]:
            meta = m.group(n)
            if meta is not None:
                meta_parts = meta.split('//', 1)
                data.add(meta_parts[0].strip())
                if len(meta_parts) > 1:
                    comments.add(meta_parts[1].strip())

        # check if this message should be ignored
        ignored = False
        for meta in data:
            if regex.search(r'\bIGNORE\b', meta) is not None:
                ignored = True
                break
        if ignored:
            if warn_skipped:
                line_warning(path, line, 'skipping explicitly ignored translation')
            continue

        # extra message catalog name (if any)
        cat_name = set()
        for meta in data:
            sm = regex.search(r'\b(MSG_\w+)', meta)
            if sm is not None:
                cat_name.add(sm.group(1))

        # reference type annotation: ISTR defines a string, _i references one
        ref_type = 'def' if m.group(r_ref_type) == 'ISTR' else 'ref'
        if ref_type == 'def':
            # ISTR definition: lookup nearby assignment by slicing from the
            # end of the previous line up to the matched 'ISTR' token.
            # NOTE(review): for a match on line 1, newlines[line-2] wraps to
            # newlines[-1] (the last line end) — presumably ISTR assignments
            # never start the file; confirm before relying on this.
            lineup_def = source[newlines[line-2]+1:m.end(r_ref_type)]
            sm = regex.search(r'\b PROGMEM_(\S+) \s*=\s* ISTR $', lineup_def, regex.M|regex.X)
            if sm is None:
                line_warning(path, line, 'ISTR not used in an assignment')
            elif sm.group(1) != 'I1':
                line_warning(path, line, 'ISTR not used with PROGMEM_I1')

        # append the translation to the catalog (merge into existing entry)
        pos = [(path, line)]
        entry = catalog.get(text)
        if entry is None:
            catalog[text] = {'occurrences': set(pos),
                             'data': data,
                             'cat_name': cat_name,
                             'comments': comments,
                             'ref_type': set([ref_type])}
        else:
            entry['occurrences'] = entry['occurrences'].union(pos)
            entry['data'] = entry['data'].union(data)
            entry['cat_name'] = entry['cat_name'].union(cat_name)
            entry['comments'] = entry['comments'].union(comments)
            entry['ref_type'].add(ref_type)
 
def extract_refs(path, catalog):
    """Second pass: find ``_T(MSG_*)``/``_O(MSG_*)`` catalog references.

    Adds back-references (occurrences and 'ref' type) to matching entries in
    *catalog*, and reports catalog names that are unknown or never defined.

    :param path: source file to scan
    :param catalog: catalog dict produced by extract_file, updated in place
    """
    # Fix: close the file deterministically instead of leaking the handle
    # from a bare open(path).read().
    with open(path) as fh:
        source = fh.read()
    newlines = newline_positions(source)

    # match message catalog references to add backrefs
    RE_CAT = r'''
        (?<!(?:/[/*]|^\s*\#) [^\n]*)         # not on a comment or preprocessor
        \b (?:_[TO]) \s* \( \s* (\w+) \s* \) # $1 catalog name
    '''
    for m in regex.finditer(RE_CAT, source, regex.M|regex.X):
        line = index_to_line(m.start(0), newlines)
        pos = [(path, line)]
        cat_name = m.group(1)
        found = False
        defined = False
        for entry in catalog.values():
            if cat_name in entry['cat_name']:
                entry['occurrences'] = entry['occurrences'].union(pos)
                entry['ref_type'].add('ref')
                found = True
                if 'def' in entry['ref_type']:
                    defined = True
        if not found:
            line_error(path, line, f'{cat_name} not found')
        elif not defined:
            line_error(path, line, f'{cat_name} referenced but never defined')
 
def check_entries(catalog, warn_missing, warn_same_line):
    """Run consistency checks over the extracted catalog and warn on stderr.

    Checks: missing MSG identifiers, defined-but-unused entries, unhandled
    custom characters, malformed/repeated metadata annotations, one MSG id
    used by several translations, and (optionally) several translations on
    one source line.

    :param catalog: catalog dict produced by extract_file/extract_refs
    :param warn_missing: warn when an entry lacks a MSG_* identifier
    :param warn_same_line: warn when one source line holds several entries
    """
    cat_entries = {}
    for entry in catalog.items():
        msgid, data = entry
        # ensure we have at least one name
        if len(data['cat_name']) == 0 and warn_missing:
            entry_warning(entry, 'missing MSG identifier')
        # ensure references are being defined (ref_type == {'def'} means the
        # string was defined via ISTR but never referenced anywhere)
        if data['ref_type'] == set(['def']):
            if len(data['cat_name']) == 0:
                if warn_missing:
                    entry_warning(entry, 'entry defined, but never used')
            else:
                id_name = next(iter(data['cat_name']))
                entry_warning(entry, f'{id_name} defined, but never used')
        # check custom characters
        invalid_char = cs.source_check(msgid)
        if invalid_char is not None:
            entry_warning(entry, 'source contains unhandled custom character ' + repr(invalid_char))
        tokens = []
        for meta in data['data']:
            tokens.extend(regex.split(r'\s+', meta))
        seen_keys = set()
        for token in tokens:
            if len(token) == 0:
                continue
            # check metadata syntax: either c=N/r=N layout hints or a MSG id
            if regex.match(r'[cr]=\d+$', token) is None and \
               regex.match(r'MSG_[A-Z_0-9]+$', token) is None:
                entry_warning(entry, 'bogus annotation: ' + repr(token))
            # check for repeated keys
            # Fix: the original pattern r'([^=])+=' repeated a single-char
            # group, so group(1) captured only the LAST character of the key;
            # r'([^=]+)=' captures the whole key as intended.
            key = regex.match(r'([^=]+)=', token)
            if key is not None:
                key_name = key.group(1)
                if key_name in seen_keys:
                    entry_warning(entry, 'repeated annotation: ' + repr(token))
                else:
                    seen_keys.add(key_name)
            # build the inverse catalog map
            if token.startswith('MSG_'):
                if token not in cat_entries:
                    cat_entries[token] = [entry]
                else:
                    cat_entries[token].append(entry)
    # ensure the same id is not used in multiple entries
    for cat_name, entries in cat_entries.items():
        if len(entries) > 1:
            entries_warning(entries, f'{cat_name} used in multiple translations')
    if warn_same_line:
        # build the inverse location map
        # Fix: store the *entries* sharing a location (the original appended
        # the location itself; the emitted warning is unchanged since only
        # the count and the dict key were used, but the map was misleading).
        entry_locs = {}
        for entry in catalog.items():
            msgid, data = entry
            for loc in data['occurrences']:
                if loc not in entry_locs:
                    entry_locs[loc] = [entry]
                else:
                    entry_locs[loc].append(entry)
        # check for multiple translations on the same location
        for loc, entries in entry_locs.items():
            if len(entries) > 1:
                line_warning(loc[0], loc[1], 'line contains multiple translations')
 
def main():
    """Command-line entry point: extract strings and write a PO template.

    Runs two passes over the input files (string extraction, then catalog
    back-reference resolution), checks the result, and saves a POT file.
    Returns 0 on completion.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', dest='pot', required=True, help='PO template output file')
    parser.add_argument('--no-missing', action='store_true',
                        help='Do not warn about missing MSG entries')
    parser.add_argument('--warn-same-line', action='store_true',
                        help='Warn about multiple translations on the same line')
    parser.add_argument('--warn-skipped', action='store_true',
                        help='Warn about explicitly ignored translations')
    parser.add_argument('-s', '--sort', action='store_true',
                        help='Sort output catalog')
    parser.add_argument('file', nargs='+', help='Input files')
    args = parser.parse_args()

    # first pass: extract strings from every input file
    catalog = {}
    for src in args.file:
        extract_file(src, catalog, warn_skipped=args.warn_skipped)

    # second pass: resolve catalog-name backreferences
    for src in args.file:
        extract_refs(src, catalog)

    # check the catalog entries
    check_entries(catalog, warn_missing=not args.no_missing,
                  warn_same_line=args.warn_same_line)

    # write the output PO template
    template = polib.POFile()
    template.metadata = {
        'Language': 'en',
        'MIME-Version': '1.0',
        'Content-Type': 'text/plain; charset=utf-8',
        'Content-Transfer-Encoding': '8bit'}

    msgids = sorted(catalog) if args.sort else list(catalog.keys())
    for msgid in msgids:
        data = catalog[msgid]
        comment = ', '.join(data['data'])
        if data['comments']:
            comment += '\n' + '\n'.join(data['comments'])
        occurrences = data['occurrences']
        if args.sort:
            occurrences = sorted(occurrences)
        template.append(
            polib.POEntry(
                msgid=msgid,
                comment=comment,
                occurrences=occurrences))
    template.save(args.pot)
    return 0
 
if __name__ == '__main__':
    # Fix: use sys.exit() — the exit() builtin is a site.py convenience that
    # is absent when Python runs with -S; sys is already imported.
    sys.exit(main())
 
 
  |