import re

# Mapping from LCD source encoding to unicode characters
#
# Every key must be unique: Python keeps only the last value of a repeated
# dict key. The micro sign (U+00B5) looks identical to the Greek mu stored
# here but is a different code point; it is folded onto the Greek mu via
# TRANS_CHARS below instead of being listed under the same key.
CUSTOM_CHARS = {
    '\x06': '⏬',
    '\x04': '🔃',
    '\xe4': 'μ',
    '\xdf': '°',
    '\xe1': 'ä',
    '\xef': 'ö',
    '\xf5': 'ü',
}

# Characters to be remapped prior to source-encoding transformation
# This transformation is applied to the translation prior to being converted to the final encoding,
# and maps UTF8 to UTF8. It replaces unavailable symbols in the translation to a close
# representation in the source encoding.
# sources
# https://en.wikipedia.org/wiki/Czech_orthography
# https://en.wikipedia.org/wiki/German_orthography
# https://en.wikipedia.org/wiki/French_orthography
# https://en.wikipedia.org/wiki/Spanish_orthography
# https://en.wikipedia.org/wiki/Italian_orthography
# https://en.wikipedia.org/wiki/Polish_alphabet
# https://en.wikipedia.org/wiki/Dutch_orthography
# https://en.wikipedia.org/wiki/Romanian_alphabet
# https://en.wikipedia.org/wiki/Hungarian_alphabet
# https://en.wikipedia.org/wiki/Gaj%27s_Latin_alphabet
# https://en.wikipedia.org/wiki/Slovak_orthography
# https://en.wikipedia.org/wiki/Swedish_alphabet
# https://en.wikipedia.org/wiki/Norwegian_orthography
TRANS_CHARS = {
    'á': 'a', #cz,fr,es,hu,sk
    'Á': 'A', #cz,fr,hu,sk
    'à': 'a', #fr,it
    'À': 'A', #fr,it
    'â': 'a', #fr,ro
    'Â': 'A', #ro
    'Ä': 'ä', #de,sv,no,sk
    'å': 'a', #sv,no
    'Å': 'A', #sv,no
    'æ': 'ä', #sv,no
    'ą': 'a', #pl
    'Ą': 'A', #pl
    'ă': 'a', #ro
    'Ă': 'A', #ro
    'ć': 'c', #pl,hr
    'Ć': 'C', #pl,hr
    'ç': 'c', #fr,nl
    'č': 'c', #cz,hr,sk
    'Č': 'C', #cz,hr,sk
    'ď': 'd', #cz,sk
    'Ď': 'D', #cz,sk
    'đ': 'd', #hr
    'Đ': 'D', #hr
    'é': 'e', #cz,fr,es,it,nl,hu,sk
    'É': 'E', #cz,fr,it,hu,sk
    'è': 'e', #fr,it,nl
    'È': 'E', #fr,it
    'ê': 'e', #fr,nl
    'ě': 'e', #cz
    'ë': 'e', #fr
    'Ě': 'E', #cz
    'ę': 'e', #pl
    'Ę': 'E', #pl
    'í': 'i', #cz,es,it,sk
    'Í': 'I', #cz,it,sk
    'î': 'i', #fr,ro
    'Î': 'I', #ro
    'ĺ': 'l', #sk
    'Ĺ': 'L', #sk
    'ł': 'l', #pl
    'Ł': 'L', #pl
    'ľ': 'l', #sk
    'Ľ': 'L', #sk
    'ń': 'n', #pl
    'Ń': 'N', #pl
    'ň': 'n', #cz,sk
    'Ň': 'N', #cz,sk
    'ñ': 'n', #es,nl
    'ó': 'o', #cz,es,pl,hu,sk
    'Ó': 'O', #cz,pl,hu,sk
    'ò': 'o', #it
    'Ò': 'O', #it
    'ô': 'o', #fr,nl,sk
    'Ô': 'O', #sk
    'œ': 'o', #fr
    'ø': 'ö', #sv,no
    'Ö': 'ö', #de,sv,no,hu
    'ő': 'o', #hu
    'Ő': 'O', #hu
    'ŕ': 'r', #sk
    'Ŕ': 'R', #sk
    'ř': 'r', #cz
    'Ř': 'R', #cz
    'ś': 's', #pl
    'Ś': 'S', #pl
    'š': 's', #cz,hr,sk
    'Š': 'S', #cz,hr,sk
    'ș': 's', #ro
    'Ș': 'S', #ro
    'ß': 'ss',#de
    'ť': 't', #cz,sk
    'Ť': 'T', #cz,sk
    'ț': 't', #ro
    'Ț': 'T', #ro
    'ú': 'u', #cz,es,hu,sk
    'Ú': 'U', #cz,hu,sk
    'ù': 'u', #it
    'Ù': 'U', #it
    'û': 'u', #fr
    'Ü': 'ü', #de,hu
    'ů': 'u', #cz
    'Ů': 'U', #cz
    'ű': 'u', #hu
    'Ű': 'U', #hu
    'ý': 'y', #cz,sk
    'Ý': 'Y', #cz,sk
    'ÿ': 'y', #fr
    'ź': 'z', #pl
    'Ź': 'Z', #pl
    'ž': 'z', #cz,hr,sk
    'Ž': 'Z', #cz,hr,sk
    'ż': 'z', #pl
    'Ż': 'Z', #pl
    '¿': '', #es
    '¡': '', #es
    # Written as escapes because the two glyphs are visually identical:
    # micro sign (U+00B5) -> Greek small mu (U+03BC), the form in CUSTOM_CHARS.
    '\u00b5': '\u03bc',
}


def _replace_all(buf, mapping):
    """Replace every occurrence of each key of `mapping` in `buf` in ONE pass.

    Chained ``str.replace`` calls can re-process the output of an earlier
    replacement when that output happens to equal a later key. A single
    regex pass only ever looks at the original text, so that cannot happen.

    Returns `buf` unchanged when `mapping` is empty.
    """
    if not mapping:
        return buf
    # Longest keys first so a longer key is never shadowed by its own prefix.
    keys = sorted(mapping, key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(key) for key in keys))
    return pattern.sub(lambda match: mapping[match.group(0)], buf)


def _character_check(buf, valid_chars):
    """Return the first disallowed character in `buf`, or None if all pass.

    A character is disallowed when it is non-ASCII or non-printable and is
    not a member of `valid_chars`.
    """
    for c in buf:
        if (not c.isascii() or not c.isprintable()) and c not in valid_chars:
            return c
    return None


def source_check(buf):
    """Check `buf` against printable ASCII, newline and the unicode glyphs
    listed as values of CUSTOM_CHARS.

    Returns the first offending character, or None when `buf` is clean.
    """
    valid_chars = set(CUSTOM_CHARS.values())
    valid_chars.add('\n')
    return _character_check(buf, valid_chars)


def translation_check(buf):
    """Check `buf` against printable ASCII, newline and the source-encoding
    codes listed as keys of CUSTOM_CHARS.

    Returns the first offending character, or None when `buf` is clean.
    """
    valid_chars = set(CUSTOM_CHARS.keys())
    valid_chars.add('\n')
    return _character_check(buf, valid_chars)


def source_to_unicode(buf):
    """Return `buf` with each CUSTOM_CHARS key replaced by its unicode glyph."""
    return _replace_all(buf, CUSTOM_CHARS)


def trans_replace(buf):
    """Return `buf` with every TRANS_CHARS key replaced by its substitute."""
    return _replace_all(buf, TRANS_CHARS)


def unicode_to_source(buf):
    """Convert unicode text to the source encoding.

    TRANS_CHARS substitutions are applied first, then each unicode glyph of
    CUSTOM_CHARS is replaced by its source-encoding code. The second step is
    a single pass on purpose: code '\\xe4' (the mu glyph) is the same code
    point as the unicode letter 'ä', so a sequential replace would rewrite a
    freshly produced '\\xe4' into '\\xe1'.
    """
    buf = trans_replace(buf)
    reverse = {}
    for code, glyph in CUSTOM_CHARS.items():
        # First code listed for a glyph wins, as with sequential replaces.
        reverse.setdefault(glyph, code)
    return _replace_all(buf, reverse)