Browse Source

Merge pull request #3693 from 3d-gussner/MK3_diacritics

Update diacritics
3d-gussner 2 years ago
parent
commit
d76d01a4f6
1 changed files with 110 additions and 8 deletions
  1. 110 8
      lang/lib/charset.py

+ 110 - 8
lang/lib/charset.py

@@ -14,15 +14,117 @@ CUSTOM_CHARS = {
 # This transformation is applied to the translation prior to being converted to the final encoding,
 # and maps UTF8 to UTF8. It replaces unavailable symbols in the translation with a close
 # representation in the source encoding.
+# sources
+# https://en.wikipedia.org/wiki/Czech_orthography
+# https://en.wikipedia.org/wiki/German_orthography
+# https://en.wikipedia.org/wiki/French_orthography
+# https://en.wikipedia.org/wiki/Spanish_orthography
+# https://en.wikipedia.org/wiki/Italian_orthography
+# https://en.wikipedia.org/wiki/Polish_alphabet
+# https://en.wikipedia.org/wiki/Dutch_orthography
+# https://en.wikipedia.org/wiki/Romanian_alphabet
+# https://en.wikipedia.org/wiki/Hungarian_alphabet
+# https://en.wikipedia.org/wiki/Gaj%27s_Latin_alphabet
+# https://en.wikipedia.org/wiki/Slovak_orthography
+# https://en.wikipedia.org/wiki/Swedish_alphabet
+# https://en.wikipedia.org/wiki/Norwegian_orthography
+
 TRANS_CHARS = {
-    'Ä': 'ä',
-    'Å': 'A',
-    'Ö': 'ö',
-    'Ü': 'ü',
-    'å': 'a',
-    'æ': 'ä',
-    'ø': 'ö',
-    'ß': 'ss',
+    'á': 'a', #cz,fr,es,hu,sk
+    'Á': 'A', #cz,fr,hu,sk
+    'à': 'a', #fr,it
+    'À': 'A', #fr,it
+    'â': 'a', #fr,ro
+    'Â': 'A', #ro
+    'Ä': 'ä', #de,sv,no,sk
+    'å': 'a', #sv,no
+    'Å': 'A', #sv,no
+    'æ': 'ä', #no
+    'ą': 'a', #pl
+    'Ą': 'A', #pl
+    'ă': 'a', #ro
+    'Ă': 'A', #ro
+    'ć': 'c', #pl,hr
+    'Ć': 'C', #pl,hr
+    'ç': 'c', #fr,nl
+    'č': 'c', #cz,hr,sk
+    'Č': 'C', #cz,hr,sk
+    'ď': 'd', #cz,sk
+    'Ď': 'D', #cz,sk
+    'đ': 'd', #hr
+    'Đ': 'D', #hr
+    'é': 'e', #cz,fr,es,it,nl,hu,sk
+    'É': 'E', #cz,fr,it,hu,sk
+    'è': 'e', #fr,it,nl
+    'È': 'E', #fr,it
+    'ê': 'e', #fr,nl
+    'ě': 'e', #cz
+    'ë': 'e', #fr
+    'Ě': 'E', #cz
+    'ę': 'e', #pl
+    'Ę': 'E', #pl
+    'í': 'i', #cz,es,it,sk
+    'Í': 'I', #cz,it,sk
+    'î': 'i', #fr,ro
+    'Î': 'I', #ro
+    'ĺ': 'l', #sk
+    'Ĺ': 'L', #sk
+    'ł': 'l', #pl
+    'Ł': 'L', #pl
+    'ľ': 'l', #sk
+    'Ľ': 'L', #sk
+    'ń': 'n', #pl
+    'Ń': 'N', #pl
+    'ň': 'n', #cz,sk
+    'Ň': 'N', #cz,sk
+    'ñ': 'n', #es,nl
+    'ó': 'o', #cz,es,pl,hu,sk
+    'Ó': 'O', #cz,pl,hu,sk
+    'ò': 'o', #it
+    'Ò': 'O', #it
+    'ô': 'o', #fr,nl,sk
+    'Ô': 'O', #sk
+    'œ': 'o', #fr
+    'ø': 'ö', #no
+    'Ö': 'ö', #de,sv,no,hu
+    'ő': 'o', #hu
+    'Ő': 'O', #hu
+    'ŕ': 'r', #sk
+    'Ŕ': 'R', #sk
+    'ř': 'r', #cz
+    'Ř': 'R', #cz
+    'ś': 's', #pl
+    'Ś': 'S', #pl
+    'š': 's', #cz,hr,sk
+    'Š': 'S', #cz,hr,sk
+    'ș': 's', #ro
+    'Ș': 'S', #ro
+    'ß': 'ss',#de
+    'ť': 't', #cz,sk
+    'Ť': 'T', #cz,sk
+    'ț': 't', #ro
+    'Ț': 'T', #ro
+    'ú': 'u', #cz,es,hu,sk
+    'Ú': 'U', #cz,hu,sk
+    'ù': 'u', #it
+    'Ù': 'U', #it
+    'û': 'u', #fr
+    'Ü': 'ü', #de,hu
+    'ů': 'u', #cz
+    'Ů': 'U', #cz
+    'ű': 'u', #hu
+    'Ű': 'U', #hu
+    'ý': 'y', #cz,sk
+    'Ý': 'Y', #cz,sk
+    'ÿ': 'y', #fr
+    'ź': 'z', #pl
+    'Ź': 'Z', #pl
+    'ž': 'z', #cz,hr,sk
+    'Ž': 'Z', #cz,hr,sk
+    'ż': 'z', #pl
+    'Ż': 'Z', #pl
+    '¿': '', #es
+    '¡': '', #es
 }