#!/usr/bin/python
'''ISO 9:1995 transliteration for Cyrillic text.
Simple usage:

    import iso9
    romanised_unicode = iso9.transliterate(cyrillic_unicode)

Copyright (c) 2008 by Mublin <mublin@dealloc.org>
This module is free software, and you may redistribute it and/or modify
it under the same terms as Python itself, so long as this copyright message
and disclaimer are retained in their original form.
IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE. THE CODE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.'''
__author__ = "Mublin <mublin@dealloc.org>"
__date__ = "19 April 2008"
__version__ = "0.1.1"
# Raw ISO 9 transliteration table, one row per line of the literal.
# The loader that follows this literal splits every row on tab characters
# and unpacks exactly seven fields, which it names cyrmaj, cyrmin, latmaj,
# latmin, unicode1, unicode2 and description (Cyrillic capital/small letter,
# Latin capital/small letter, two code-point notes, free-text description).
# Only the first four fields are stored in the lookup dict.
# The backslash after the opening quotes keeps the literal from starting
# with an empty first row.
# NOTE(review): the field separators appear here as spaces rather than tabs,
# and some rows (e.g. "W w", "oe ligature", "p grave") show no Cyrillic
# columns -- confirm the tab / empty-field layout in the real file.
# NOTE(review): \u04a6/\u04a7 heads both the "l cedilla" and the "p acute"
# rows; once loaded into a dict the later row wins -- confirm the intent.
iso9 = u'''\
\u0410 \u0430 A a
\u04d2 \u04d3 \xc4 \xe4 00C4 00E4 a diaeresis
\u04d2\u0304 \u04d3\u0304 \u1ea0\u0308 \u1ea1\u0308 00C4+0323 00E4+0323 a diaeresis and dot below
\u04d0 \u04d1 \u0102 \u0103 0102 0103 a breve
\u0410\u0304 \u0430\u0304 \u0100 \u0101 0100 0101 a macron
\u04d4 \u04d5 \xc6 \xe6 00C6 00E6 ae ligature
\u0410\u0301 \u0430\u0301 \xc1 \xe1 00C1 00E1 a acute
\u0410\u030a \u0430\u030a \xc5 \xe5 00C5 00E5 a ring
\u0411 \u0431 B b
\u0412 \u0432 V v
\u0413 \u0433 G g
\u0403 \u0453 \u01f4 \u01f5 01F4 01F5 g acute
\u0492 \u0493 \u0120 \u0121 0120 0121 g dot
\u0494 \u0495 \u011e \u011f 011E 011F g breve
\u04ba \u04bb \u1e24 \u1e25 1E24 1E25 h dot
\u0414 \u0434 D d
\u0402 \u0452 \u0110 \u0111 0110 0111 d macron
\u0415 \u0435 E e
\u04d6 \u04d7 \u0114 \u0115 0114 0115 e breve
\u0401 \u0451 \xcb \xeb 00CB 00EB e diaeresis
\u0404 \u0454 \xca \xea 00CA 00EA e circumflex
\u0416 \u0436 \u017d \u017e 017D 017E z caron
\u0496 \u0497 \u017d\u0327 \u017e\u0327 017D+0327 017E+0327 z caron and cedilla
\u04dc \u04dd Z\u0304 z\u0304 Z+0304 z+0304 z macron
\u04c1 \u04c2 Z\u0306 z\u0306 Z+0306 z+0306 z breve
\u0417 \u0437 Z z
\u04de \u04df Z\u0308 z\u0308 Z+0308 z+0308 z diaeresis
\u04e0 \u04e1 \u0179 \u017a 0179 017A z acute
\u0405 \u0455 \u1e90 \u1e91 1E90 1E91 z circumflex
\u0418 \u0438 I i
\u04e2 \u04e3 \u012a \u012b 012A 012B i macron
\u0418\u0301 \u0438\u0301 \xcd \xed 00CD 00ED i acute
\u04e4 \u04e5 \xce \xee 00CE 00EE i circumflex
\u0419 \u0439 J j
\u0406 \u0456 \xcc \xec 00CC 00EC i grave
\u0407 \u0457 \xcf \xef 00CF 00EF i diaeresis
\u0406\u0304 \u0456\u0304 \u01cf \u01d0 01CF (012C) 01D0 (012D) i caron (or breve)
\u0408 \u0458 J\u030c \u01f0 J+030C 01F0 j caron
\u0408\u0335 \u0458\u0335 J\u0301 j\u0301 J+0301 j+0301 j acute
\u041a \u043a K k
\u04c3 \u04c4 \u1e32 \u1e33 1E32 1E33 k dot below
\u049c \u049d K\u0302 k\u0302 K+0302 k+0302 k circumflex
\u04a0 \u04a1 \u01e8 \u01e9 01E8 01E9 k caron
\u049e \u049f K\u0304 k\u0304 K+0304 k+0304 k macron
\u049a \u049b \u0136 \u0137 0136 0137 k cedilla
\u041a\u0328 \u043a\u0328 K\u0300 k\u0300 K+0300 k+0300 k grave
\u0480 \u0481 Q q
\u041b \u043b L l
\u0409 \u0459 L\u0302 l\u0302 L+0302 l+0302 l circumflex
\u0508 \u0509 \u0139 \u013a 0139 013A l acute
\u04a6 \u04a7 \u013b \u013c 013B 013C l cedilla
\u041c \u043c M m
\u041d \u043d N n
\u040a \u045a N\u0302 n\u0302 N+0302 n+0302 n circumflex
\u04a2 \u04a3 \u0145 \u0146 0145 0146 n cedilla
\u04c9 \u04ca \u1e46 \u1e47 1E46 1E47 n dot below
\u04a4 \u04a5 \u1e44 \u1e45 1E44 1E45 n dot
\u050a \u050b \u01f8 \u01f9 01F8 01F9 n grave
\u04c7 \u04c8 \u0143 \u0144 0143 0144 n acute
\u0147 \u0148 0147 0148 n caron
\u041d\u0304 \u043d\u0304 N\u0304 n\u0304 N+0304 n+0304 n macron
\u041e \u043e O o
\u04e6 \u04e7 \xd6 \xf6 00D6 00F6 o diaeresis
\u04e8 \u04e9 \xd4 \xf4 00D4 00F4 o circumflex
\u04ea \u04eb \u0150 \u0151 0150 0151 o double acute
\u04ea\u0304 \u04eb\u0304 \u1ecc\u0308 \u1ecd\u0308 00D6+0323 00F6+0323 o diaeresis and dot below
\u04a8 \u04a9 \xd2 \xf2 00D2 00F2 o grave
\u041e\u0301 \u043e\u0301 \xd3 \xf3 00D3 00F3 o acute
\u041e\u0304 \u043e\u0304 \u014c \u014d 014C 014D o macron
\u0152 \u0153 0152 0153 oe ligature
\u041f \u043f P p
\u04a6 \u04a7 \u1e54 \u1e55 1E54 1E55 p acute
P\u0300 p\u0300 P+0300 p+0300 p grave
\u0420 \u0440 R r
\u0421 \u0441 S s
\u04aa \u04ab \u015e \u015f 015E 015F s cedilla
\u0421\u0300 \u0441\u0300 S\u0300 s\u0300 S+0300 s+0300 s grave
\u0422 \u0442 T t
\u040b \u045b \u0106 \u0107 0106 0107 c acute
\u050e \u050f T\u0300 t\u0300 T+0300 t+0300 t grave
\u0422\u030c \u0442\u030c \u0164 \u0165 0164 0165 t caron
\u04ac \u04ad \u0162 \u0163 0162 0163 t cedilla
\u040c \u045c \u1e30 \u1e31 1E30 1E31 k acute
\u0423 \u0443 U u
\u04f0 \u04f1 \xdc \xfc 00DC 00FC u diaeresis
\u04ee \u04ef \u016a \u016b 016A 016B u macron
\u040e \u045e \u016c \u016d 016C 016D u breve
\u04f2 \u04f3 \u0170 \u0171 0170 0171 u double acute
\u0423\u0301 \u0443\u0301 \xda \xfa 00DA 00FA u acute
\u04f0\u0304 \u04f1\u0304 \u1ee4\u0308 \u1ee5\u0308 00DC+0323 00FC+0323 u diaeresis and dot below
\u04ae \u04af \xd9 \xf9 00D9 00F9 u grave
\u04b0 \u04b1 U\u0307 u\u0307 U+0307 u+0307 u dot
\u04ee\u0308 \u04ef\u0308 \u1ee4\u0304 \u1ee5\u0304 016A+0323 016B+0323 u macron and dot below
W w
\u0424 \u0444 F f
\u0425 \u0445 H h
\u04b2 \u04b3 \u1e28 \u1e29 1E28 1E29 h cedilla
\u0426 \u0446 C c
\u04b4 \u04b5 C\u0304 c\u0304 C+0304 c+0304 c macron
\u040f \u045f D\u0302 d\u0302 D+0302 d+0302 d circumflex
\u0427 \u0447 \u010c \u010d 010C 010D c caron
\u04b6 \u04b7 \xc7 \xe7 00C7 00E7 c cedilla
\u04cb \u04cc C\u0323 c\u0323 C+0323 c+0323 c dot below
\u04f4 \u04f5 C\u0308 c\u0308 C+0308 c+0308 c diaeresis
\u04b8 \u04b9 \u0108 \u0109 0108 0109 c circumflex
\u0427\u0300 \u0447\u0300 C\u0300 c\u0300 C+0300 c+0300 c grave
\u04bc \u04bd C\u0306 c\u0306 C+0306 c+0306 c breve
\u04be \u04bf \xc7\u0306 \xe7\u0306 00C7+0306 00E7+0306 c cedilla and breve
\u0428 \u0448 \u0160 \u0161 0160 0161 s caron
\u0429 \u0449 \u015c \u015d 015C 015D s circumflex
\u042a \u044a \u02ba \u02ba 02BA 02BA double prime
\u042b \u044b Y y
\u04f8 \u04f9 \u0178 \xff 0178 00FF y diaeresis
\u042b\u0304 \u044b\u0304 \u0232 \u0233 0232 0233 y macron
\u042c \u044c \u02b9 \u02b9 02B9 02B9 prime
\u042d \u044d \xc8 \xe8 00C8 00E8 e grave
\u04d8 \u04d9 A\u030b a\u030b A+030B a+030B a double acute
\u04da \u04db \xc0 \xe0 00C0 00E0 a grave
\u042e \u044e \xdb \xfb 00DB 00FB u circumflex
\u042e\u0304 \u044e\u0304 \u016e \u016f 016E 016F u ring
\u042f \u044f \xc2 \xe2 00C2 00E2 a circumflex
\u0490 \u0491 G\u0300 g\u0300 G+0300 g+0300 g grave
\u0462 \u0463 \u011a \u011b 011A 011B e caron
\u046a \u046b \u01cd \u01ce 01CD 01CE a caron
\u0472 \u0473 F\u0300 f\u0300 F+0300 f+0300 f grave
\u0474 \u0475 \u1ef2 \u1ef3 1EF2 1EF3 y grave
\u04c0 \u04c0 \u2021 \u2021 2021 2021 double dagger
\u2019 \u2019 \u2019 \u2019 2019 2019 apostrophe
\xa8 \xa8 00A8 00A8 diaeresis'''
# Break the raw table text into rows: one row per line, fields separated by
# tab characters.  The parsed rows remain available afterwards as ``_iso9``.
_iso9 = []
for line in iso9.split('\n'):
    _iso9.append(line.split('\t'))

# From here on ``iso9`` names the lookup dict (Cyrillic text -> Latin text)
# instead of the raw table string.
iso9 = {}
for row in _iso9:
    # Each row must carry exactly seven fields, otherwise the unpacking
    # raises ValueError; only the first four fields feed the mapping.
    cyrmaj, cyrmin, latmaj, latmin, unicode1, unicode2, description = row
    iso9[cyrmaj] = latmaj
    iso9[cyrmin] = latmin
def transliterate(source):
    '''Return source with its Cyrillic characters romanised via the iso9 table.

    source -- the text to convert; any iterable of characters is accepted,
              normally a unicode string.

    The text is scanned left to right.  Wherever two consecutive characters
    together form a key of the module-level ``iso9`` table (a base letter
    followed by a combining mark) the pair is replaced as a unit; otherwise
    the single character is looked up on its own.  Characters that have no
    table entry are copied through unchanged.

    Returns the converted pieces joined into one string.
    '''
    result = []
    unset = object()   # marks "no character is waiting for a partner"
    held = unset       # previous character, not yet emitted
    for char in source:
        if held is unset:
            held = char
            continue
        # The table's keys span at most two code points, so one character
        # of lookahead is enough to prefer the longer (combined) key.
        pair = held + char
        if pair in iso9:
            result.append(iso9[pair])
            held = unset
        else:
            result.append(iso9.get(held, held))
            held = char
    if held is not unset:
        # Flush the final character, which had nothing to pair with.
        result.append(iso9.get(held, held))
    return ''.join(result)
if __name__ == '__main__':
    # Command-line filter: read UTF-8 text from standard input, romanise it
    # line by line and write the result to standard output as UTF-8.
    import sys
    # On Python 3 the standard streams are text streams whose lines have no
    # .decode(); their underlying byte streams are exposed as ``.buffer``.
    # On Python 2 the streams already carry bytes and have no ``.buffer``,
    # so the stream objects themselves are used.
    byte_in = getattr(sys.stdin, 'buffer', sys.stdin)
    byte_out = getattr(sys.stdout, 'buffer', sys.stdout)
    for line in byte_in:
        byte_out.write(
            transliterate(line.decode('utf-8')).encode('utf-8'))
# Generated by GNU enscript 1.6.4.