pathumego · January 3, 2018 15:34
diff --git a/iscii2unicode.py b/iscii2unicode.py
 #!/usr/bin/env python
 
 # released under BSD License
 
 # mary <[email protected]> aka meyarivan <[email protected]>
 
 # inspired by ICU [ http://oss.software.ibm.com/icu/ ]
 
 # code still in alpha stage.. lots of redundant code.. and probably incorrect
 # if ya find errors, pls submit bug reports at indlinux
 
 # for usage, either run the script or scroll down to end of the script
 
 import sys
 import array
 
 # Generic Constants
 
 # Code points above 0xFF are Unicode values, the ISCII_* names are ISCII bytes.
 ISCII_ATR                 = 0x00EF
 ATR_MASK                  = 0x004F
 DANDA                     = 0x0964
 # distance between two consecutive script blocks (see set_script)
 DELTA                     = 0x0080
 # U+0970 DEVANAGARI ABBREVIATION SIGN; Parser.handle_ext() refers to this
 # name for EXT + 0xBF, it used to be undefined
 DEV_ABBR_SIGN             = 0x0970
 DEV_ANUDATTA              = 0x0952
 DOUBLE_DANDA              = 0x0965
 ISCII_EXT                 = 0x00F0
 EXT_RANGE_BEGIN           = 0x00A1
 EXT_RANGE_END             = 0x00EE
 HALANT                    = 0x094d
 INDIC_BLOCK_BEGIN         = 0x0900
 INDIC_BLOCK_END           = 0x0D7F
 INVALID_CHAR              = 0xFFFF
 ISCII_BEGIN               = 0x00A0
 ISCII_DANDA               = 0x00EA
 ISCII_HALANT              = 0x00E8
 ISCII_INV                 = 0x00D9
 ISCII_NUKTA               = 0x00E9
 LF                        = 0x000A
 # marker for "no character pending / nothing to emit"
 NO_CHAR                   = 0xFFFE
 UNI_BEGIN                 = 0x0900
 UNI_END                   = 0x097F
 ZWJ                       = 0x200d
 ZWNJ                      = 0x200c
 
 
 # map between the ISCII scripts as specified via the ATR switch
 # and the script in unicode
 
 # bengali and assamese have same script except for two characters
 # (according to ISCII-91)
 
 ISCII_SCRIPTS = {
    0x40 : -1, # DEFAULT
    0x42 : 0,  # DEVNAG
    0x43 : 1,  # BENGALI
    0x44 : 5,  # TAMIL
    0x45 : 6,  # TELUGU
    0x46 : 1,  # ASSAMESE
    0x47 : 4,  # ORIYA
    0x48 : 7,  # KANNADA
    0x49 : 8,  # MALAYALAM
    0x4A : 3,  # GUJARATI
    0x4B : 2   # PUNJABI
    }
 
 # ISCII_SPECIAL CHARS
 
 ISCII_SPECIALS = [ISCII_ATR, ISCII_EXT, ISCII_INV]
 
 iscii_to_unicode = {
    0xa0 : 0x900,
    0xa1 : 0x901,
    0xa2 : 0x902,
    0xa3 : 0x903,
    0xa4 : 0x905,
    0xa5 : 0x906,
    0xa6 : 0x907,
    0xa7 : 0x908,
    0xa8 : 0x909,
    0xa9 : 0x90a,
    0xaa : 0x90b,
    0xab : 0x90e,
    0xac : 0x90f,
    0xad : 0x910,
    0xae : 0x90d,
    0xaf : 0x912,
    0xb0 : 0x913,
    0xb1 : 0x914,
    0xb2 : 0x911,
    0xb3 : 0x915,
    0xb4 : 0x916,
    0xb5 : 0x917,
    0xb6 : 0x918,
    0xb7 : 0x919,
    0xb8 : 0x91a,
    0xb9 : 0x91b,
    0xba : 0x91c,
    0xbb : 0x91d,
    0xbc : 0x91e,
    0xbd : 0x91f,
    0xbe : 0x920,
    0xbf : 0x921,
    0xc0 : 0x922,
    0xc1 : 0x923,
    0xc2 : 0x924,
    0xc3 : 0x925,
    0xc4 : 0x926,
    0xc5 : 0x927,
    0xc6 : 0x928,
    0xc7 : 0x929,
    0xc8 : 0x92a,
    0xc9 : 0x92b,
    0xca : 0x92c,
    0xcb : 0x92d,
    0xcc : 0x92e,
    0xcd : 0x92f,
    0xce : 0x95f,
    0xcf : 0x930,
    0xd0 : 0x931,
    0xd1 : 0x932,
    0xd2 : 0x933,
    0xd3 : 0x934,
    0xd4 : 0x935,
    0xd5 : 0x936,
    0xd6 : 0x937,
    0xd7 : 0x938,
    0xd8 : 0x939,
    0xd9 : 0x200d,
    0xda : 0x93e,
    0xdb : 0x93f,
    0xdc : 0x940,
    0xdd : 0x941,
    0xde : 0x942,
    0xdf : 0x943,
    0xe0 : 0x946,
    0xe1 : 0x947,
    0xe2 : 0x948,
    0xe3 : 0x945,
    0xe4 : 0x94a,
    0xe5 : 0x94b,
    0xe6 : 0x94c,
    0xe7 : 0x949,
    0xe8 : 0x94d,
    0xe9 : 0x93c,
    0xea : 0x964,
    0xeb : 0xffff,
    0xec : 0xffff,
    0xed : 0xffff,
    0xee : 0xffff,
    0xef : 0xffff,
    0xf0 : 0xffff,
    0xf1 : 0x966,
    0xf2 : 0x967,
    0xf3 : 0x968,
    0xf4 : 0x969,
    0xf5 : 0x96a,
    0xf6 : 0x96b,
    0xf7 : 0x96c,
    0xf8 : 0x96d,
    0xf9 : 0x96e,
    0xfa : 0x96f,
    0xfb : 0xffff,
    0xfc : 0xffff,
    0xfd : 0xffff,
    0xfe : 0xffff,
    0xff : 0xffff
    }
 
 
 """
 # code to generate the validation_table
 ( ya ya .. agreed that it is kludgy)
 
 # need python 2.3
 
 import unicodedata
 
 UNI_BEGIN = 0x0900
 UNI_END = 0x097F
 DELTA = 0x80
 SCRS = 9
 
 table = []
 
 for char in range(UNI_END - UNI_BEGIN + 1):
   res = [0] * SCRS
   for scr in range(0, SCRS):
       val = unichr(UNI_BEGIN + (scr * DELTA) + char)
       res[scr] = int(unicodedata.name(val, None) is not None)
 
   table.append(res)
 
 """
 
 validation_table = [
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #    0x0      0
    [1, 1, 0, 1, 1, 0, 1, 0, 0], #    0x1      1
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #    0x2      2
    [1, 1, 0, 1, 1, 1, 1, 1, 1], #    0x3      3
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #    0x4      4
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #    0x5      5
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #    0x6      6
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #    0x7      7
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #    0x8      8
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #    0x9      9
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #    0xa     10
    [1, 1, 0, 1, 1, 0, 1, 1, 1], #    0xb     11
    [1, 1, 0, 0, 1, 0, 1, 1, 1], #    0xc     12
    [1, 0, 0, 1, 0, 0, 0, 0, 0], #    0xd     13
    [1, 0, 0, 0, 0, 1, 1, 1, 1], #    0xe     14
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #    0xf     15
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x10     16
    [1, 0, 0, 1, 0, 0, 0, 0, 0], #   0x11     17
    [1, 0, 0, 0, 0, 1, 1, 1, 1], #   0x12     18
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x13     19
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x14     20
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x15     21
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x16     22
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x17     23
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x18     24
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x19     25
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x1a     26
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x1b     27
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x1c     28
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x1d     29
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x1e     30
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x1f     31
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x20     32
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x21     33
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x22     34
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x23     35
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x24     36
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x25     37
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x26     38
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x27     39
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x28     40
    [1, 0, 0, 0, 0, 1, 0, 0, 0], #   0x29     41
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x2a     42
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x2b     43
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x2c     44
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x2d     45
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x2e     46
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x2f     47
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x30     48
    [1, 0, 0, 0, 0, 1, 1, 1, 1], #   0x31     49
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x32     50
    [1, 0, 1, 1, 1, 1, 1, 1, 1], #   0x33     51
    [1, 0, 0, 0, 0, 1, 0, 0, 1], #   0x34     52
    [1, 0, 1, 1, 0, 1, 1, 1, 1], #   0x35     53
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x36     54
    [1, 1, 0, 1, 1, 1, 1, 1, 1], #   0x37     55
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x38     56
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x39     57
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #   0x3a     58
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #   0x3b     59
    [1, 1, 1, 1, 1, 0, 0, 0, 0], #   0x3c     60
    [1, 0, 0, 1, 1, 0, 0, 0, 0], #   0x3d     61
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x3e     62
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x3f     63
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x40     64
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x41     65
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x42     66
    [1, 1, 0, 1, 1, 0, 1, 1, 1], #   0x43     67
    [1, 1, 0, 1, 0, 0, 1, 1, 0], #   0x44     68
    [1, 0, 0, 1, 0, 0, 0, 0, 0], #   0x45     69
    [1, 0, 0, 0, 0, 1, 1, 1, 1], #   0x46     70
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x47     71
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x48     72
    [1, 0, 0, 1, 0, 0, 0, 0, 0], #   0x49     73
    [1, 0, 0, 0, 0, 1, 1, 1, 1], #   0x4a     74
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x4b     75
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x4c     76
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x4d     77
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #   0x4e     78
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #   0x4f     79
    [1, 0, 0, 1, 0, 0, 0, 0, 0], #   0x50     80
    [1, 0, 0, 0, 0, 0, 0, 0, 0], #   0x51     81
    [1, 0, 0, 0, 0, 0, 0, 0, 0], #   0x52     82
    [1, 0, 0, 0, 0, 0, 0, 0, 0], #   0x53     83
    [1, 0, 0, 0, 0, 0, 0, 0, 0], #   0x54     84
    [0, 0, 0, 0, 0, 0, 1, 1, 0], #   0x55     85
    [0, 0, 0, 0, 1, 0, 1, 1, 0], #   0x56     86
    [0, 1, 0, 0, 1, 1, 0, 0, 1], #   0x57     87
    [1, 0, 0, 0, 0, 0, 0, 0, 0], #   0x58     88
    [1, 0, 1, 0, 0, 0, 0, 0, 0], #   0x59     89
    [1, 0, 1, 0, 0, 0, 0, 0, 0], #   0x5a     90
    [1, 0, 1, 0, 0, 0, 0, 0, 0], #   0x5b     91
    [1, 1, 1, 0, 1, 0, 0, 0, 0], #   0x5c     92
    [1, 1, 0, 0, 1, 0, 0, 0, 0], #   0x5d     93
    [1, 0, 1, 0, 0, 0, 0, 1, 0], #   0x5e     94
    [1, 1, 0, 0, 1, 0, 0, 0, 0], #   0x5f     95
    [1, 1, 0, 1, 1, 0, 1, 1, 1], #   0x60     96
    [1, 1, 0, 0, 1, 0, 1, 1, 1], #   0x61     97
    [1, 1, 0, 0, 0, 0, 0, 0, 0], #   0x62     98
    [1, 1, 0, 0, 0, 0, 0, 0, 0], #   0x63     99
    [1, 0, 0, 0, 0, 0, 0, 0, 0], #   0x64    100
    [1, 0, 0, 0, 0, 0, 0, 0, 0], #   0x65    101
    [1, 1, 1, 1, 1, 0, 1, 1, 1], #   0x66    102
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x67    103
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x68    104
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x69    105
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x6a    106
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x6b    107
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x6c    108
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x6d    109
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x6e    110
    [1, 1, 1, 1, 1, 1, 1, 1, 1], #   0x6f    111
    [1, 1, 1, 0, 1, 1, 0, 0, 0], #   0x70    112
    [0, 1, 1, 0, 0, 1, 0, 0, 0], #   0x71    113
    [0, 1, 1, 0, 0, 1, 0, 0, 0], #   0x72    114
    [0, 1, 1, 0, 0, 0, 0, 0, 0], #   0x73    115
    [0, 1, 1, 0, 0, 0, 0, 0, 0], #   0x74    116
    [0, 1, 0, 0, 0, 0, 0, 0, 0], #   0x75    117
    [0, 1, 0, 0, 0, 0, 0, 0, 0], #   0x76    118
    [0, 1, 0, 0, 0, 0, 0, 0, 0], #   0x77    119
    [0, 1, 0, 0, 0, 0, 0, 0, 0], #   0x78    120
    [0, 1, 0, 0, 0, 0, 0, 0, 0], #   0x79    121
    [0, 1, 0, 0, 0, 0, 0, 0, 0], #   0x7a    122
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #   0x7b    123
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #   0x7c    124
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #   0x7d    125
    [0, 0, 0, 0, 0, 0, 0, 0, 0], #   0x7e    126
    [0, 0, 0, 0, 0, 0, 0, 0, 0]  #   0x7f    127
    ]
 
 # special characters formed by combination of consonants and nukta
 
 nukta_specials = {
    0xA6 : 0x090c,
    0xEA : 0x093D,
    0xDF : 0x0944,
    0xA1 : 0x0950,
    0xB3 : 0x0958,
    0xB4 : 0x0959,
    0xB5 : 0x095A,
    0xBA : 0x095B,
    0xBF : 0x095C,
    0xC0 : 0x095D,
    0xC9 : 0x095E,
    0xAA : 0x0960,
    0xA7 : 0x0961,
    0xDB : 0x0962,
    0xDC : 0x0963
    }
   
 
 # Per-script overrides applied on top of the generated tables, keyed by
 # (script index, ISCII byte).
 #
 # ISCII-91 distinguishes Assamese from Bengali by two letters (0xCF and
 # 0xD4, U+09F0 and U+09F1).  They used to be registered here under script
 # index 5, but index 5 is Tamil (see ISCII_SCRIPTS and show_usage), so the
 # Tamil letters RA and VA were replaced by Bengali-block code points.
 # Assamese shares index 1 with Bengali and has no table of its own, so the
 # overrides cannot be attached anywhere without breaking another script;
 # they are therefore left out until Assamese gets a separate index.
 special_maps = {}
 
 
 
 def make_script_maps():
     """
     Build the ISCII byte -> Unicode code point table of every script.

     Returns a dict keyed by script index (0-8).  Each value is a dict that
     maps 0x00-0x9F to itself and every ISCII byte that is valid for the
     script to its code point; bytes without a valid target are absent.
     """

     # bytes that have no Unicode counterpart at all (0xffff in
     # iscii_to_unicode); lists are concatenated explicitly because range()
     # objects cannot be added on Python 3
     _invalid_range = list(range(0xEB, 0xF1)) + list(range(0xFB, 0xFF + 1))
     scripts = {}

     for i in range(9):
         curr_scr = {}

         for ch in range(0xA0):
             curr_scr[ch] = ch

         for ch in range(0xA0, 0xFF + 1):

             if ch in _invalid_range:
                 continue

             t = iscii_to_unicode[ch]

             if t < UNI_BEGIN or t > UNI_END:
                 # not a Devanagari-block target (ZWJ): validation_table has
                 # no row for it and it must not be moved to another script
                 # block
                 curr_scr[ch] = t

             elif validation_table[t & 0xFF][i]:
                 # same offset inside the block of script i
                 curr_scr[ch] = t + (i * 0x80)

         scripts[i] = curr_scr

     for key in special_maps:
         scripts[key[0]][key[1]] = special_maps[key]

     return scripts
 
 ## setup the script map  (iscii -> unicode) for all the supported scripts
 ## hope it speeds up things
 
 script_maps = make_script_maps()
 
 ####
 
 def make_invalid_maps():
     """
     Build, for every script, a table telling which bytes must be dropped.

     Returns a dict keyed by script index (0-8); each value maps every byte
     0x00-0xFF to a true value when the byte has no entry in script_maps for
     that script and is not one of the ISCII_SPECIALS control bytes.
     """
     maps = {}

     for i in range(9):
         known = script_maps[i]
         curr_map = {}

         for j in range(0xFF + 1):
             # "in" replaces dict.has_key(), which Python 3 no longer has
             curr_map[j] = (j not in known) and (j not in ISCII_SPECIALS)

         maps[i] = curr_map

     return maps
 
 ## setup the map of invalid characters for each script
 
 invalid_chars = make_invalid_maps()
 
 ## tmp code
 
 # bytes that need a look at their neighbour before anything can be emitted
 _tmp = ISCII_SPECIALS + [ISCII_HALANT, ISCII_NUKTA, ISCII_DANDA]
 iscii_modifying = {}

 # 1 for the bytes listed above, 0 for every other byte value
 for i in range(0x100):
     if i in _tmp:
         iscii_modifying[i] = 1
     else:
         iscii_modifying[i] = 0
 
 ## end of tmp code
 
 def to_utf8(y):
     """
     Encode a sequence of Unicode code points as UTF-8.

     y is an iterable of integer code points.  The result is a string with
     one character per encoded byte (chr() of each byte value), which on
     Python 2 is the UTF-8 byte string itself.
     """

     out = []

     for x in y:

         if x < 0x080:
             out.append(x)
         elif x < 0x0800:
             out.append((x >> 6) | 0xC0)
             out.append((x & 0x3F) | 0x80)
         elif x < 0x10000:
             out.append((x >> 12) | 0xE0)
             out.append(((x >> 6) & 0x3F) | 0x80)
             out.append((x & 0x3F) | 0x80)
         else:
             out.append((x >> 18) | 0xF0)
             # every continuation byte carries the 10xxxxxx marker; it was
             # missing on this one
             out.append(((x >> 12) & 0x3F) | 0x80)
             out.append(((x >> 6) & 0x3F) | 0x80)
             out.append((x & 0x3F) | 0x80)

     return ''.join(map(chr, out))
       
   
 class IllegalInput(Exception):
     """Error for input the converter refuses; the payload is kept in .exception."""

     def __init__(self, e):
         # the value is only stored; str() shows its repr
         self.exception = e

     def __str__(self):
         return "%r" % (self.exception,)
   
 
 class Parser:
     """
     Chunk-wise ISCII to Unicode converter.

     Feed byte chunks to iscii2utf8(); the resulting code points pile up in
     self.dest until write_output() encodes and writes them to stdout.  The
     active script is selected with set_script() and can be switched by ATR
     sequences embedded in the input.
     """

     # U+0970 DEVANAGARI ABBREVIATION SIGN, the target of EXT + 0xBF.  Kept on
     # the class so handle_ext() does not depend on a module-level constant.
     DEV_ABBR_SIGN = 0x0970

     def __init__(self):

         self.delta = 0
         self.curr_mask = 0 # current mask to unicode

         # start as Devanagari (index 0, delta 0) so the parser is usable
         # before set_script() is called
         self.curr_script = 0

         self.prev_char = self.src_char = self.dest_char = NO_CHAR

         self.dest = []
         self.pos = 0

         self.stat = [0] * 10

     def write_output(self):
         """
         Encode the pending code points with to_utf8(), write them to stdout
         and empty self.dest.
         """

         out = to_utf8(self.dest)

         # Python 3: stdout is a text stream, encoded bytes have to go to its
         # underlying binary buffer.  Python 2 file objects have no such
         # attribute and take the string directly.
         raw = getattr(sys.stdout, 'buffer', None)

         if raw is None:
             sys.stdout.write(out)
         elif isinstance(out, bytes):
             raw.write(out)
         else:
             # to_utf8() returns one character per byte; latin-1 turns that
             # back into the identical byte values
             raw.write(out.encode('latin-1'))

         self.dest = []

     def set_script(self, i):
         """
         Select the current script.

         i is the 1-based script number (1-9, same numbering as the command
         line).  Sets self.curr_script to the 0-based index and self.delta to
         the offset of that script's block from the Devanagari block.

         Raises IllegalInput for any other value.
         """

         if i not in range(1, 10):
             raise IllegalInput("Invalid Value for ATR %s" % (hex(i)))

         self.curr_script = i - 1
         self.delta = self.curr_script * DELTA

         return

     def isvalid(self, i):
         """
         Tell whether the Devanagari-block code point i has a counterpart in
         the current script (looked up by its low byte in validation_table).
         """

         return validation_table[i & 0xFF][self.curr_script]

     def isvalid_iscii(self, x):
         """
         Tell whether the ISCII byte x is accepted for the current script.

         invalid_chars holds an entry for every byte, so the stored flag has
         to be read; testing for the key alone always reported "invalid".
         """

         return not invalid_chars[self.curr_script].get(x, True)

     def is_nukta_special(self, i):
         """
         Return the Devanagari code point formed by ISCII byte i followed by
         a nukta, or None when the pair has no dedicated code point.
         """

         return nukta_specials.get(i, None)

     def handle_ext(self, curr_char):
         """
         Resolve the byte that follows an EXT marker.

         Only 0xBF (abbreviation sign) and 0xB8 (anudatta) are supported.
         Returns the code point for the current script, or None (after a
         message on stderr) when the byte is unsupported or has no
         counterpart in the current script.
         """

         dest_char = None

         if EXT_RANGE_BEGIN <= curr_char <= EXT_RANGE_END:
             if curr_char == 0xBF:
                 dest_char = self.DEV_ABBR_SIGN
             elif curr_char == 0xB8:
                 dest_char = DEV_ANUDATTA

         if dest_char is not None and self.isvalid(dest_char):
             # isvalid() checked the counterpart in the current script's
             # block, so that counterpart is what has to be returned
             return dest_char + self.delta

         sys.stderr.write("Invalid input after EXT %s\n" % (hex(curr_char)))
         return None

     def handle_atr(self, i):
         """
         Apply the attribute byte i that follows an ATR marker.

         Script-selecting values switch the current script; the default
         value (mapped to -1 in ISCII_SCRIPTS) and unknown values leave it
         unchanged.  Never produces output, so None is returned.
         """

         sys.stderr.write("Handling ATR: ")

         script = ISCII_SCRIPTS.get(i)

         if script is not None and script > -1:
             # ISCII_SCRIPTS stores 0-based indices, set_script() expects the
             # 1-based number
             self.set_script(script + 1)
             sys.stderr.write("setting script to %s\n" % (i,))
         else:
             # ignore all other ATR markers
             sys.stderr.write("ignored\n")

         return None

     def handle_inv(self, i):
         """
         Resolve the byte i that follows an INV marker: a space for INV +
         halant, ZWJ for everything else.
         """

         if i == ISCII_HALANT:
             return 0x0020

         return ZWJ

     def post_analysis(self, prev_char, src_char):
         """
         Dispatch src_char to the handler of the special byte prev_char
         (ATR, EXT or INV).  Returns the code point to emit or None.
         """

         ret = None

         if prev_char == ISCII_ATR:
             ret = self.handle_atr(src_char)

         elif prev_char == ISCII_EXT:
             ret = self.handle_ext(src_char)

         elif prev_char == ISCII_INV:
             ret = self.handle_inv(src_char)

         return ret

     def _emit(self, ch):
         """Append ch to self.dest, mapping ISCII bytes through the current script table."""

         if ch <= 0xFF:
             ch = script_maps[self.curr_script][ch]

         self.dest.append(ch)

     def iscii2utf8(self, src, flush = 0):
         """
         Convert one chunk of ISCII bytes, appending code points to self.dest.

         One byte of look-ahead is needed (nukta, halant, danda and the
         special markers combine with a neighbour), so the last accepted byte
         normally stays pending.

         src   -- byte string (Python 2 str / Python 3 bytes)
         flush -- true for the final call: the pending byte is emitted too

         Returns the number of leading bytes of src that are completely
         processed (also stored in self.pos).  The caller must put
         src[returned:] in front of the next chunk.
         """

         src = array.array('B', src).tolist()
         n = len(src)

         prev_char = NO_CHAR
         pending_at = 0      # index in src of the byte held in prev_char

         for i in range(n):
             curr_char = src[i]
             dest_char = NO_CHAR
             add_prev = 0

             if invalid_chars[self.curr_script][curr_char]:
                 # just ignore the invalid iscii characters
                 sys.stderr.write('ignoring invalid iscii char %s\n'
                                  % (hex(curr_char)))
                 continue

             if prev_char == NO_CHAR:
                 prev_char = curr_char
                 pending_at = i
                 continue

             if prev_char in ISCII_SPECIALS:
                 # curr_char is the argument of the marker and is used up
                 ret = self.post_analysis(prev_char, curr_char)

                 if ret is not None:
                     dest_char = ret

                 prev_char = NO_CHAR

             elif not iscii_modifying[curr_char]:
                 pass

             elif curr_char in ISCII_SPECIALS:
                 dest_char = prev_char
                 prev_char = curr_char
                 pending_at = i

             elif (curr_char == ISCII_DANDA) and (prev_char == ISCII_DANDA):
                 dest_char = DOUBLE_DANDA
                 prev_char = NO_CHAR

             elif (curr_char == ISCII_HALANT) and (prev_char == ISCII_HALANT):
                 dest_char = ZWNJ
                 add_prev = 1

             elif curr_char == ISCII_NUKTA:
                 if prev_char == ISCII_HALANT:
                     dest_char = ZWJ
                     add_prev = 1

                 else:
                     tmp = self.is_nukta_special(prev_char)

                     if tmp is not None and self.isvalid(tmp):
                         # nukta_specials holds Devanagari values; move the
                         # result into the block of the current script
                         dest_char = tmp + self.delta
                         prev_char = NO_CHAR

             if add_prev == 1:
                 self._emit(prev_char)
                 prev_char = NO_CHAR

             if dest_char != NO_CHAR:
                 self._emit(dest_char)

             elif prev_char != NO_CHAR:
                 self._emit(prev_char)
                 prev_char = curr_char
                 pending_at = i

         if flush and prev_char != NO_CHAR:
             # end of input: nothing can follow the pending byte any more
             if prev_char == ISCII_INV:
                 # same result handle_inv() gives for INV + non-halant
                 self._emit(ZWJ)
             elif prev_char in ISCII_SPECIALS:
                 sys.stderr.write('ignoring incomplete sequence at end of '
                                  'input %s\n' % (hex(prev_char)))
             else:
                 self._emit(prev_char)

             prev_char = NO_CHAR

         # Everything before the pending byte is done.  Reporting a position
         # instead of a running count stays correct when bytes were skipped
         # or consumed without producing output.
         if prev_char == NO_CHAR:
             self.pos = n
         else:
             self.pos = pending_at

         return self.pos
   
 
 def show_usage(name):
     """
     Print the command line help for program `name` on stderr and exit
     with status 1.
     """
     usage = """
   Usage:
 
   %s script
 
   where script is a number between 1-9
 
   1 - devnag
   2 - bengali / assamese
   3 - punjabi
   4 - gujarati
   5 - oriya
   6 - tamil
   7 - telugu
   8 - kannada
   9 - malayalam
 
   the program reads from stdin and writes to stdout
 
   any msgs to the user (error msgs etc) are printed on stderr
   """ % (name)

     # same output as the Python 2 "print >> sys.stderr, usage" statement,
     # but also valid on Python 3
     sys.stderr.write(usage + "\n")
     sys.exit(1)
 
 
 # number of bytes requested from stdin per read
 chunk_size = 4096

 if __name__ == '__main__':

     try:

         i = int(sys.argv[1])

         if i not in range(1, 10):
             raise ValueError

     except (ValueError, IndexError):
         show_usage(sys.argv[0])

     mypar = Parser()
     mypar.set_script(i)

     # ISCII is a byte encoding: on Python 3 read from the binary buffer
     # behind sys.stdin; on Python 2 sys.stdin already yields byte strings
     stdin = getattr(sys.stdin, 'buffer', sys.stdin)

     y = None    # bytes the parser could not finish in the previous round
     flush = 0

     while not flush:

         x = stdin.read(chunk_size)

         if not x:
             # end of input: one last round to flush what is still pending
             flush = 1

         if y:
             x = y + x

         n = mypar.iscii2utf8(x, flush)
         y = x[n:]

         mypar.write_output()
	#!/usr/bin/env python

	# released under BSD License

	# mary <[email protected]> aka meyarivan <[email protected]>

	# inspired by ICU [ http://oss.software.ibm.com/icu/ ]

	# code still in alpha stage.. lots of redundant code.. and probably incorrect
	# if ya find errors, pls submit bug reports at indlinux

	# for usage, either run the script or scroll down to end of the script

	import sys
	import array

	# Generic Constants

	# Code points above 0xFF are Unicode values, the ISCII_* names are ISCII bytes.
	ISCII_ATR = 0x00EF
	ATR_MASK = 0x004F
	DANDA = 0x0964
	# distance between two consecutive script blocks (see set_script)
	DELTA = 0x0080
	# U+0970 DEVANAGARI ABBREVIATION SIGN; Parser.handle_ext() refers to this
	# name for EXT + 0xBF, it used to be undefined
	DEV_ABBR_SIGN = 0x0970
	DEV_ANUDATTA = 0x0952
	DOUBLE_DANDA = 0x0965
	ISCII_EXT = 0x00F0
	EXT_RANGE_BEGIN = 0x00A1
	EXT_RANGE_END = 0x00EE
	HALANT = 0x094d
	INDIC_BLOCK_BEGIN = 0x0900
	INDIC_BLOCK_END = 0x0D7F
	INVALID_CHAR = 0xFFFF
	ISCII_BEGIN = 0x00A0
	ISCII_DANDA = 0x00EA
	ISCII_HALANT = 0x00E8
	ISCII_INV = 0x00D9
	ISCII_NUKTA = 0x00E9
	LF = 0x000A
	# marker for "no character pending / nothing to emit"
	NO_CHAR = 0xFFFE
	UNI_BEGIN = 0x0900
	UNI_END = 0x097F
	ZWJ = 0x200d
	ZWNJ = 0x200c


	# map between the ISCII scripts as specified via the ATR switch
	# and the script in unicode

	# bengali and assamese have same script except for two characters
	# (according to ISCII-91)

	ISCII_SCRIPTS = {
	0x40 : -1, # DEFAULT
	0x42 : 0, # DEVNAG
	0x43 : 1, # BENGALI
	0x44 : 5, # TAMIL
	0x45 : 6, # TELUGU
	0x46 : 1, # ASSAMESE
	0x47 : 4, # ORIYA
	0x48 : 7, # KANNADA
	0x49 : 8, # MALAYALAM
	0x4A : 3, # GUJARATI
	0x4B : 2 # PUNJABI
	}

	# ISCII_SPECIAL CHARS

	ISCII_SPECIALS = [ISCII_ATR, ISCII_EXT, ISCII_INV]

	iscii_to_unicode = {
	0xa0 : 0x900,
	0xa1 : 0x901,
	0xa2 : 0x902,
	0xa3 : 0x903,
	0xa4 : 0x905,
	0xa5 : 0x906,
	0xa6 : 0x907,
	0xa7 : 0x908,
	0xa8 : 0x909,
	0xa9 : 0x90a,
	0xaa : 0x90b,
	0xab : 0x90e,
	0xac : 0x90f,
	0xad : 0x910,
	0xae : 0x90d,
	0xaf : 0x912,
	0xb0 : 0x913,
	0xb1 : 0x914,
	0xb2 : 0x911,
	0xb3 : 0x915,
	0xb4 : 0x916,
	0xb5 : 0x917,
	0xb6 : 0x918,
	0xb7 : 0x919,
	0xb8 : 0x91a,
	0xb9 : 0x91b,
	0xba : 0x91c,
	0xbb : 0x91d,
	0xbc : 0x91e,
	0xbd : 0x91f,
	0xbe : 0x920,
	0xbf : 0x921,
	0xc0 : 0x922,
	0xc1 : 0x923,
	0xc2 : 0x924,
	0xc3 : 0x925,
	0xc4 : 0x926,
	0xc5 : 0x927,
	0xc6 : 0x928,
	0xc7 : 0x929,
	0xc8 : 0x92a,
	0xc9 : 0x92b,
	0xca : 0x92c,
	0xcb : 0x92d,
	0xcc : 0x92e,
	0xcd : 0x92f,
	0xce : 0x95f,
	0xcf : 0x930,
	0xd0 : 0x931,
	0xd1 : 0x932,
	0xd2 : 0x933,
	0xd3 : 0x934,
	0xd4 : 0x935,
	0xd5 : 0x936,
	0xd6 : 0x937,
	0xd7 : 0x938,
	0xd8 : 0x939,
	0xd9 : 0x200d,
	0xda : 0x93e,
	0xdb : 0x93f,
	0xdc : 0x940,
	0xdd : 0x941,
	0xde : 0x942,
	0xdf : 0x943,
	0xe0 : 0x946,
	0xe1 : 0x947,
	0xe2 : 0x948,
	0xe3 : 0x945,
	0xe4 : 0x94a,
	0xe5 : 0x94b,
	0xe6 : 0x94c,
	0xe7 : 0x949,
	0xe8 : 0x94d,
	0xe9 : 0x93c,
	0xea : 0x964,
	0xeb : 0xffff,
	0xec : 0xffff,
	0xed : 0xffff,
	0xee : 0xffff,
	0xef : 0xffff,
	0xf0 : 0xffff,
	0xf1 : 0x966,
	0xf2 : 0x967,
	0xf3 : 0x968,
	0xf4 : 0x969,
	0xf5 : 0x96a,
	0xf6 : 0x96b,
	0xf7 : 0x96c,
	0xf8 : 0x96d,
	0xf9 : 0x96e,
	0xfa : 0x96f,
	0xfb : 0xffff,
	0xfc : 0xffff,
	0xfd : 0xffff,
	0xfe : 0xffff,
	0xff : 0xffff
	}


	"""
	# code to generate the validation_table
	( ya ya .. agreed that it is kludgy)

	# need python 2.3

	import unicodedata

	UNI_BEGIN = 0x0900
	UNI_END = 0x097F
	DELTA = 0x80
	SCRS = 9

	table = []

	for char in range(UNI_END - UNI_BEGIN + 1):
	res = [0] * SCRS
	for scr in range(0, SCRS):
	val = unichr(UNI_BEGIN + (scr * DELTA) + char)
	res[scr] = int(unicodedata.name(val, None) is not None)

	table.append(res)

	"""

	validation_table = [
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x0 0
	[1, 1, 0, 1, 1, 0, 1, 0, 0], # 0x1 1
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x2 2
	[1, 1, 0, 1, 1, 1, 1, 1, 1], # 0x3 3
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x4 4
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x5 5
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x6 6
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x7 7
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x8 8
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x9 9
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0xa 10
	[1, 1, 0, 1, 1, 0, 1, 1, 1], # 0xb 11
	[1, 1, 0, 0, 1, 0, 1, 1, 1], # 0xc 12
	[1, 0, 0, 1, 0, 0, 0, 0, 0], # 0xd 13
	[1, 0, 0, 0, 0, 1, 1, 1, 1], # 0xe 14
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0xf 15
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x10 16
	[1, 0, 0, 1, 0, 0, 0, 0, 0], # 0x11 17
	[1, 0, 0, 0, 0, 1, 1, 1, 1], # 0x12 18
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x13 19
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x14 20
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x15 21
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x16 22
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x17 23
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x18 24
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x19 25
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x1a 26
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x1b 27
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x1c 28
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x1d 29
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x1e 30
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x1f 31
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x20 32
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x21 33
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x22 34
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x23 35
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x24 36
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x25 37
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x26 38
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x27 39
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x28 40
	[1, 0, 0, 0, 0, 1, 0, 0, 0], # 0x29 41
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x2a 42
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x2b 43
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x2c 44
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x2d 45
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x2e 46
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x2f 47
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x30 48
	[1, 0, 0, 0, 0, 1, 1, 1, 1], # 0x31 49
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x32 50
	[1, 0, 1, 1, 1, 1, 1, 1, 1], # 0x33 51
	[1, 0, 0, 0, 0, 1, 0, 0, 1], # 0x34 52
	[1, 0, 1, 1, 0, 1, 1, 1, 1], # 0x35 53
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x36 54
	[1, 1, 0, 1, 1, 1, 1, 1, 1], # 0x37 55
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x38 56
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x39 57
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x3a 58
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x3b 59
	[1, 1, 1, 1, 1, 0, 0, 0, 0], # 0x3c 60
	[1, 0, 0, 1, 1, 0, 0, 0, 0], # 0x3d 61
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x3e 62
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x3f 63
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x40 64
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x41 65
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x42 66
	[1, 1, 0, 1, 1, 0, 1, 1, 1], # 0x43 67
	[1, 1, 0, 1, 0, 0, 1, 1, 0], # 0x44 68
	[1, 0, 0, 1, 0, 0, 0, 0, 0], # 0x45 69
	[1, 0, 0, 0, 0, 1, 1, 1, 1], # 0x46 70
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x47 71
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x48 72
	[1, 0, 0, 1, 0, 0, 0, 0, 0], # 0x49 73
	[1, 0, 0, 0, 0, 1, 1, 1, 1], # 0x4a 74
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x4b 75
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x4c 76
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x4d 77
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x4e 78
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x4f 79
	[1, 0, 0, 1, 0, 0, 0, 0, 0], # 0x50 80
	[1, 0, 0, 0, 0, 0, 0, 0, 0], # 0x51 81
	[1, 0, 0, 0, 0, 0, 0, 0, 0], # 0x52 82
	[1, 0, 0, 0, 0, 0, 0, 0, 0], # 0x53 83
	[1, 0, 0, 0, 0, 0, 0, 0, 0], # 0x54 84
	[0, 0, 0, 0, 0, 0, 1, 1, 0], # 0x55 85
	[0, 0, 0, 0, 1, 0, 1, 1, 0], # 0x56 86
	[0, 1, 0, 0, 1, 1, 0, 0, 1], # 0x57 87
	[1, 0, 0, 0, 0, 0, 0, 0, 0], # 0x58 88
	[1, 0, 1, 0, 0, 0, 0, 0, 0], # 0x59 89
	[1, 0, 1, 0, 0, 0, 0, 0, 0], # 0x5a 90
	[1, 0, 1, 0, 0, 0, 0, 0, 0], # 0x5b 91
	[1, 1, 1, 0, 1, 0, 0, 0, 0], # 0x5c 92
	[1, 1, 0, 0, 1, 0, 0, 0, 0], # 0x5d 93
	[1, 0, 1, 0, 0, 0, 0, 1, 0], # 0x5e 94
	[1, 1, 0, 0, 1, 0, 0, 0, 0], # 0x5f 95
	[1, 1, 0, 1, 1, 0, 1, 1, 1], # 0x60 96
	[1, 1, 0, 0, 1, 0, 1, 1, 1], # 0x61 97
	[1, 1, 0, 0, 0, 0, 0, 0, 0], # 0x62 98
	[1, 1, 0, 0, 0, 0, 0, 0, 0], # 0x63 99
	[1, 0, 0, 0, 0, 0, 0, 0, 0], # 0x64 100
	[1, 0, 0, 0, 0, 0, 0, 0, 0], # 0x65 101
	[1, 1, 1, 1, 1, 0, 1, 1, 1], # 0x66 102
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x67 103
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x68 104
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x69 105
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x6a 106
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x6b 107
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x6c 108
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x6d 109
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x6e 110
	[1, 1, 1, 1, 1, 1, 1, 1, 1], # 0x6f 111
	[1, 1, 1, 0, 1, 1, 0, 0, 0], # 0x70 112
	[0, 1, 1, 0, 0, 1, 0, 0, 0], # 0x71 113
	[0, 1, 1, 0, 0, 1, 0, 0, 0], # 0x72 114
	[0, 1, 1, 0, 0, 0, 0, 0, 0], # 0x73 115
	[0, 1, 1, 0, 0, 0, 0, 0, 0], # 0x74 116
	[0, 1, 0, 0, 0, 0, 0, 0, 0], # 0x75 117
	[0, 1, 0, 0, 0, 0, 0, 0, 0], # 0x76 118
	[0, 1, 0, 0, 0, 0, 0, 0, 0], # 0x77 119
	[0, 1, 0, 0, 0, 0, 0, 0, 0], # 0x78 120
	[0, 1, 0, 0, 0, 0, 0, 0, 0], # 0x79 121
	[0, 1, 0, 0, 0, 0, 0, 0, 0], # 0x7a 122
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x7b 123
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x7c 124
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x7d 125
	[0, 0, 0, 0, 0, 0, 0, 0, 0], # 0x7e 126
	[0, 0, 0, 0, 0, 0, 0, 0, 0] # 0x7f 127
	]

	# special characters formed by combination of consonants and nukta

	nukta_specials = {
	0xA6 : 0x090c,
	0xEA : 0x093D,
	0xDF : 0x0944,
	0xA1 : 0x0950,
	0xB3 : 0x0958,
	0xB4 : 0x0959,
	0xB5 : 0x095A,
	0xBA : 0x095B,
	0xBF : 0x095C,
	0xC0 : 0x095D,
	0xC9 : 0x095E,
	0xAA : 0x0960,
	0xA7 : 0x0961,
	0xDB : 0x0962,
	0xDC : 0x0963
	}


	# Per-script overrides applied on top of the generated tables, keyed by
	# (script index, ISCII byte).
	#
	# ISCII-91 distinguishes Assamese from Bengali by two letters (0xCF and
	# 0xD4, U+09F0 and U+09F1).  They used to be registered here under script
	# index 5, but index 5 is Tamil (see ISCII_SCRIPTS and show_usage), so the
	# Tamil letters RA and VA were replaced by Bengali-block code points.
	# Assamese shares index 1 with Bengali and has no table of its own, so the
	# overrides cannot be attached anywhere without breaking another script;
	# they are therefore left out until Assamese gets a separate index.
	special_maps = {}



	def make_script_maps():
	    """
	    Build the ISCII byte -> Unicode code point table of every script.

	    Returns a dict keyed by script index (0-8).  Each value is a dict that
	    maps 0x00-0x9F to itself and every ISCII byte that is valid for the
	    script to its code point; bytes without a valid target are absent.
	    """

	    # bytes that have no Unicode counterpart at all (0xffff in
	    # iscii_to_unicode); lists are concatenated explicitly because range()
	    # objects cannot be added on Python 3
	    _invalid_range = list(range(0xEB, 0xF1)) + list(range(0xFB, 0xFF + 1))
	    scripts = {}

	    for i in range(9):
	        curr_scr = {}

	        for ch in range(0xA0):
	            curr_scr[ch] = ch

	        for ch in range(0xA0, 0xFF + 1):

	            if ch in _invalid_range:
	                continue

	            t = iscii_to_unicode[ch]

	            if t < UNI_BEGIN or t > UNI_END:
	                # not a Devanagari-block target (ZWJ): validation_table has
	                # no row for it and it must not be moved to another script
	                # block
	                curr_scr[ch] = t

	            elif validation_table[t & 0xFF][i]:
	                # same offset inside the block of script i
	                curr_scr[ch] = t + (i * 0x80)

	        scripts[i] = curr_scr

	    for key in special_maps:
	        scripts[key[0]][key[1]] = special_maps[key]

	    return scripts

	## setup the script map (iscii -> unicode) for all the supported scripts
	## hope it speeds up things

	script_maps = make_script_maps()

	####

	def make_invalid_maps():
	    """
	    Build, for every script, a table telling which bytes must be dropped.

	    Returns a dict keyed by script index (0-8); each value maps every byte
	    0x00-0xFF to a true value when the byte has no entry in script_maps for
	    that script and is not one of the ISCII_SPECIALS control bytes.
	    """
	    maps = {}

	    for i in range(9):
	        known = script_maps[i]
	        curr_map = {}

	        for j in range(0xFF + 1):
	            # "in" replaces dict.has_key(), which Python 3 no longer has
	            curr_map[j] = (j not in known) and (j not in ISCII_SPECIALS)

	        maps[i] = curr_map

	    return maps

	## setup the map of invalid characters for each script

	invalid_chars = make_invalid_maps()

	## tmp code

	# 1 for the bytes that need a look at their neighbour before anything can
	# be emitted, 0 for every other byte value
	iscii_modifying = {}
	_tmp = ISCII_SPECIALS + [ISCII_HALANT, ISCII_NUKTA, ISCII_DANDA]

	for i in range(0xFF + 1):
	    iscii_modifying[i] = int(i in _tmp)

	## end of tmp code

	def to_utf8(y):
	    """
	    Encode a sequence of Unicode code points as UTF-8.

	    y is an iterable of integer code points.  The result is a string with
	    one character per encoded byte (chr() of each byte value), which on
	    Python 2 is the UTF-8 byte string itself.
	    """

	    out = []

	    for x in y:

	        if x < 0x080:
	            out.append(x)
	        elif x < 0x0800:
	            out.append((x >> 6) | 0xC0)
	            out.append((x & 0x3F) | 0x80)
	        elif x < 0x10000:
	            out.append((x >> 12) | 0xE0)
	            out.append(((x >> 6) & 0x3F) | 0x80)
	            out.append((x & 0x3F) | 0x80)
	        else:
	            out.append((x >> 18) | 0xF0)
	            # every continuation byte carries the 10xxxxxx marker; it was
	            # missing on this one
	            out.append(((x >> 12) & 0x3F) | 0x80)
	            out.append(((x >> 6) & 0x3F) | 0x80)
	            out.append((x & 0x3F) | 0x80)

	    return ''.join(map(chr, out))


	class IllegalInput(Exception):
	    """Error for input the converter refuses; the payload is kept in .exception."""

	    def __init__(self, e):
	        # the value is only stored; str() shows its repr
	        self.exception = e

	    def __str__(self):
	        return repr(self.exception)


	class Parser:
	    """
	    Chunk-wise ISCII to Unicode converter.

	    Feed byte chunks to iscii2utf8(); the resulting code points pile up in
	    self.dest until write_output() encodes and writes them to stdout.  The
	    active script is selected with set_script() and can be switched by ATR
	    sequences embedded in the input.
	    """

	    # U+0970 DEVANAGARI ABBREVIATION SIGN, the target of EXT + 0xBF.  Kept on
	    # the class so handle_ext() does not depend on a module-level constant.
	    DEV_ABBR_SIGN = 0x0970

	    def __init__(self):

	        self.delta = 0
	        self.curr_mask = 0 # current mask to unicode

	        # start as Devanagari (index 0, delta 0) so the parser is usable
	        # before set_script() is called
	        self.curr_script = 0

	        self.prev_char = self.src_char = self.dest_char = NO_CHAR

	        self.dest = []
	        self.pos = 0

	        self.stat = [0] * 10

	    def write_output(self):
	        """
	        Encode the pending code points with to_utf8(), write them to stdout
	        and empty self.dest.
	        """

	        out = to_utf8(self.dest)

	        # Python 3: stdout is a text stream, encoded bytes have to go to its
	        # underlying binary buffer.  Python 2 file objects have no such
	        # attribute and take the string directly.
	        raw = getattr(sys.stdout, 'buffer', None)

	        if raw is None:
	            sys.stdout.write(out)
	        elif isinstance(out, bytes):
	            raw.write(out)
	        else:
	            # to_utf8() returns one character per byte; latin-1 turns that
	            # back into the identical byte values
	            raw.write(out.encode('latin-1'))

	        self.dest = []

	    def set_script(self, i):
	        """
	        Select the current script.

	        i is the 1-based script number (1-9, same numbering as the command
	        line).  Sets self.curr_script to the 0-based index and self.delta to
	        the offset of that script's block from the Devanagari block.

	        Raises IllegalInput for any other value.
	        """

	        if i not in range(1, 10):
	            raise IllegalInput("Invalid Value for ATR %s" % (hex(i)))

	        self.curr_script = i - 1
	        self.delta = self.curr_script * DELTA

	        return

	    def isvalid(self, i):
	        """
	        Tell whether the Devanagari-block code point i has a counterpart in
	        the current script (looked up by its low byte in validation_table).
	        """

	        return validation_table[i & 0xFF][self.curr_script]

	    def isvalid_iscii(self, x):
	        """
	        Tell whether the ISCII byte x is accepted for the current script.

	        invalid_chars holds an entry for every byte, so the stored flag has
	        to be read; testing for the key alone always reported "invalid".
	        """

	        return not invalid_chars[self.curr_script].get(x, True)

	    def is_nukta_special(self, i):
	        """
	        Return the Devanagari code point formed by ISCII byte i followed by
	        a nukta, or None when the pair has no dedicated code point.
	        """

	        return nukta_specials.get(i, None)

	    def handle_ext(self, curr_char):
	        """
	        Resolve the byte that follows an EXT marker.

	        Only 0xBF (abbreviation sign) and 0xB8 (anudatta) are supported.
	        Returns the code point for the current script, or None (after a
	        message on stderr) when the byte is unsupported or has no
	        counterpart in the current script.
	        """

	        dest_char = None

	        if EXT_RANGE_BEGIN <= curr_char <= EXT_RANGE_END:
	            if curr_char == 0xBF:
	                dest_char = self.DEV_ABBR_SIGN
	            elif curr_char == 0xB8:
	                dest_char = DEV_ANUDATTA

	        if dest_char is not None and self.isvalid(dest_char):
	            # isvalid() checked the counterpart in the current script's
	            # block, so that counterpart is what has to be returned
	            return dest_char + self.delta

	        sys.stderr.write("Invalid input after EXT %s\n" % (hex(curr_char)))
	        return None

	    def handle_atr(self, i):
	        """
	        Apply the attribute byte i that follows an ATR marker.

	        Script-selecting values switch the current script; the default
	        value (mapped to -1 in ISCII_SCRIPTS) and unknown values leave it
	        unchanged.  Never produces output, so None is returned.
	        """

	        sys.stderr.write("Handling ATR: ")

	        script = ISCII_SCRIPTS.get(i)

	        if script is not None and script > -1:
	            # ISCII_SCRIPTS stores 0-based indices, set_script() expects the
	            # 1-based number
	            self.set_script(script + 1)
	            sys.stderr.write("setting script to %s\n" % (i,))
	        else:
	            # ignore all other ATR markers
	            sys.stderr.write("ignored\n")

	        return None

	    def handle_inv(self, i):
	        """
	        Resolve the byte i that follows an INV marker: a space for INV +
	        halant, ZWJ for everything else.
	        """

	        if i == ISCII_HALANT:
	            return 0x0020

	        return ZWJ

	    def post_analysis(self, prev_char, src_char):
	        """
	        Dispatch src_char to the handler of the special byte prev_char
	        (ATR, EXT or INV).  Returns the code point to emit or None.
	        """

	        ret = None

	        if prev_char == ISCII_ATR:
	            ret = self.handle_atr(src_char)

	        elif prev_char == ISCII_EXT:
	            ret = self.handle_ext(src_char)

	        elif prev_char == ISCII_INV:
	            ret = self.handle_inv(src_char)

	        return ret

	    def _emit(self, ch):
	        """Append ch to self.dest, mapping ISCII bytes through the current script table."""

	        if ch <= 0xFF:
	            ch = script_maps[self.curr_script][ch]

	        self.dest.append(ch)

	    def iscii2utf8(self, src, flush = 0):
	        """
	        Convert one chunk of ISCII bytes, appending code points to self.dest.

	        One byte of look-ahead is needed (nukta, halant, danda and the
	        special markers combine with a neighbour), so the last accepted byte
	        normally stays pending.

	        src   -- byte string (Python 2 str / Python 3 bytes)
	        flush -- true for the final call: the pending byte is emitted too

	        Returns the number of leading bytes of src that are completely
	        processed (also stored in self.pos).  The caller must put
	        src[returned:] in front of the next chunk.
	        """

	        src = array.array('B', src).tolist()
	        n = len(src)

	        prev_char = NO_CHAR
	        pending_at = 0      # index in src of the byte held in prev_char

	        for i in range(n):
	            curr_char = src[i]
	            dest_char = NO_CHAR
	            add_prev = 0

	            if invalid_chars[self.curr_script][curr_char]:
	                # just ignore the invalid iscii characters
	                sys.stderr.write('ignoring invalid iscii char %s\n'
	                                 % (hex(curr_char)))
	                continue

	            if prev_char == NO_CHAR:
	                prev_char = curr_char
	                pending_at = i
	                continue

	            if prev_char in ISCII_SPECIALS:
	                # curr_char is the argument of the marker and is used up
	                ret = self.post_analysis(prev_char, curr_char)

	                if ret is not None:
	                    dest_char = ret

	                prev_char = NO_CHAR

	            elif not iscii_modifying[curr_char]:
	                pass

	            elif curr_char in ISCII_SPECIALS:
	                dest_char = prev_char
	                prev_char = curr_char
	                pending_at = i

	            elif (curr_char == ISCII_DANDA) and (prev_char == ISCII_DANDA):
	                dest_char = DOUBLE_DANDA
	                prev_char = NO_CHAR

	            elif (curr_char == ISCII_HALANT) and (prev_char == ISCII_HALANT):
	                dest_char = ZWNJ
	                add_prev = 1

	            elif curr_char == ISCII_NUKTA:
	                if prev_char == ISCII_HALANT:
	                    dest_char = ZWJ
	                    add_prev = 1

	                else:
	                    tmp = self.is_nukta_special(prev_char)

	                    if tmp is not None and self.isvalid(tmp):
	                        # nukta_specials holds Devanagari values; move the
	                        # result into the block of the current script
	                        dest_char = tmp + self.delta
	                        prev_char = NO_CHAR

	            if add_prev == 1:
	                self._emit(prev_char)
	                prev_char = NO_CHAR

	            if dest_char != NO_CHAR:
	                self._emit(dest_char)

	            elif prev_char != NO_CHAR:
	                self._emit(prev_char)
	                prev_char = curr_char
	                pending_at = i

	        if flush and prev_char != NO_CHAR:
	            # end of input: nothing can follow the pending byte any more
	            if prev_char == ISCII_INV:
	                # same result handle_inv() gives for INV + non-halant
	                self._emit(ZWJ)
	            elif prev_char in ISCII_SPECIALS:
	                sys.stderr.write('ignoring incomplete sequence at end of '
	                                 'input %s\n' % (hex(prev_char)))
	            else:
	                self._emit(prev_char)

	            prev_char = NO_CHAR

	        # Everything before the pending byte is done.  Reporting a position
	        # instead of a running count stays correct when bytes were skipped
	        # or consumed without producing output.
	        if prev_char == NO_CHAR:
	            self.pos = n
	        else:
	            self.pos = pending_at

	        return self.pos


	def show_usage(name):
	    """
	    Print the command line help for program `name` on stderr and exit
	    with status 1.
	    """
	    usage = """
	Usage:

	%s script

	where script is a number between 1-9

	1 - devnag
	2 - bengali / assamese
	3 - punjabi
	4 - gujarati
	5 - oriya
	6 - tamil
	7 - telugu
	8 - kannada
	9 - malayalam

	the program reads from stdin and writes to stdout

	any msgs to the user (error msgs etc) are printed on stderr
	""" % (name)

	    # same output as the Python 2 "print >> sys.stderr, usage" statement,
	    # but also valid on Python 3
	    sys.stderr.write(usage + "\n")
	    sys.exit(1)


	# number of bytes requested from stdin per read
	chunk_size = 4096

	if __name__ == '__main__':

	    try:

	        i = int(sys.argv[1])

	        if i not in range(1, 10):
	            raise ValueError

	    except (ValueError, IndexError):
	        show_usage(sys.argv[0])

	    mypar = Parser()
	    mypar.set_script(i)

	    # ISCII is a byte encoding: on Python 3 read from the binary buffer
	    # behind sys.stdin; on Python 2 sys.stdin already yields byte strings
	    stdin = getattr(sys.stdin, 'buffer', sys.stdin)

	    y = None    # bytes the parser could not finish in the previous round
	    flush = 0

	    while not flush:

	        x = stdin.read(chunk_size)

	        if not x:
	            # end of input: one last round to flush what is still pending
	            flush = 1

	        if y:
	            x = y + x

	        n = mypar.iscii2utf8(x, flush)
	        y = x[n:]

	        mypar.write_output()