barata0 · March 22, 2019 14:05
diff --git a/parseSTLSubtitle.groovy b/parseSTLSubtitle.groovy
 // https://mvnrepository.com/artifact/fr.noop/charset
 @Grapes(
    @Grab(group='fr.noop', module='charset', version='1.0.1')
 )

 import static fr.noop.charset.CharsetProvider.*;


 // Input file: the first script argument when one is supplied, otherwise the
 // sample path this script has always used (so argument-less runs are unchanged).
 def defaultPath = 'C:/Users/danielr/Desktop/legendas/AMERICANHORRORSTORY_6ATS01_ORIG_V473377_INT_C_TVD_SUBFULSTL_29_BPO_178_MOS-XX_RT004242.stl'
 def cliArgs = binding.hasVariable('args') ? binding.getVariable('args') : null
 def f = new File(cliArgs ? cliArgs[0] : defaultPath)

 // File length in bytes; upper bound for the record loop further down.
 def size = f.size()

 // Charsets used to decode the text field, selected by the numeric value of gsi.cct.
 def CCTS = [
    new fr.noop.charset.iso6937.Iso6937Charset(ISO6937_NAME , ISO6937_ALIASES ),
    java.nio.charset.Charset.forName('ISO-8859-5'),
    java.nio.charset.Charset.forName('ISO-8859-6'),
    java.nio.charset.Charset.forName('ISO-8859-7'),
    java.nio.charset.Charset.forName('ISO-8859-8')
 ]

 // Control values found in the text field. They are assigned without `def` so
 // they are script binding variables, visible inside the script's methods.
 UNUSED = (byte) 0x8f      // stripped by sanitizeArrayForText
 LINE_BREAK = (byte) 0x8a  // turned into '\n' by sanitizeArrayForText
 ITALIC_ON = 0x80          // replaced with "<i>" after decoding
 ITALIC_OFF = 0x81         // replaced with "</i>" after decoding

 println "LINE_BREAK = ${LINE_BREAK}"

 // Entire file content; a binding variable because the read* helpers index into it.
 b = f.bytes


 // Returns the elements of b[from..to] (both bounds inclusive) as a new list, values unchanged.
 def read(int from, int to) {
    def slice = b[from..to]
    return slice.collect { value -> value }
 }
 // Returns b[from..to] (both bounds inclusive) as a list of chars.
 // Each byte is masked to 0..255 first: casting a negative byte straight to
 // char sign-extends it (0x8a would become U+FF8A instead of U+008A).
 def readAsChar(int from, int to) {
    b[from..to].collect { (char) (it & 0xFF) }
 }

 // Copies b[from..to] (both bounds inclusive) into a new list, casting every element to byte.
 def readAsByte(int from, int to) {
    def copied = []
    for (value in b[from..to]) {
        copied << (byte) value
    }
    return copied
 }

 // Returns b[from..to] (both bounds inclusive) as a list of ints in the unsigned range 0..255.
 def readAsInt(int from, int to) {
    b[from..to].collect { Byte raw -> Byte.toUnsignedInt(raw) }
 }


 // Layout of the 1024-byte header that precedes the records: each key maps to
 // the [first, last] byte index (inclusive) of one field. Keys are the lower-cased
 // field abbreviations of EBU Tech 3264; bytes 373..447 are not mapped here.
 def gsiFields = [
    cpn: [0, 2],
    dfc: [3, 10],
    dsc: [11, 11],
    cct: [12, 13],      // selects the text charset (index into CCTS)
    lc: [14, 15],
    opt: [16, 47],
    oet: [48, 79],
    tpt: [80, 111],
    tet: [112, 143],
    tn: [144, 175],
    tcd: [176, 207],
    slr: [208, 223],
    cd: [224, 229],
    rd: [230, 235],
    rn: [236, 237],
    tnb: [238, 242],
    tns: [243, 247],
    tng: [248, 250],
    mnc: [251, 252],    // MNC in the spec (was misspelled "mmc")
    mnr: [253, 254],
    tcs: [255, 255],
    tcp: [256, 263],
    tcf: [264, 271],
    tnd: [272, 272],
    dsn: [273, 273],
    co: [274, 276],
    pub: [277, 308],
    en: [309, 340],
    ecd: [341, 372],
    uda: [448, 1023]    // UDA in the spec (was misspelled "uds")
 ]

 // Field layout of one record: each entry is the [first, last] byte index
 // (inclusive) of the field, relative to the start of the record.
 def ttiFields = [:]
 ttiFields.sgn = [0, 0]
 ttiFields.sn = [1, 2]
 ttiFields.ebn = [3, 3]
 ttiFields.cs = [4, 4]
 ttiFields.tci = [5, 8]
 ttiFields.tco = [9, 12]
 ttiFields.vp = [13, 13]
 ttiFields.jc = [14, 14]
 ttiFields.cf = [15, 15]
 ttiFields.tf = [16, 127]

 // Decode every header field as text: read its byte range as chars, join, trim padding.
 gsi = gsiFields.collectEntries { k, v -> [(k): readAsChar(v[0], v[1]).join().trim()] }

 def tti = []

 // The charset depends only on the header, so resolve it once instead of on
 // every record, and fail with a clear message when the table index is unknown.
 def cctIndex = gsi.cct.toInteger()
 def charset = (cctIndex >= 0 && cctIndex < CCTS.size()) ? CCTS[cctIndex] : null
 if (charset == null) {
    throw new IllegalStateException("Unsupported character code table (cct): ${gsi.cct}")
 }

 def recordLength = 128
 def offset = 1024
 // Require a complete record: a truncated final record would otherwise make
 // the read helpers index past the end of the data.
 while (offset + recordLength <= size) {
    def map = ttiFields.collectEntries { k, v -> [(k): readAsByte(v[0] + offset, v[1] + offset)] }
    map.tf = sanitizeArrayForText(map.tf)
    println map.tf
    map.tf = new String(map.tf as byte[], charset)
    // Literal replacement is enough here; the markers are single characters, not patterns.
    map.tf = map.tf.replace(String.valueOf((char) ITALIC_ON), "<i>")
    map.tf = map.tf.replace(String.valueOf((char) ITALIC_OFF), "</i>")
    tti << map
    offset += recordLength
 }
 if (offset < size) {
    System.err.println "Ignoring ${size - offset} trailing byte(s): not a complete ${recordLength}-byte record"
 }



 /**
  * Prepares the raw bytes of a text field for charset decoding.
  *
  * Drops the control bytes 0x82-0x85 and UNUSED, and turns every LINE_BREAK
  * into '\n'. The argument is not modified; a new list is returned.
  *
  * @param a list or array of bytes
  * @return a new list with the filtered and translated bytes, in the original order
  */
 def sanitizeArrayForText(a) {
    def removable = [(byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, UNUSED] as Set
    def cleaned = []
    for (c in a) {
        if (removable.contains(c)) {
            continue
        }
        cleaned << (c == LINE_BREAK ? (byte) '\n' : (byte) c)
    }
    return cleaned
 }

 // Print the gsi map on one line, then the tti entries joined by newlines (no trailing newline).
 println(gsi)
 def renderedRecords = tti.join('\n')
 print(renderedRecords)
	// https://mvnrepository.com/artifact/fr.noop/charset
	@Grapes(
	@Grab(group='fr.noop', module='charset', version='1.0.1')
	)

	import static fr.noop.charset.CharsetProvider.*;


	// Input file: the first script argument when one is supplied, otherwise the
	// sample path this script has always used (so argument-less runs are unchanged).
	def defaultPath = 'C:/Users/danielr/Desktop/legendas/AMERICANHORRORSTORY_6ATS01_ORIG_V473377_INT_C_TVD_SUBFULSTL_29_BPO_178_MOS-XX_RT004242.stl'
	def cliArgs = binding.hasVariable('args') ? binding.getVariable('args') : null
	def f = new File(cliArgs ? cliArgs[0] : defaultPath)

	// File length in bytes; upper bound for the record loop further down.
	def size = f.size()

	// Charsets used to decode the text field, selected by the numeric value of gsi.cct.
	def CCTS = [
		new fr.noop.charset.iso6937.Iso6937Charset(ISO6937_NAME , ISO6937_ALIASES ),
		java.nio.charset.Charset.forName('ISO-8859-5'),
		java.nio.charset.Charset.forName('ISO-8859-6'),
		java.nio.charset.Charset.forName('ISO-8859-7'),
		java.nio.charset.Charset.forName('ISO-8859-8')
	]

	// Control values found in the text field. They are assigned without `def` so
	// they are script binding variables, visible inside the script's methods.
	UNUSED = (byte) 0x8f      // stripped by sanitizeArrayForText
	LINE_BREAK = (byte) 0x8a  // turned into '\n' by sanitizeArrayForText
	ITALIC_ON = 0x80          // replaced with "<i>" after decoding
	ITALIC_OFF = 0x81         // replaced with "</i>" after decoding

	println "LINE_BREAK = ${LINE_BREAK}"

	// Entire file content; a binding variable because the read* helpers index into it.
	b = f.bytes


	// Returns the elements of b[from..to] (both bounds inclusive) as a new list, values unchanged.
	def read(int from, int to) {
		def slice = b[from..to]
		return slice.collect { value -> value }
	}
	// Returns b[from..to] (both bounds inclusive) as a list of chars.
	// Each byte is masked to 0..255 first: casting a negative byte straight to
	// char sign-extends it (0x8a would become U+FF8A instead of U+008A).
	def readAsChar(int from, int to) {
		b[from..to].collect { (char) (it & 0xFF) }
	}

	// Copies b[from..to] (both bounds inclusive) into a new list, casting every element to byte.
	def readAsByte(int from, int to) {
		def copied = []
		for (value in b[from..to]) {
			copied << (byte) value
		}
		return copied
	}

	// Returns b[from..to] (both bounds inclusive) as a list of ints in the unsigned range 0..255.
	def readAsInt(int from, int to) {
		b[from..to].collect { Byte raw -> Byte.toUnsignedInt(raw) }
	}


	// Layout of the 1024-byte header that precedes the records: each key maps to
	// the [first, last] byte index (inclusive) of one field. Keys are the lower-cased
	// field abbreviations of EBU Tech 3264; bytes 373..447 are not mapped here.
	def gsiFields = [
		cpn: [0, 2],
		dfc: [3, 10],
		dsc: [11, 11],
		cct: [12, 13],      // selects the text charset (index into CCTS)
		lc: [14, 15],
		opt: [16, 47],
		oet: [48, 79],
		tpt: [80, 111],
		tet: [112, 143],
		tn: [144, 175],
		tcd: [176, 207],
		slr: [208, 223],
		cd: [224, 229],
		rd: [230, 235],
		rn: [236, 237],
		tnb: [238, 242],
		tns: [243, 247],
		tng: [248, 250],
		mnc: [251, 252],    // MNC in the spec (was misspelled "mmc")
		mnr: [253, 254],
		tcs: [255, 255],
		tcp: [256, 263],
		tcf: [264, 271],
		tnd: [272, 272],
		dsn: [273, 273],
		co: [274, 276],
		pub: [277, 308],
		en: [309, 340],
		ecd: [341, 372],
		uda: [448, 1023]    // UDA in the spec (was misspelled "uds")
	]

	// Field layout of one record: each entry is the [first, last] byte index
	// (inclusive) of the field, relative to the start of the record.
	def ttiFields = [:]
	ttiFields.sgn = [0, 0]
	ttiFields.sn = [1, 2]
	ttiFields.ebn = [3, 3]
	ttiFields.cs = [4, 4]
	ttiFields.tci = [5, 8]
	ttiFields.tco = [9, 12]
	ttiFields.vp = [13, 13]
	ttiFields.jc = [14, 14]
	ttiFields.cf = [15, 15]
	ttiFields.tf = [16, 127]

	// Decode every header field as text: read its byte range as chars, join, trim padding.
	gsi = gsiFields.collectEntries { k, v -> [(k): readAsChar(v[0], v[1]).join().trim()] }

	def tti = []

	// The charset depends only on the header, so resolve it once instead of on
	// every record, and fail with a clear message when the table index is unknown.
	def cctIndex = gsi.cct.toInteger()
	def charset = (cctIndex >= 0 && cctIndex < CCTS.size()) ? CCTS[cctIndex] : null
	if (charset == null) {
		throw new IllegalStateException("Unsupported character code table (cct): ${gsi.cct}")
	}

	def recordLength = 128
	def offset = 1024
	// Require a complete record: a truncated final record would otherwise make
	// the read helpers index past the end of the data.
	while (offset + recordLength <= size) {
		def map = ttiFields.collectEntries { k, v -> [(k): readAsByte(v[0] + offset, v[1] + offset)] }
		map.tf = sanitizeArrayForText(map.tf)
		println map.tf
		map.tf = new String(map.tf as byte[], charset)
		// Literal replacement is enough here; the markers are single characters, not patterns.
		map.tf = map.tf.replace(String.valueOf((char) ITALIC_ON), "<i>")
		map.tf = map.tf.replace(String.valueOf((char) ITALIC_OFF), "</i>")
		tti << map
		offset += recordLength
	}
	if (offset < size) {
		System.err.println "Ignoring ${size - offset} trailing byte(s): not a complete ${recordLength}-byte record"
	}



	/**
	 * Prepares the raw bytes of a text field for charset decoding.
	 *
	 * Drops the control bytes 0x82-0x85 and UNUSED, and turns every LINE_BREAK
	 * into '\n'. The argument is not modified; a new list is returned.
	 *
	 * @param a list or array of bytes
	 * @return a new list with the filtered and translated bytes, in the original order
	 */
	def sanitizeArrayForText(a) {
		def removable = [(byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, UNUSED] as Set
		def cleaned = []
		for (c in a) {
			if (removable.contains(c)) {
				continue
			}
			cleaned << (c == LINE_BREAK ? (byte) '\n' : (byte) c)
		}
		return cleaned
	}

	// Print the gsi map on one line, then the tti entries joined by newlines (no trailing newline).
	println(gsi)
	def renderedRecords = tti.join('\n')
	print(renderedRecords)