Created
March 22, 2019 14:05
-
-
Save barata0/11377987d91e8b7f42f777d250507f5d to your computer and use it in GitHub Desktop.
Parse STL Subtitle
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://mvnrepository.com/artifact/fr.noop/charset | |
@Grapes( | |
@Grab(group='fr.noop', module='charset', version='1.0.1') | |
) | |
import static fr.noop.charset.CharsetProvider.*; | |
// Input selection: use the first command-line argument as the STL file path when one
// is supplied; otherwise fall back to the original hard-coded sample file.
def defaultPath = 'C:/Users/danielr/Desktop/legendas/AMERICANHORRORSTORY_6ATS01_ORIG_V473377_INT_C_TVD_SUBFULSTL_29_BPO_178_MOS-XX_RT004242.stl'
// `args` is not bound in every launcher (e.g. some consoles), so probe the binding
// rather than referencing the variable directly.
def cliArgs = binding.hasVariable('args') ? binding.getVariable('args') : null
def f = new File(cliArgs ? (cliArgs[0] as String) : defaultPath)
def size = f.size()

// Charsets for decoding subtitle text, selected by the numeric value of the GSI
// `cct` field (index 0 = ISO 6937, 1..4 = ISO-8859-5..8).
def CCTS = [
    new fr.noop.charset.iso6937.Iso6937Charset(ISO6937_NAME, ISO6937_ALIASES),
    java.nio.charset.Charset.forName('ISO-8859-5'),
    java.nio.charset.Charset.forName('ISO-8859-6'),
    java.nio.charset.Charset.forName('ISO-8859-7'),
    java.nio.charset.Charset.forName('ISO-8859-8')
]

// Control codes found in the text field. These are script-binding variables (no `def`)
// so the helper methods of this script can see them.
UNUSED = (byte) 0x8f      // stripped from the text field before decoding
LINE_BREAK = (byte) 0x8a  // converted to '\n' before decoding
ITALIC_ON = 0x80          // rendered as "<i>" after decoding
ITALIC_OFF = 0x81         // rendered as "</i>" after decoding
println "LINE_BREAK = ${LINE_BREAK}"

// Whole file content; also a binding variable, read by the read* helpers.
b = f.bytes
/**
 * Returns a fresh list holding the file bytes in the inclusive index range
 * {@code from..to}, unchanged.
 */
def read(int from, int to) {
    def slice = b[from..to]
    return slice.collect { element -> element }
}
/**
 * Returns the file bytes in the inclusive index range {@code from..to} as a list of
 * chars, one char per byte, treating each byte as an unsigned value (0..255).
 *
 * @param from index of the first byte to read
 * @param to   index of the last byte to read (inclusive)
 * @return list of chars in the range U+0000..U+00FF
 */
def readAsChar(int from, int to) {
    // Mask before casting: a byte is signed, so casting e.g. 0x80 directly to char
    // sign-extends it to U+FF80 instead of U+0080.
    b[from..to].collect { (char) (it & 0xFF) }
}
/**
 * Returns the file bytes in the inclusive index range {@code from..to} as a list of
 * (signed) byte values.
 */
def readAsByte(int from, int to) {
    def slice = b[from..to]
    return slice.collect { element -> (byte) element }
}
/**
 * Returns the file bytes in the inclusive index range {@code from..to} as a list of
 * ints, each byte interpreted as an unsigned value (0..255).
 */
def readAsInt(int from, int to) {
    def slice = b[from..to]
    return slice.collect { element ->
        int unsigned = element & 0xFF
        (int) unsigned
    }
}
// Layout of the header block: field key -> [first byte index, last byte index]
// (both inclusive, relative to the start of the file).
// NOTE(review): offsets appear to follow the EBU Tech 3264 GSI block; confirm
// against the spec before changing any of them.
def gsiFields = [
    ['cpn',   0,    2],
    ['dfc',   3,   10],
    ['dsc',  11,   11],
    ['cct',  12,   13],   // parsed as an integer to choose the text charset
    ['lc',   14,   15],
    ['opt',  16,   47],
    ['oet',  48,   79],
    ['tpt',  80,  111],
    ['tet', 112,  143],
    ['tn',  144,  175],
    ['tcd', 176,  207],
    ['slr', 208,  223],
    ['cd',  224,  229],
    ['rd',  230,  235],
    ['rn',  236,  237],
    ['tnb', 238,  242],
    ['tns', 243,  247],
    ['tng', 248,  250],
    ['mmc', 251,  252],
    ['mnr', 253,  254],
    ['tcs', 255,  255],
    ['tcp', 256,  263],
    ['tcf', 264,  271],
    ['tnd', 272,  272],
    ['dsn', 273,  273],
    ['co',  274,  276],
    ['pub', 277,  308],
    ['en',  309,  340],
    ['ecd', 341,  372],
    ['uds', 448, 1023]
].collectEntries { key, first, last -> [(key): [first, last]] }
// Layout of one 128-byte subtitle block: field key -> [first byte index, last byte
// index] (both inclusive, relative to the start of the block).
// NOTE(review): offsets appear to follow the EBU Tech 3264 TTI block; confirm
// against the spec before changing any of them.
def ttiFields = [
    ['sgn',  0,   0],
    ['sn',   1,   2],
    ['ebn',  3,   3],
    ['cs',   4,   4],
    ['tci',  5,   8],
    ['tco',  9,  12],
    ['vp',  13,  13],
    ['jc',  14,  14],
    ['cf',  15,  15],
    ['tf',  16, 127]    // text field: sanitized and decoded to a String by the parser loop
].collectEntries { key, first, last -> [(key): [first, last]] }
// Decode every header field as trimmed text, keyed by field name. `gsi` is a
// script-binding variable (no `def`).
gsi = gsiFields.collectEntries { name, range ->
    [(name): readAsChar(range[0], range[1]).join().trim()]
}

// Parse the subtitle blocks that follow the 1024-byte header, 128 bytes at a time.
def tti = []
def ttiBlockSize = 128
def offset = 1024
// Italic markers as they appear in the decoded text.
// NOTE(review): assumes the selected charset decodes bytes 0x80/0x81 to U+0080/U+0081;
// this should hold for the ISO-8859-x charsets; confirm for the ISO 6937 implementation.
def italicOnMarker = "" + (char) ITALIC_ON
def italicOffMarker = "" + (char) ITALIC_OFF
// Only process complete blocks: a truncated trailing block would otherwise make the
// range reads run past the end of the byte buffer and throw.
while (offset + ttiBlockSize <= size) {
    def map = ttiFields.collectEntries { name, range ->
        [(name): readAsByte(range[0] + offset, range[1] + offset)]
    }
    // Strip unused/control bytes and turn line-break codes into '\n' before decoding.
    map.tf = sanitizeArrayForText(map.tf)
    println map.tf
    // Decode with the charset selected by the header's `cct` field.
    map.tf = new String(map.tf as byte[], CCTS[gsi.cct.toInteger()])
    // Literal replacement (no regex needed) of the italic markers with HTML-style tags.
    map.tf = map.tf.replace(italicOnMarker, "<i>")
    map.tf = map.tf.replace(italicOffMarker, "</i>")
    tti << map
    offset += ttiBlockSize
}
/**
 * Prepares raw text-field bytes for charset decoding.
 *
 * Removes every occurrence of the bytes 0x82..0x85 and UNUSED from {@code a}
 * (in place), then returns a new list in which each LINE_BREAK byte is replaced
 * by the newline byte and all other bytes are kept as-is.
 *
 * NOTE(review): 0x82..0x85 are presumably formatting control codes
 * (underline/boxing in EBU Tech 3264); confirm against the spec.
 *
 * @param a mutable list of byte values; modified by the removal step
 * @return new list of bytes ready to be decoded
 */
def sanitizeArrayForText(a) {
    byte[] removableChars = [0x82, 0x83, 0x84, 0x85, UNUSED]
    for (code in removableChars) {
        a.removeAll(code)
    }
    return a.collect { value ->
        value == LINE_BREAK ? (byte) '\n' : (byte) value
    }
}
// Dump the decoded header map, then every parsed subtitle block on its own line
// (no trailing newline after the last block).
println(gsi)
def renderedBlocks = tti.join('\n')
print(renderedBlocks)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment