Skip to content

Instantly share code, notes, and snippets.

@osima
Created January 13, 2011 06:34
Show Gist options
  • Select an option

  • Save osima/777493 to your computer and use it in GitHub Desktop.

Select an option

Save osima/777493 to your computer and use it in GitHub Desktop.
Fix (R) and (TM) mojibake
//
// Fix (R) and (TM) mojibake
//
/**
 * Replaces mis-decoded (R) and (TM) marks with HTML entities.
 *
 * A character is rewritten only when the character immediately before it
 * (in the input, not the output) is an ASCII letter or '>':
 *   U+30A3 (katakana small I) -> "&reg;"
 *   U+30A7 (katakana small E) -> "&trade;"
 * Every other character is copied through unchanged. Each replacement is
 * reported on stdout together with up to 10 characters of preceding context.
 *
 * @param str text to scan
 * @return a copy of str with the mojibake characters replaced
 */
String replace(String str){
    // Up to 10 characters immediately preceding index `end`, for log output.
    def context = { String text, int end ->
        text.substring(Math.max(0, end - 10), end)
    }
    def sb = new StringBuilder()
    for (int i = 0; i < str.length(); i++) {
        String entity = null
        // Only a mark that directly follows a word or a closing tag is treated
        // as mojibake; elsewhere these are presumably legitimate katakana.
        if (i > 0 && (String.valueOf(str.charAt(i - 1)) ==~ /[a-zA-Z>]/)) {
            int code = (int) str.charAt(i)
            if (code == 0x30a3) {
                println "found (R) -> (${context(str, i)})"
                entity = '&reg;'
            } else if (code == 0x30a7) {
                println "found TM -> (${context(str, i)})"
                entity = '&trade;'
            }
        }
        if (entity != null) {
            // The entity replaces the bad character; it must not be copied too.
            sb.append(entity)
        } else {
            sb.append(str.charAt(i))
        }
    }
    sb.toString()
}
// Script entry point: rewrites the file named by the first argument in place,
// reading and writing it as MS932.
if( args.length<1 ){
    System.err.println 'usage: pass the path of the file to fix as the first argument'
    System.exit(0)
}
def ENC = 'MS932'
def inf = new File(args[0])
def outf = inf   // output goes back to the same file
println "--- ${inf.name} ---"
def text = inf.getText(ENC)
// Run the transformation BEFORE opening the writer: opening truncates the
// file, so a failure in replace() must not leave it empty.
def fixed = replace(text)
// withWriter closes the writer even if the write throws.
outf.withWriter(ENC) { w ->
    w.write(fixed)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment