Created
April 29, 2020 00:40
-
-
Save bdkosher/40acc61aa10fa4369889c49fc5c6b2e6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import groovy.transform.* | |
import java.nio.* | |
import java.nio.charset.* | |
// Script-level UTF-8 decoder. @Field promotes it from a script-local variable to a
// field of the generated script class so that methods declared in this script
// (e.g. isValidUTF8) can reference it.
@Field CharsetDecoder cs = StandardCharsets.UTF_8.newDecoder()
/*
 * Adds eachChunk(int, Closure) to InputStream via the metaclass.
 *
 * The stream is read with eachByte(preferredChunkSize) and the closure is invoked once
 * per non-empty read with a byte[] holding exactly the bytes of that read. A chunk may
 * be shorter than preferredChunkSize (typically the last one).
 *
 * preferredChunkSize - size of the read buffer; must be at least 1
 * closure            - called with a single byte[] argument for every chunk
 *
 * Throws IllegalArgumentException when preferredChunkSize is less than 1.
 */
InputStream.metaClass.eachChunk << { int preferredChunkSize, Closure closure ->
    if (preferredChunkSize < 1) {
        throw new IllegalArgumentException('preferredChunkSize must be positive: ' + preferredChunkSize)
    }
    delegate.eachByte(preferredChunkSize) { byte[] buffer, int bytesRead ->
        if (bytesRead > 0) {
            // eachByte hands over the same buffer on every read, so always pass a copy:
            // a caller that keeps a chunk must not see it overwritten by the next read.
            closure(Arrays.copyOf(buffer, bytesRead))
        }
    }
}
// Number of characters of surrounding text printed on each side of a reported offset.
int pad = 10

// Text of the source file, used only to print context around reported offsets.
String all = new File(/C:\dev\cp\PRPS\admin\wsdl_all.txt/).text

// Append that text to a second file.
// NOTE(review): this writes to "wsdl_all_fixed)2.txt" while the scan below reads
// "wsdl_all_fixed.txt" - confirm whether the two names are meant to match.
new File(/C:\dev\cp\PRPS\admin\wsdl_all_fixed)2.txt/) << all

// Byte offset (within the scanned file) of the chunk currently being checked.
int counter = 0

new File(/C:\dev\cp\PRPS\admin\wsdl_all_fixed.txt/).withInputStream { is ->
    // NOTE(review): each 2-byte chunk is validated on its own, so a multi-byte UTF-8
    // sequence that straddles a chunk boundary is reported as illegal as well.
    is.eachChunk(2) { bytes ->
        if (!isValidUTF8(bytes)) {
            // Clamp the context window to the bounds of the text so that hits near the
            // start or end of the file do not produce a negative or out-of-range index.
            int from = Math.max(0, counter - pad)
            int to = Math.min(all.length() - 1, counter + pad)
            String context = from <= to ? all[from..to] : ''
            println "${new String(bytes, 'windows-1252')} is an illegal character at index $counter: ${context}"
        }
        // Advance by the number of bytes consumed so the reported index is a byte offset,
        // not a chunk number.
        counter += bytes.length
    }
}
/**
 * Reports whether the given bytes can be decoded by the script-level decoder {@code cs}.
 *
 * @param input the bytes to check
 * @return {@code true} when decoding succeeds, {@code false} when the decoder raises a
 *         {@link CharacterCodingException}
 */
boolean isValidUTF8(byte[] input) {
    boolean decodable = true
    try {
        cs.decode(ByteBuffer.wrap(input))
    } catch (CharacterCodingException ignored) {
        // A decoding failure is the "not valid" answer, not an error to propagate.
        decodable = false
    }
    return decodable
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment