Created
March 8, 2022 01:17
-
-
Save tallpeak/542605d4605c2aeb402908e191d5c04b to your computer and use it in GitHub Desktop.
parse a fixed-width file and convert to tab-separated (fields trimmed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.*; | |
import java.nio.charset.StandardCharsets; | |
import java.util.ArrayList; | |
import java.util.Calendar; | |
import java.util.zip.GZIPInputStream; | |
/**
 * Converts a fixed-width text file into a tab-separated file with every field trimmed.
 *
 * <p>The input is decoded as IBM code page 437 and may optionally be gzip-compressed
 * (detected by a {@code .gz} file-name suffix). The output is written as UTF-8.
 */
public final class Superfile {

    /**
     * Exclusive end offset of each fixed-width column. A column starts where the
     * previous one ends; the first column starts at offset 0.
     */
    private static final int[] FIELD_ENDS = {
        2, 10, 16, 21, 28, 29, 59, 89, 119,
        149, 162, 167, 171, 201, 211, 220, 229, 233, 237,
        245, 253, 261, 269, 277, 282, 285, 288, 293, 305,
        314, 325, 328, 345, 350
    };

    /** Charset name of the input; aliases: ibm437 437 ibm-437 cspc8codepage437 cp437 windows-437. */
    private static final String INPUT_CHARSET = "ibm437";

    /** A line must be longer than this to be converted; the first line that is not ends the run. */
    private static final int MIN_LINE_LENGTH = 300;

    /** Buffer size handed to the gzip decompressor. */
    private static final int GZIP_BUFFER_SIZE = 4096;

    /**
     * Reads {@code inputFileName} line by line, slices each line at the fixed column
     * offsets, trims every field and writes the fields to {@code outputFileName},
     * each followed by a tab, one output line per input line.
     *
     * <p>Processing stops at end of file or at the first line that is not longer than
     * {@value #MIN_LINE_LENGTH} characters. Lines longer than that but shorter than the
     * full record width are accepted; columns that fall beyond the end of the line are
     * written as empty fields.
     *
     * @param inputFileName  path of the fixed-width input; decompressed with gzip when
     *                       the name ends with {@code .gz}
     * @param outputFileName path of the tab-separated output file (created or overwritten)
     * @return {@code "Success: Conversion completed"} on success, otherwise
     *         {@code "Error: Exception="} followed by the exception message; on error the
     *         output file may contain only part of the converted rows
     */
    public static String SuperfileToTSV(String inputFileName, String outputFileName) {
        // try-with-resources closes every stream on both the success and the failure path;
        // the input is opened first so a missing input never creates an empty output file.
        try (InputStream raw = new FileInputStream(inputFileName);
             InputStream source = inputFileName.endsWith(".gz")
                     ? new GZIPInputStream(raw, GZIP_BUFFER_SIZE)
                     : raw;
             BufferedReader in = new BufferedReader(new InputStreamReader(source, INPUT_CHARSET));
             OutputStream outFile = new FileOutputStream(outputFileName);
             Writer out = new BufferedWriter(
                     new OutputStreamWriter(outFile, StandardCharsets.UTF_8))) {

            String line;
            while ((line = in.readLine()) != null && line.length() > MIN_LINE_LENGTH) {
                writeRow(out, line);
            }
        } catch (Exception x) {
            // IOException, FileNotFoundException, UnsupportedEncodingException
            return "Error: Exception=" + x.getMessage();
        }
        // next steps (done in CFML):
        // copy stg_superfile from '[outputFileName]' with (format 'text');
        // then select * from import_superfile()
        // then call superfile_post_import()
        return "Success: Conversion completed";
    }

    /** Writes one fixed-width line as trimmed fields, each followed by a tab, then a newline. */
    private static void writeRow(Writer out, String line) throws IOException {
        final int length = line.length();
        int start = 0;
        for (int end : FIELD_ENDS) {
            // Clamp both offsets: a line may be shorter than the full record width.
            String field = line.substring(Math.min(start, length), Math.min(end, length));
            out.write(field.trim());
            out.write('\t');
            start = end;
        }
        out.write('\n');
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment