Skip to content

Instantly share code, notes, and snippets.

@tallpeak
Created March 8, 2022 01:17
Show Gist options
  • Save tallpeak/542605d4605c2aeb402908e191d5c04b to your computer and use it in GitHub Desktop.
Save tallpeak/542605d4605c2aeb402908e191d5c04b to your computer and use it in GitHub Desktop.
parse a fixed-width file and convert to tab-separated (fields trimmed)
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.zip.GZIPInputStream;
/**
 * Converts a fixed-width "superfile" export into a tab-separated text file
 * with every field trimmed of leading and trailing whitespace.
 *
 * <p>The input is decoded as code page 437 ({@code ibm437}) and the output is
 * written as UTF-8.
 */
public final class Superfile {

    /**
     * Exclusive end offset of each fixed-width column. A column starts where
     * the previous one ends; the first column starts at offset 0.
     */
    private static final int[] FIELD_ENDS = {
        2, 10, 16, 21, 28, 29, 59, 89, 119,
        149, 162, 167, 171, 201, 211, 220, 229, 233, 237,
        245, 253, 261, 269, 277, 282, 285, 288, 293, 305,
        314, 325, 328, 345, 350};

    /** A line must be strictly longer than this to be treated as a data record. */
    private static final int MAX_NON_RECORD_LENGTH = 300;

    /** Charset of the input file; aliases: ibm437 437 ibm-437 cspc8codepage437 cp437 windows-437. */
    private static final String INPUT_CHARSET = "ibm437";

    /** Buffer size handed to the gzip decompressor. */
    private static final int GZIP_BUFFER_SIZE = 4096;

    /**
     * Reads {@code inputFileName} line by line, slices each record into the
     * fixed-width columns of {@link #FIELD_ENDS}, trims every column and writes
     * the columns to {@code outputFileName}, each followed by a tab, one record
     * per {@code \n}-terminated line. (The last column is also followed by a
     * tab, so every output row ends with a tab before the newline.)
     *
     * <p>If the input file name ends with {@code .gz} the file is decompressed
     * with gzip while reading.
     *
     * <p>Conversion stops at end of file or at the first line that is 300
     * characters or shorter, whichever comes first; remaining lines are not
     * read. NOTE(review): presumably such a short line marks a trailer or the
     * end of the data -- confirm against the file specification.
     *
     * <p>Records longer than 300 but shorter than 350 characters are accepted:
     * columns that lie partly or wholly beyond the end of the line are cut off
     * at the end of the line (and are therefore possibly empty) instead of
     * aborting the conversion.
     *
     * @param inputFileName  path of the fixed-width input file, optionally gzip-compressed
     * @param outputFileName path of the tab-separated file to create or overwrite
     * @return {@code "Success: Conversion completed"} on success, otherwise a
     *         string starting with {@code "Error: Exception="} followed by the
     *         failure description; no exception is propagated to the caller
     */
    public static String SuperfileToTSV(String inputFileName, String outputFileName) {
        // The input is opened before the output so that a missing or corrupt
        // input never creates or truncates the output file. try-with-resources
        // closes (and flushes) everything on both the success and failure paths.
        try (InputStream file = new FileInputStream(inputFileName);
             InputStream bytes = inputFileName.endsWith(".gz")
                     ? new GZIPInputStream(file, GZIP_BUFFER_SIZE)
                     : file;
             BufferedReader in = new BufferedReader(new InputStreamReader(bytes, INPUT_CHARSET));
             Writer out = new BufferedWriter(new OutputStreamWriter(
                     new FileOutputStream(outputFileName), StandardCharsets.UTF_8))) {
            String line = in.readLine();
            while (line != null && line.length() > MAX_NON_RECORD_LENGTH) {
                out.write(toTsvRow(line));
                line = in.readLine();
            }
        } catch (Exception x) {
            // IOException, FileNotFoundException, UnsupportedEncodingException
            String detail = x.getMessage();
            return "Error: Exception=" + (detail != null ? detail : x.getClass().getName());
        }
        // next steps (done in CFML):
        // copy stg_superfile from '[outputFileName]' with (format 'text');
        // then select * from import_superfile()
        // then call superfile_post_import()
        return "Success: Conversion completed";
    }

    /** Splits one fixed-width record into trimmed, tab-terminated fields ending in a newline. */
    private static String toTsvRow(String line) {
        int length = line.length();
        StringBuilder row = new StringBuilder(length + FIELD_ENDS.length + 1);
        int start = 0;
        for (int end : FIELD_ENDS) {
            // Clamp both offsets: a record may be shorter than the last column end.
            int from = Math.min(start, length);
            int to = Math.min(end, length);
            row.append(line.substring(from, to).trim()).append('\t');
            start = end;
        }
        return row.append('\n').toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment