Skip to content

Instantly share code, notes, and snippets.

@schierlm
Created December 12, 2014 23:42
Show Gist options
  • Save schierlm/607929779172b1c94ef2 to your computer and use it in GitHub Desktop.
Save schierlm/607929779172b1c94ef2 to your computer and use it in GitHub Desktop.
package utils;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.junit.Assert;
import org.junit.Test;
/**
 * Utility for fetching the last line of a text file without reading the whole
 * file, together with the JUnit tests that exercise it.
 */
public class GetLastLineFast {

    /** Number of bytes fetched per read while scanning backwards through the file. */
    private static final int BLOCK_SIZE = 4096;

    private static final byte LINE_FEED = '\n';

    private static final byte CARRIAGE_RETURN = '\r';

    /** Source of randomness for the generated test lines. */
    private static final Random RND = new Random();

    /**
     * Returns the last line of a text file by reading it backwards from the end
     * in blocks of {@value #BLOCK_SIZE} bytes, so only the tail of the file is
     * read.
     *
     * <p>
     * Lines are separated by {@code \n} or {@code \r\n}. A single line
     * terminator at the very end of the file is not treated as starting another
     * line. Bytes are decoded as ISO-8859-1, the same mapping
     * {@link RandomAccessFile#readLine()} uses. A file that contains no line
     * terminator at all is treated as one single line.
     *
     * @param file
     *            A file on disk
     * @return The last line without its line terminator, or an empty string if
     *         the file does not exist, is a directory, is empty, ends with an
     *         empty line, or could not be read (in that case the stack trace is
     *         printed)
     */
    public static String getLastLineFast(final File file) {
        // file needs to exist and must not be a directory
        if (!file.exists() || file.isDirectory()) {
            return "";
        }
        try (RandomAccessFile fileAccess = new RandomAccessFile(file, "r")) {
            // end of the text once one trailing line terminator has been dropped
            long contentEnd = stripTrailingLineTerminator(fileAccess, fileAccess.length());
            if (contentEnd == 0) {
                return "";
            }
            long lineStart = findLastLineStart(fileAccess, contentEnd);
            long lineLength = contentEnd - lineStart;
            if (lineLength > Integer.MAX_VALUE - 8) {
                throw new IOException("Last line of " + file + " is too long to load: " + lineLength + " bytes");
            }
            byte[] lineBytes = new byte[(int) lineLength];
            fileAccess.seek(lineStart);
            fileAccess.readFully(lineBytes);
            // RandomAccessFile#readLine uses ISO-8859-1, therefore we do here too
            return new String(lineBytes, StandardCharsets.ISO_8859_1);
        } catch (IOException | RuntimeException ex) {
            // best effort: callers get an empty string instead of an exception
            ex.printStackTrace();
            return "";
        }
    }

    /**
     * Returns {@code length} reduced by one trailing {@code \n} or {@code \r\n},
     * if the file ends with one.
     */
    private static long stripTrailingLineTerminator(RandomAccessFile fileAccess, long length) throws IOException {
        if (length == 0) {
            return 0;
        }
        int tailLength = (int) Math.min(2, length);
        byte[] tail = new byte[tailLength];
        fileAccess.seek(length - tailLength);
        fileAccess.readFully(tail);
        if (tail[tailLength - 1] != LINE_FEED) {
            return length;
        }
        boolean carriageReturnBefore = tailLength == 2 && tail[0] == CARRIAGE_RETURN;
        return length - (carriageReturnBefore ? 2 : 1);
    }

    /**
     * Returns the offset just after the last {@code \n} located before
     * {@code contentEnd}, or 0 if there is none.
     */
    private static long findLastLineStart(RandomAccessFile fileAccess, long contentEnd) throws IOException {
        byte[] block = new byte[BLOCK_SIZE];
        long blockEnd = contentEnd;
        while (blockEnd > 0) {
            // keep reads aligned to multiples of BLOCK_SIZE
            long blockStart = (blockEnd - 1) / BLOCK_SIZE * BLOCK_SIZE;
            int length = (int) (blockEnd - blockStart);
            fileAccess.seek(blockStart);
            fileAccess.readFully(block, 0, length);
            for (int i = length - 1; i >= 0; i--) {
                if (block[i] == LINE_FEED) {
                    return blockStart + i + 1;
                }
            }
            // no line break in this block - continue with the one before it
            blockEnd = blockStart;
        }
        // reached the start of the file: the whole content is a single line
        return 0;
    }

    /**
     * Runs the growing-file test for both supported line terminators, with and
     * without a terminator at the end of the file.
     */
    @Test
    public void testLastLineGetting() throws IOException {
        for (String nl : Arrays.asList("\n", "\r\n")) {
            testLastLineGetting(true, nl);
            testLastLineGetting(false, nl);
        }
    }

    /** Checks very short files, single-line files and empty or one-character last lines. */
    @Test
    public void testShortAndSingleLineFiles() throws IOException {
        assertLastLine("", "");
        assertLastLine("\n", "");
        assertLastLine("\r\n", "");
        assertLastLine("ab", "ab");
        assertLastLine("only line", "only line");
        assertLastLine("only line\n", "only line");
        assertLastLine("only line\r\n", "only line");
        assertLastLine("first\nX", "X");
        assertLastLine("first\r\nX\r\n", "X");
        assertLastLine("first\n\n", "");
    }

    /**
     * Appends 999 random lines to a temporary file, one at a time, and checks
     * after every append that {@link #getLastLineFast(File)} returns the line
     * just written.
     *
     * @param newlineAtEOF
     *            if true each line is followed by the terminator, otherwise
     *            each line is preceded by it
     * @param newline
     *            the line terminator to write
     */
    private void testLastLineGetting(boolean newlineAtEOF, String newline) throws IOException {
        File tmpfile = File.createTempFile("~unittest", ".txt");
        try {
            for (int i = 1; i < 1000; i++) {
                String line = testCreateRandomLine(i);
                try (BufferedWriter bw = new BufferedWriter(new FileWriter(tmpfile, true))) {
                    if (!newlineAtEOF) {
                        bw.write(newline);
                    }
                    bw.write(line);
                    if (newlineAtEOF) {
                        bw.write(newline);
                    }
                }
                Assert.assertEquals(line, getLastLineFast(tmpfile));
            }
        } finally {
            if (!tmpfile.delete()) {
                tmpfile.deleteOnExit();
            }
        }
    }

    /** Writes {@code content} to a fresh temporary file and checks the reported last line. */
    private static void assertLastLine(String content, String expected) throws IOException {
        File tmpfile = File.createTempFile("~unittest", ".txt");
        try {
            try (BufferedWriter bw = new BufferedWriter(new FileWriter(tmpfile))) {
                bw.write(content);
            }
            Assert.assertEquals(expected, getLastLineFast(tmpfile));
        } finally {
            if (!tmpfile.delete()) {
                tmpfile.deleteOnExit();
            }
        }
    }

    /**
     * Builds a line made of a fixed prefix containing {@code lineNumber}
     * followed by 10 to 10009 random ASCII letters, so that some lines span
     * several blocks.
     */
    private String testCreateRandomLine(int lineNumber) {
        StringBuilder sb = new StringBuilder();
        sb.append("This is a random line ").append(lineNumber).append(" with some garbage: ");
        char[] chars = new char[RND.nextInt(RND.nextBoolean() ? 10000 : 150) + 10];
        for (int i = 0; i < chars.length; i++) {
            chars[i] = (char) ((RND.nextBoolean() ? 'A' : 'a') + RND.nextInt(26));
        }
        sb.append(chars);
        return sb.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment