Created
December 12, 2014 23:42
-
-
Save schierlm/607929779172b1c94ef2 to your computer and use it in GitHub Desktop.
Slightly optimized version of https://github.com/nunobrito/utils/blob/a56b2352560788dd853fafc2e57382dca15e74ee/Utils/src/utils/files.java#L620
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package utils; | |
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.junit.Assert;
import org.junit.Test;
public class GetLastLineFast { | |
/** | |
* Returns the last line from a given text file. This method is particularly | |
* well suited for very large text files that contain millions of text lines | |
* since it will just seek the end of the text file and seek the last line | |
* indicator. Please use only for large sized text files. | |
* | |
* @param file | |
* A file on disk | |
* @return The last line if available or an empty string if nothing was | |
* found | |
*/ | |
public static String getLastLineFast(final File file) { | |
// file needs to exist | |
if (file.exists() == false || file.isDirectory()) { | |
return ""; | |
} | |
// avoid empty files | |
if (file.length() <= 2) { | |
return ""; | |
} | |
// open the file for read-only mode | |
try (RandomAccessFile fileAccess = new RandomAccessFile(file, "r")) { | |
char breakLine = '\n'; | |
// offset of the current filesystem block - start with the last one | |
long blockStart = (file.length() - 1) / 4096 * 4096; | |
// hold the current block | |
byte[] currentBlock = new byte[(int) (file.length() - blockStart)]; | |
// later (previously read) blocks | |
List<byte[]> laterBlocks = new ArrayList<byte[]>(); | |
while (blockStart >= 0) { | |
fileAccess.seek(blockStart); | |
fileAccess.readFully(currentBlock); | |
// ignore the last 2 bytes of the block if it is the first one | |
int lengthToScan = currentBlock.length - (laterBlocks.isEmpty() ? 2 : 0); | |
for (int i = lengthToScan - 1; i >= 0; i--) { | |
if (currentBlock[i] == breakLine) { | |
// we found our end of line! | |
StringBuilder result = new StringBuilder(); | |
// RandomAccessFile#readLine uses ISO-8859-1, therefore | |
// we do here too | |
result.append(new String(currentBlock, i + 1, currentBlock.length - (i + 1), "ISO-8859-1")); | |
for (byte[] laterBlock : laterBlocks) { | |
result.append(new String(laterBlock, "ISO-8859-1")); | |
} | |
// maybe we had a newline at end of file? Strip it. | |
if (result.charAt(result.length() - 1) == breakLine) { | |
// newline can be \r\n or \n, so check which one to strip | |
int newlineLength = result.charAt(result.length() - 2) == '\r' ? 2 : 1; | |
result.setLength(result.length() - newlineLength); | |
} | |
return result.toString(); | |
} | |
} | |
// no end of line found - we need to read more | |
laterBlocks.add(0, currentBlock); | |
blockStart -= 4096; | |
currentBlock = new byte[4096]; | |
} | |
} catch (Exception ex) { | |
ex.printStackTrace(); | |
} | |
// oops, no line break found or some exception happened | |
return ""; | |
} | |
// test method: original duration 65 seconds, new duration 5 seconds | |
@Test | |
public void testLastLineGetting() throws IOException { | |
for (String nl : Arrays.asList("\n", "\r\n")) { | |
testLastLineGetting(true, nl); | |
testLastLineGetting(false, nl); | |
} | |
} | |
private void testLastLineGetting(boolean newlineAtEOF, String newline) throws IOException { | |
File tmpfile = File.createTempFile("~unittest", ".txt"); | |
try { | |
for (int i = 1; i < 1000; i++) { | |
String line = testCreateRandomLine(i); | |
try (BufferedWriter bw = new BufferedWriter(new FileWriter(tmpfile, true))) { | |
if (!newlineAtEOF) | |
bw.write(newline); | |
bw.write(line); | |
if (newlineAtEOF) | |
bw.write(newline); | |
} | |
if (i > 1) { | |
// does not work when file has only one line; but original | |
// code did not either | |
Assert.assertEquals(line, getLastLineFast(tmpfile)); | |
} | |
} | |
} finally { | |
if (!tmpfile.delete()) | |
tmpfile.deleteOnExit(); | |
} | |
} | |
private static Random rnd = new Random(); | |
private String testCreateRandomLine(int lineNumber) { | |
StringBuilder sb = new StringBuilder(); | |
sb.append("This is a random line " + lineNumber + " with some garbage: "); | |
char[] chars = new char[rnd.nextInt(rnd.nextBoolean() ? 10000 : 150) + 10]; | |
for (int i = 0; i < chars.length; i++) { | |
chars[i] = (char) ((rnd.nextBoolean() ? 'A' : 'a') + rnd.nextInt(26)); | |
} | |
sb.append(chars); | |
return sb.toString(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment