Last active
October 1, 2021 05:29
-
-
Save gyakkun/da7f1ee905b4415066ab5718a4e0a1dc to your computer and use it in GitHub Desktop.
Pattern match in hex, Sunday algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package moe.gyakkun.test; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.RandomAccessFile; | |
import java.nio.channels.FileChannel; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.List; | |
/**
 * Splits a binary file into parts delimited by two 4-byte markers.
 *
 * <p>The file is scanned with the Sunday string-search algorithm, alternating between the
 * start marker {@code 00 00 01 BA} and the end marker {@code 00 00 01 B9} (presumably the
 * MPEG program-stream pack start code and program end code, given the .VOB/.mpg file names;
 * verify against the stream spec). Every start/end pair is then copied, markers included,
 * to {@code <prefix>NN.mpg}.
 */
public class Test {

    /** Input file used when no command-line argument is supplied. */
    private static final String DEFAULT_FILE_NAME = "E:\\VTS_01_1.VOB";

    /** Output path prefix used when no second command-line argument is supplied. */
    private static final String DEFAULT_OUTPUT_PREFIX = "E:\\PART_";

    /** Marker that opens a part. */
    private static final byte[] MAGIC_NUMBER_PREFIX = {0x00, 0x00, 0x01, (byte) 0xBA};

    /** Marker that closes a part. */
    private static final byte[] MAGIC_NUMBER_TRAILING = {0x00, 0x00, 0x01, (byte) 0xB9};

    /** Both markers have this length. */
    private static final int PATTERN_LEN = 4;

    /** Size of the read buffer in bytes. */
    private static final int STEP = 1 << 20;

    /** Sunday shift table for {@link #MAGIC_NUMBER_PREFIX}. */
    private static final int[] PREFIX_SKIP = buildSkipTable(MAGIC_NUMBER_PREFIX);

    /** Sunday shift table for {@link #MAGIC_NUMBER_TRAILING}. */
    private static final int[] TRAILING_SKIP = buildSkipTable(MAGIC_NUMBER_TRAILING);

    /**
     * Scans the input file for marker pairs and writes one output file per pair.
     *
     * <p>The offset lists are printed to standard output and the elapsed time to standard
     * error. If the last start marker has no matching end marker, the part runs to the end
     * of the file.
     *
     * @param args optional: {@code args[0]} is the input file, {@code args[1]} the output
     *             path prefix; the built-in defaults are used for anything omitted
     * @throws IOException if the input cannot be read or an output cannot be written
     */
    public static void main(String[] args) throws IOException {
        long timing = System.currentTimeMillis();
        final String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_NAME;
        final String outputFileNamePrefix =
                (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PREFIX;

        List<Long> prefixOffsetList = new ArrayList<>();
        List<Long> trailingOffsetList = new ArrayList<>();

        // try-with-resources: the input is closed even when scanning or copying fails.
        try (RandomAccessFile raf = new RandomAccessFile(fileName, "r");
             FileChannel fisChan = raf.getChannel()) {
            findMarkers(raf, prefixOffsetList, trailingOffsetList);
            System.out.println(prefixOffsetList);
            System.out.println(trailingOffsetList);

            // An unterminated final part extends to the end of the file: pretend an end
            // marker occupies the last four bytes so the length formula below still holds.
            if (trailingOffsetList.size() == prefixOffsetList.size() - 1) {
                trailingOffsetList.add(raf.length() - PATTERN_LEN);
            }

            for (int i = 0; i < trailingOffsetList.size(); i++) {
                long start = prefixOffsetList.get(i);
                // +PATTERN_LEN so the end marker itself is part of the output.
                long targetLen = trailingOffsetList.get(i) - start + PATTERN_LEN;
                String outName = outputFileNamePrefix + String.format("%02d", i) + ".mpg";
                try (FileOutputStream fos = new FileOutputStream(outName);
                     FileChannel fosChan = fos.getChannel()) {
                    copyRange(fisChan, start, targetLen, fosChan);
                }
            }
        }

        timing = System.currentTimeMillis() - timing;
        System.err.println("TIMING: " + timing + "ms.");
    }

    /**
     * Collects the absolute offsets of alternating start and end markers, beginning with a
     * start marker.
     *
     * @param raf             file to scan; its file pointer is moved
     * @param prefixOffsets   receives the offset of every start marker found
     * @param trailingOffsets receives the offset of every end marker found
     * @throws IOException if reading fails
     */
    private static void findMarkers(RandomAccessFile raf, List<Long> prefixOffsets,
                                    List<Long> trailingOffsets) throws IOException {
        final long fileLen = raf.length();
        final byte[] buf = new byte[STEP];
        boolean isPrefix = true;
        long chunkStart = 0;

        while (chunkStart + PATTERN_LEN <= fileLen) {
            raf.seek(chunkStart);
            int bufLen = readChunk(raf, buf);
            if (bufLen < PATTERN_LEN) {
                break;
            }

            int bufOffset = 0;
            while (bufOffset + PATTERN_LEN <= bufLen) {
                byte[] pattern = isPrefix ? MAGIC_NUMBER_PREFIX : MAGIC_NUMBER_TRAILING;
                if (matchesAt(buf, bufOffset, pattern)) {
                    (isPrefix ? prefixOffsets : trailingOffsets).add(chunkStart + bufOffset);
                    bufOffset += PATTERN_LEN;
                    isPrefix = !isPrefix;
                } else if (bufOffset + PATTERN_LEN >= bufLen) {
                    // The byte that decides the shift lies outside the buffer; resume at
                    // this window once the next chunk has been loaded.
                    break;
                } else {
                    int[] skip = isPrefix ? PREFIX_SKIP : TRAILING_SKIP;
                    bufOffset += skip[buf[bufOffset + PATTERN_LEN] & 0xff];
                }
            }

            if (bufLen < buf.length) {
                break; // a partially filled buffer means end of file was reached
            }
            // Continue exactly where scanning stopped, so a marker straddling the chunk
            // boundary is seen whole in the next chunk. bufOffset is always > 0 here
            // because a full buffer holds more than one window.
            chunkStart += bufOffset;
        }
    }

    /**
     * Fills {@code buf} from the current file position, retrying after short reads.
     *
     * @return number of bytes read; less than {@code buf.length} only at end of file
     * @throws IOException if reading fails
     */
    private static int readChunk(RandomAccessFile raf, byte[] buf) throws IOException {
        int filled = 0;
        while (filled < buf.length) {
            int n = raf.read(buf, filled, buf.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
        return filled;
    }

    /** Returns whether {@code pattern} occurs in {@code buf} starting at {@code offset}. */
    private static boolean matchesAt(byte[] buf, int offset, byte[] pattern) {
        for (int i = 0; i < pattern.length; i++) {
            if (buf[offset + i] != pattern[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the Sunday shift table: for the byte just past the current window, how far the
     * window may advance. Bytes absent from the pattern shift by {@code length + 1}; others
     * by the distance from their last occurrence to the end of the pattern.
     */
    private static int[] buildSkipTable(byte[] pattern) {
        int[] table = new int[256];
        Arrays.fill(table, pattern.length + 1);
        for (int i = 0; i < pattern.length; i++) {
            table[pattern[i] & 0xff] = pattern.length - i;
        }
        return table;
    }

    /**
     * Copies {@code length} bytes starting at {@code start} from {@code source} to
     * {@code target}, repeating the transfer until the whole range has been moved because a
     * single {@code transferTo} call may move fewer bytes than requested.
     *
     * @throws IOException if the transfer fails or stops making progress
     */
    private static void copyRange(FileChannel source, long start, long length,
                                  FileChannel target) throws IOException {
        long position = start;
        long remaining = length;
        while (remaining > 0) {
            long moved = source.transferTo(position, remaining, target);
            if (moved <= 0) {
                throw new IOException("Transfer stalled at offset " + position
                        + " with " + remaining + " bytes left");
            }
            position += moved;
            remaining -= moved;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment