Created
October 3, 2017 18:05
-
-
Save ottomata/9e6e0bc06ecce010eb94981305c50062 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import org.apache.hadoop.fs.{FileSystem, Path, FileChecksum, ChecksumFileSystem, LocalFileSystem} | |
| import java.net.URI | |
// Exploratory script comparing the checksum information of one file as stored
// on HDFS with a copy of it on the local filesystem.
// Expects `sc` to be in scope (presumably the SparkContext provided by
// spark-shell — confirm) so the Hadoop configuration can be reused.

/** Reads the whole file at `path` on `fs` into memory.
  *
  * The stream is closed even when reading fails.
  *
  * @param fs   filesystem to open the file on
  * @param path file to read
  * @return the complete file contents
  */
def readAllBytes(fs: FileSystem, path: Path): Array[Byte] = {
  val in = fs.open(path)
  try {
    val out = new java.io.ByteArrayOutputStream()
    val buffer = new Array[Byte](8192)
    // read() returns -1 at end of stream; copy chunk by chunk until then.
    Iterator
      .continually(in.read(buffer))
      .takeWhile(_ != -1)
      .foreach(count => out.write(buffer, 0, count))
    out.toByteArray
  } finally {
    in.close()
  }
}

val hdfs = FileSystem.get(sc.hadoopConfiguration)
val localfs = FileSystem.get(new URI("file:///"), sc.hadoopConfiguration)
// NOTE(review): this wraps the filesystem returned for file:/// in another
// LocalFileSystem; `lfs` is not used further in this script — confirm it is needed.
val lfs = new LocalFileSystem(localfs.asInstanceOf[ChecksumFileSystem])

// Previously observed checksum for the HDFS file:
// /user/otto/external/webrequest_sampled_1024/webrequest_source=misc/year=2017/month=4/day=23/hour=2/000063_0 MD5-of-0MD5-of-512CRC32C 000002000000000000000000e587c2281fb4dd94e55fd8356476c0f7
val hdfsPath = new Path("/user/otto/external/webrequest_sampled_1024/webrequest_source=misc/year=2017/month=4/day=23/hour=2/000063_0")
// NOTE(review): unlike localCrcPath below, this sidecar path has no ".crc"
// suffix — confirm that is intentional.
val hdfsCrcPath = new Path("/user/otto/external/webrequest_sampled_1024/webrequest_source=misc/year=2017/month=4/day=23/hour=2/.000063_0")

// Checksum as reported by the filesystem API.
// NOTE(review): getFileChecksum is documented to be able to return null when a
// filesystem does not support checksums; the calls below would then throw.
val hdfsChecksum = hdfs.getFileChecksum(hdfsPath)
// Named separately from `hdfsCrcBytes` (the sidecar file contents read below)
// so that the two values no longer overwrite each other.
val hdfsChecksumBytes = hdfsChecksum.getBytes()
val crcLength = hdfsChecksum.getLength()

val localPath = new Path("file:///tmp/otto/ctl1/misc1/misc2/000063_0")
val localCrcPath = new Path("file:///tmp/otto/ctl1/misc1/misc2/.000063_0.crc")
val localChecksum = localfs.getFileChecksum(localPath)

// Raw contents of the checksum sidecar files on each filesystem.
val localCrcBytes = readAllBytes(localfs, localCrcPath)
val hdfsCrcBytes = readAllBytes(hdfs, hdfsCrcPath)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment