Skip to content

Instantly share code, notes, and snippets.

@ottomata
Created October 3, 2017 18:05
Show Gist options
  • Select an option

  • Save ottomata/9e6e0bc06ecce010eb94981305c50062 to your computer and use it in GitHub Desktop.

Select an option

Save ottomata/9e6e0bc06ecce010eb94981305c50062 to your computer and use it in GitHub Desktop.
import org.apache.hadoop.fs.{FileSystem, Path, FileChecksum, ChecksumFileSystem, LocalFileSystem}
import java.net.URI
// Scratch script (spark-shell: relies on the shell-provided `sc`) that gathers
// checksum data for one file stored on HDFS and for a copy on the local file system:
// the FileSystem-reported checksum plus the raw bytes of the hidden sidecar files.
import java.io.ByteArrayOutputStream

/**
 * Reads the whole file at `path` on `fs` into a byte array.
 *
 * The stream is closed in a `finally`, so it is released even if a read fails.
 *
 * @param fs   file system to open the file on
 * @param path file to read
 * @return every byte of the file, in order
 */
def readAllBytes(fs: FileSystem, path: Path): Array[Byte] = {
  val in = fs.open(path)
  try {
    val out = new ByteArrayOutputStream()
    val buffer = new Array[Byte](4096)
    // InputStream.read returns -1 at end of stream.
    Iterator
      .continually(in.read(buffer))
      .takeWhile(_ != -1)
      .foreach(count => out.write(buffer, 0, count))
    out.toByteArray
  } finally in.close()
}

val hdfs = FileSystem.get(sc.hadoopConfiguration)
val localfs = FileSystem.get(new URI("file:///"), sc.hadoopConfiguration)
// NOTE(review): unchecked cast -- throws ClassCastException if the "file:///" file system
// is not a ChecksumFileSystem. `lfs` is not used below; kept for interactive use.
val lfs = new LocalFileSystem(localfs.asInstanceOf[ChecksumFileSystem])

// Recorded sample (path, checksum algorithm, checksum hex) -- presumably for the HDFS file below:
// /user/otto/external/webrequest_sampled_1024/webrequest_source=misc/year=2017/month=4/day=23/hour=2/000063_0 MD5-of-0MD5-of-512CRC32C 000002000000000000000000e587c2281fb4dd94e55fd8356476c0f7
val hdfsPath = new Path("/user/otto/external/webrequest_sampled_1024/webrequest_source=misc/year=2017/month=4/day=23/hour=2/000063_0")
// NOTE(review): unlike localCrcPath this has no ".crc" suffix -- confirm the path is intended.
val hdfsCrcPath = new Path("/user/otto/external/webrequest_sampled_1024/webrequest_source=misc/year=2017/month=4/day=23/hour=2/.000063_0")

// Checksum as reported by the HDFS FileSystem API.
val hdfsChecksum = hdfs.getFileChecksum(hdfsPath)
// Previously a first `hdfsCrcBytes`, which the later definition shadowed; it now has its
// own name so both values stay reachable and the script has no duplicate definitions.
val hdfsChecksumBytes = hdfsChecksum.getBytes()
val crcLength = hdfsChecksum.getLength()

val localPath = new Path("file:///tmp/otto/ctl1/misc1/misc2/000063_0")
val localCrcPath = new Path("file:///tmp/otto/ctl1/misc1/misc2/.000063_0.crc")
// NOTE(review): may be null -- FileSystem.getFileChecksum is documented to return null
// when no checksum is available; verify for the local file system before dereferencing.
val localChecksum = localfs.getFileChecksum(localPath)

// Raw contents of the hidden sidecar files (local first, then HDFS, as before).
val localCrcBytes = readAllBytes(localfs, localCrcPath)
val hdfsCrcBytes = readAllBytes(hdfs, hdfsCrcPath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment