Created July 9, 2016 08:47
-
-
Save samklr/e7eda8f345199301dfdb4097a3a188a8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Lists the files directly under an HDFS directory, then prints how many
// there are and their combined length in bytes.

// Directory to inspect.
val hadoopPath = new org.apache.hadoop.fs.Path("hdfs://localhost:9000/tmp")

// Resolve the file system from the path's own URI rather than from
// FileSystem.get(conf): the latter returns whatever fs.defaultFS points at,
// which fails with "Wrong FS" when the default is not hdfs://localhost:9000.
val hdfs: org.apache.hadoop.fs.FileSystem =
  hadoopPath.getFileSystem(new org.apache.hadoop.conf.Configuration())

// false = only the immediate children, do not descend into sub-directories.
val recursive = false
val ri = hdfs.listFiles(hadoopPath, recursive)

// Adapt the iterator returned by listFiles to a Scala Iterator so the
// standard collection methods (toList, map, sum, ...) can be used on it.
val it = new Iterator[org.apache.hadoop.fs.LocatedFileStatus] {
  override def hasNext: Boolean = ri.hasNext
  override def next(): org.apache.hadoop.fs.LocatedFileStatus = ri.next()
}

// Materialize the iterator: it can be traversed only once, and both the
// count and the size total are needed.
val files = it.toList
println(files.size)              // number of files
println(files.map(_.getLen).sum) // total size in bytes
// Tune size: apply one block size to both the HDFS and the Parquet
// block-size settings on the Spark context's Hadoop configuration.
val blockSize = 16 * 1024 * 1024 // 16MB
Seq("dfs.blocksize", "parquet.block.size").foreach { key =>
  sc.hadoopConfiguration.setInt(key, blockSize)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment