Created
July 22, 2017 15:12
-
-
Save masayuki038/6c8847da0eed2f5ca34662a6badf52c1 to your computer and use it in GitHub Desktop.
SampleParquetReader.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package net.wrap_trap.parquet_sample3; | |
| import org.apache.commons.lang3.builder.ToStringBuilder; | |
| import org.apache.hadoop.conf.Configuration; | |
| import org.apache.hadoop.fs.Path; | |
| import org.apache.parquet.column.ColumnDescriptor; | |
| import org.apache.parquet.column.page.*; | |
| import org.apache.parquet.hadoop.ParquetFileReader; | |
| import org.apache.parquet.hadoop.metadata.BlockMetaData; | |
| import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; | |
| import org.apache.parquet.hadoop.metadata.ParquetMetadata; | |
| import org.apache.parquet.schema.MessageType; | |
| import java.io.IOException; | |
| import java.util.List; | |
| public class SampleParquetReader { | |
| public static void main(String[] args) throws Exception { | |
| execute("D:\\development\\repository\\git\\drill\\sample-data\\nationsMF\\nationsMF.parquet"); | |
| } | |
| protected static void execute(String path) throws IOException { | |
| Configuration conf = new Configuration(); | |
| Path inPath = new Path(path); | |
| ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inPath); | |
| MessageType schema = metaData.getFileMetaData().getSchema(); | |
| dump(conf, metaData, schema, inPath); | |
| } | |
| protected static void dump(Configuration conf, ParquetMetadata metaData, MessageType schema, Path inPath) throws IOException { | |
| List<BlockMetaData> blocks = metaData.getBlocks(); | |
| List<ColumnDescriptor> columns = schema.getColumns(); | |
| ParquetFileReader reader = null; | |
| for (BlockMetaData block : blocks) { | |
| System.out.println(ToStringBuilder.reflectionToString(block)); | |
| System.out.println(); | |
| List<ColumnChunkMetaData> columnChunkMetadataList = block.getColumns(); | |
| for (ColumnChunkMetaData columnChunkMetadata : columnChunkMetadataList) { | |
| System.out.println(ToStringBuilder.reflectionToString(columnChunkMetadata)); | |
| reader = new ParquetFileReader(conf, inPath, blocks, columns); | |
| PageReadStore store = reader.readNextRowGroup(); | |
| while (store != null) { | |
| for (ColumnDescriptor column: columns) { | |
| PageReader pageReader = store.getPageReader(column); | |
| long valueCount = pageReader.getTotalValueCount(); | |
| int maxRepetitionLevel = column.getMaxRepetitionLevel(); | |
| int maxDefinitionLevel = column.getMaxDefinitionLevel(); | |
| System.out.println(String.format("column: %s, valueCount: %d, maxRepetitionLevel: %d, maDefinitionLevel: %d", column.getPath(), valueCount, maxRepetitionLevel, maxDefinitionLevel)); | |
| DictionaryPage dict = pageReader.readDictionaryPage(); | |
| if (dict != null) { | |
| System.out.println("Dictionary Size: " + dict.getDictionarySize()); | |
| System.out.println("Dictionary Encoding: " + dict.getEncoding()); | |
| } | |
| DataPage page = pageReader.readPage(); | |
| for (long count = 0L; page != null; count++) { | |
| System.out.println("page: " + count); | |
| page.accept(new DataPage.Visitor<Void>() { | |
| @Override | |
| public Void visit(DataPageV1 pageV1) { | |
| System.out.println("Repetition Level Encoding: " + pageV1.getRlEncoding()); | |
| System.out.println("Definition Level Encoding: " + pageV1.getDlEncoding()); | |
| System.out.println("Value Encoding: " + pageV1.getValueEncoding()); | |
| return null; | |
| } | |
| @Override | |
| public Void visit(DataPageV2 pageV2) { | |
| System.out.println("Value Encoding: " + pageV2.getDataEncoding()); | |
| return null; | |
| } | |
| }); | |
| System.out.println("Uncompressed Size: " + page.getUncompressedSize()); | |
| System.out.println("Value Count: " + page.getValueCount()); | |
| page = pageReader.readPage(); | |
| System.out.println(); | |
| } | |
| } | |
| System.out.println(); | |
| store = reader.readNextRowGroup(); | |
| } | |
| } | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment