Skip to content

Instantly share code, notes, and snippets.

@masayuki038
Created July 22, 2017 15:38
Show Gist options
  • Save masayuki038/4025be686800da424a97aad4324e65c8 to your computer and use it in GitHub Desktop.
Save masayuki038/4025be686800da424a97aad4324e65c8 to your computer and use it in GitHub Desktop.
SampleParquetReader2.java
package net.wrap_trap.parquet_sample3;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.ColumnReader;
import org.apache.parquet.column.impl.ColumnReadStoreImpl;
import org.apache.parquet.column.impl.ColumnReaderImpl;
import org.apache.parquet.column.page.*;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.Converter;
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.schema.MessageType;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Collections;
import java.util.List;
/**
 * Command-line sample that prints every value of every column of a Parquet
 * file to standard output, one column at a time.
 *
 * <p>For each column the column descriptor is printed first, followed by one
 * line per value for each row group and a blank line after each row group.
 * Values whose definition level is below the column's maximum are printed as
 * {@code <null>}.
 */
public class SampleParquetReader2 {

    /** File dumped by {@link #main(String[])} when no path argument is given. */
    private static final String DEFAULT_PATH =
        "D:\\development\\repository\\git\\drill\\sample-data\\nationsMF\\nationsMF.parquet";

    private static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the Parquet file to dump. When
     *             absent, {@link #DEFAULT_PATH} is used.
     * @throws Exception if the file cannot be read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        execute(path);
    }

    /**
     * Reads the footer of the file at {@code path} and dumps all of its columns.
     *
     * @param path location of the Parquet file, in any form accepted by Hadoop's {@link Path}
     * @throws IOException if the footer or the column data cannot be read
     */
    protected static void execute(String path) throws IOException {
        Configuration conf = new Configuration();
        Path inPath = new Path(path);
        ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inPath);
        MessageType schema = metaData.getFileMetaData().getSchema();
        dump(conf, metaData, schema, inPath);
    }

    /**
     * Prints the descriptor and all values of each column in {@code schema}.
     *
     * @param conf     Hadoop configuration used to open the file
     * @param metaData footer of the file; supplies the row groups to read
     * @param schema   schema whose columns are dumped
     * @param inPath   location of the Parquet file
     * @throws IOException if a row group cannot be read or a reader cannot be closed
     */
    protected static void dump(Configuration conf, ParquetMetadata metaData, MessageType schema, Path inPath) throws IOException {
        List<BlockMetaData> blocks = metaData.getBlocks();
        for (ColumnDescriptor column : schema.getColumns()) {
            System.out.println(ToStringBuilder.reflectionToString(column));
            // One reader per column, restricted to that column so the other
            // column chunks are not read over and over; try-with-resources
            // guarantees the underlying stream is released.
            try (ParquetFileReader reader =
                     new ParquetFileReader(conf, inPath, blocks, Collections.singletonList(column))) {
                for (PageReadStore store = reader.readNextRowGroup();
                     store != null;
                     store = reader.readNextRowGroup()) {
                    dumpRowGroup(store, schema, column);
                    System.out.println();
                }
            }
        }
    }

    /** Prints every value of {@code column} contained in one row group. */
    private static void dumpRowGroup(PageReadStore store, MessageType schema, ColumnDescriptor column) {
        ColumnReadStoreImpl readStore = new ColumnReadStoreImpl(store, new DumpGroupConverter(), schema, "");
        ColumnReader columnReader = readStore.getColumnReader(column);
        int maxDefinitionLevel = column.getMaxDefinitionLevel();
        long valueCount = columnReader.getTotalValueCount();
        for (long i = 0L; i < valueCount; ++i) {
            if (columnReader.getCurrentDefinitionLevel() == maxDefinitionLevel) {
                printValue(column, columnReader);
            } else {
                System.out.println("<null>");
            }
            columnReader.consume();
        }
    }

    /** Prints the reader's current value using the getter that matches the column's primitive type. */
    private static void printValue(ColumnDescriptor column, ColumnReader columnReader) {
        switch (column.getType()) {
            case BINARY:
            case FIXED_LEN_BYTE_ARRAY:
                System.out.println(binaryToString(columnReader.getBinary()));
                break;
            case BOOLEAN:
                System.out.println(columnReader.getBoolean());
                break;
            case DOUBLE:
                System.out.println(columnReader.getDouble());
                break;
            case FLOAT:
                System.out.println(columnReader.getFloat());
                break;
            case INT32:
                System.out.println(columnReader.getInteger());
                break;
            case INT64:
                System.out.println(columnReader.getLong());
                break;
            case INT96:
                System.out.println(binaryToBigInteger(columnReader.getBinary()));
                break;
            default:
                // Not reachable with the primitive types handled above; makes
                // an unexpected type visible instead of printing nothing.
                System.out.println("<unsupported type: " + column.getType() + ">");
                break;
        }
    }

    /**
     * Decodes a binary value as UTF-8 text.
     *
     * @param value the binary value; must not be {@code null}
     * @return the decoded string, {@code "<bytes...>"} when the bytes are not
     *         valid UTF-8, or {@code null} when {@code value.getBytes()} is {@code null}
     */
    public static String binaryToString(Binary value) {
        byte[] data = value.getBytes();
        if (data == null) {
            return null;
        }
        try {
            // A CharsetDecoder is stateful and not safe for concurrent use, so
            // a fresh one is created per call instead of sharing a static
            // instance. A decoder from newDecoder() reports malformed input by
            // throwing, which drives the fallback below.
            return UTF8.newDecoder().decode(value.toByteBuffer()).toString();
        } catch (CharacterCodingException ignored) {
            // Not text: fall back to a placeholder rather than failing the dump.
            return "<bytes...>";
        }
    }

    /**
     * Converts a binary value to a {@link BigInteger} by passing its bytes to
     * {@link BigInteger#BigInteger(byte[])}, which reads them as a big-endian
     * two's-complement number.
     *
     * <p>NOTE(review): used for INT96 columns; confirm that the byte order of
     * the stored value matches what the caller expects.
     *
     * @param value the binary value; must not be {@code null}
     * @return the integer, or {@code null} when {@code value.getBytes()} is {@code null}
     * @throws NumberFormatException if the value has zero bytes
     */
    public static BigInteger binaryToBigInteger(Binary value) {
        byte[] data = value.getBytes();
        if (data == null) {
            return null;
        }
        return new BigInteger(data);
    }

    /** Group converter with empty callbacks, passed to {@link ColumnReadStoreImpl} by the dump. */
    private static final class DumpGroupConverter extends GroupConverter {
        @Override
        public void start() {
        }

        @Override
        public void end() {
        }

        @Override
        public Converter getConverter(int fieldIndex) {
            return new DumpConverter();
        }
    }

    /** Primitive converter handed out for every field by {@link DumpGroupConverter}. */
    private static final class DumpConverter extends PrimitiveConverter {
        @Override
        public GroupConverter asGroupConverter() {
            return new DumpGroupConverter();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment