Created
March 1, 2017 19:09
-
-
Save ddkaiser/bde327d7889dd8cf58f76dcc420f3aca to your computer and use it in GitHub Desktop.
Minimal example of writing an ORC file. It is non-distributed (does not require YARN) and can run in an off-cluster process, provided the appropriate HDFS and ORC client libraries are installed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example - MINIMAL IMPLEMENTATION - of simple ORC file writing.
// Non-distributed (does not require YARN) and can run in an off-cluster process.
// Requires that the host has the appropriate HDFS and ORC client libraries installed, plus connectivity to the cluster.
package org.dkaiser.orctest; | |
import java.io.IOException; | |
import org.apache.hadoop.conf.Configuration; | |
import org.apache.hadoop.fs.FileSystem; | |
import org.apache.hadoop.fs.Path; | |
import org.apache.hadoop.hive.ql.io.orc.CompressionKind; | |
import org.apache.hadoop.hive.ql.io.orc.OrcFile; | |
import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy; | |
import org.apache.hadoop.hive.ql.io.orc.Writer; | |
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; | |
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; | |
public class Driver { | |
public static class Row { | |
Integer int1; | |
Long long1; | |
String firstName; | |
String lastName; | |
String city; | |
String state; | |
String zip9; | |
public Row(int val, long l, String fn, String ln, String c, String s, String z) { | |
this.int1 = val; | |
this.long1 = l; | |
this.firstName = fn; | |
this.lastName = ln; | |
this.city = c; | |
this.state = s; | |
this.zip9 = z; | |
} | |
} | |
public static void main(String[] args) { | |
Configuration conf = new Configuration(false); | |
conf.set("fs.defaultFS", "hdfs://sandbox.hortonworks.com:8020"); | |
ObjectInspector inspector; | |
synchronized (Driver.class) { | |
inspector = ObjectInspectorFactory.getReflectionObjectInspector( | |
Row.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); | |
} | |
try { | |
FileSystem fs = FileSystem.get(conf); | |
Path path = new Path("/tmp/file.orc"); | |
if (fs.exists(path)) { | |
fs.delete(path, true); | |
} | |
Writer writer = OrcFile.createWriter(path, | |
OrcFile.writerOptions(conf) | |
.fileSystem(fs) | |
.inspector(inspector) | |
.blockPadding(false) | |
.compress(CompressionKind.NONE) | |
.encodingStrategy(EncodingStrategy.SPEED) | |
// .stripeSize(100000) | |
// .bufferSize(10000) | |
); | |
writer.addRow(new Row(111, 1111L, "John", "Doe", "Buffalo", "NY", "12345-6789")); | |
writer.addRow(new Row(112, 2222L, "Henry", "Ford", "Detroit", "MI", "12345-6789")); | |
writer.addRow(new Row(113, 3333L, "Walter", "Cronkite", "New York", "NY", "12345-6789")); | |
writer.addRow(new Row(114, 4444L, "Al", "Gore", "Nashville", "TN", "12345-6789")); | |
writer.close(); | |
} catch (IOException ex) { | |
System.err.println("Error: Failure writing values to OrcFile."); | |
ex.printStackTrace(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment