Skip to content

Instantly share code, notes, and snippets.

@ddkaiser
Created March 1, 2017 19:09
Show Gist options
  • Save ddkaiser/bde327d7889dd8cf58f76dcc420f3aca to your computer and use it in GitHub Desktop.
Save ddkaiser/bde327d7889dd8cf58f76dcc420f3aca to your computer and use it in GitHub Desktop.
Example of minimal ORC file writing. Non-distributed (does not require YARN) and can run in an off-cluster process, provided the appropriate HDFS and ORC client libraries are installed.
// Example - MINIMAL IMPLEMENTATION - of simple ORC file writing.
// Non-distributed (does not require YARN) and can run in an off-cluster process.
// Requires that the host has appropriate HDFS and ORC client libs installed, plus connectivity to the cluster.
package org.dkaiser.orctest;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
/**
 * Minimal example that writes a handful of rows to an ORC file on HDFS.
 *
 * <p>The file schema is obtained by handing {@link Row} to Hive's reflection-based
 * {@link ObjectInspector}; the rows are then appended through an ORC {@link Writer}.
 */
public class Driver {

    /**
     * One record of the example data set. Instances are passed to
     * {@link Writer#addRow(Object)} and inspected reflectively, so the field
     * declarations below define what gets written.
     */
    public static class Row {
        Integer int1;
        Long long1;
        String firstName;
        String lastName;
        String city;
        String state;
        String zip9;

        /**
         * Creates a fully populated row.
         *
         * @param val value stored in {@code int1}
         * @param l   value stored in {@code long1}
         * @param fn  first name
         * @param ln  last name
         * @param c   city
         * @param s   state
         * @param z   value stored in {@code zip9}
         */
        public Row(int val, long l, String fn, String ln, String c, String s, String z) {
            this.int1 = val;
            this.long1 = l;
            this.firstName = fn;
            this.lastName = ln;
            this.city = c;
            this.state = s;
            this.zip9 = z;
        }
    }

    /**
     * Writes four sample rows to {@code /tmp/file.orc} on the configured HDFS
     * cluster, replacing any file already present at that path. I/O failures
     * are reported on standard error.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // 'false' skips loading the default Hadoop resources; only the
        // setting made explicitly below is applied to this Configuration.
        Configuration conf = new Configuration(false);
        conf.set("fs.defaultFS", "hdfs://sandbox.hortonworks.com:8020");

        ObjectInspector inspector;
        // NOTE(review): the lock presumably guards the factory's internal cache;
        // it is not required for this single-threaded main but is kept as-is.
        synchronized (Driver.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(
                    Row.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }

        try {
            FileSystem fs = FileSystem.get(conf);
            Path path = new Path("/tmp/file.orc");

            // Remove a stale output file. The delete is non-recursive so that an
            // unexpected directory at this path is never wiped out, and a delete
            // that reports failure is surfaced instead of being ignored.
            if (fs.exists(path) && !fs.delete(path, false)) {
                throw new IOException("Unable to delete existing file: " + path);
            }

            Writer writer = OrcFile.createWriter(path,
                    OrcFile.writerOptions(conf)
                            .fileSystem(fs)
                            .inspector(inspector)
                            .blockPadding(false)
                            .compress(CompressionKind.NONE)
                            .encodingStrategy(EncodingStrategy.SPEED)
                            // Optional tuning knobs, left at their defaults:
                            // .stripeSize(100000)
                            // .bufferSize(10000)
            );
            try {
                writer.addRow(new Row(111, 1111L, "John", "Doe", "Buffalo", "NY", "12345-6789"));
                writer.addRow(new Row(112, 2222L, "Henry", "Ford", "Detroit", "MI", "12345-6789"));
                writer.addRow(new Row(113, 3333L, "Walter", "Cronkite", "New York", "NY", "12345-6789"));
                writer.addRow(new Row(114, 4444L, "Al", "Gore", "Nashville", "TN", "12345-6789"));
            } finally {
                // Always close the writer, even when addRow fails, so the
                // underlying output stream is not leaked.
                writer.close();
            }
        } catch (IOException ex) {
            System.err.println("Error: Failure writing values to OrcFile.");
            ex.printStackTrace();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment