brew install lzop lzo
- Compile Hadoop LZO library:
git clone https://github.com/twitter/hadoop-lzo.git
cd hadoop-lzo
env JAVA_HOME=$JAVA_HOME C_INCLUDE_PATH=/opt/local/include LIBRARY_PATH=/opt/local/lib ant clean compile-native test tar
- Place compiled Hadoop LZO library into Hadoop directory:
cp build/hadoop-lzo-0.4.17-SNAPSHOT/hadoop-lzo-0.4.17-SNAPSHOT.jar $HADOOP_PREFIX/libexec/lib
- Store native libraries to Hadoop directory:
tar -cBf - -C build/hadoop-lzo-0.4.17-SNAPSHOT/lib/native . | tar -xBvf - -C $HADOOP_PREFIX/libexec/lib/native
- Add the following properties to conf/core-site.xml:
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
- Open conf/hadoop-env.sh:
Replace
# Extra Java CLASSPATH elements. Optional.
# export HADOOP_CLASSPATH=
With:
# Extra Java CLASSPATH elements. Optional.
export HADOOP_CLASSPATH="/usr/local/Cellar/hadoop/1.1.2/libexec/lib/hadoop-lzo-0.4.17-SNAPSHOT.jar"
export JAVA_LIBRARY_PATH="/usr/local/Cellar/hadoop/1.1.2/libexec/lib/native:/usr/local/Cellar/hadoop/1.1.2/libexec/lib/native/Mac_OS_X-x86_64-64"
Note: Make sure that /usr/local/Cellar/hadoop/1.1.2/libexec/lib/hadoop-lzo-0.4.17-SNAPSHOT.jar
and /usr/local/Cellar/hadoop/1.1.2/libexec/lib/native/Mac_OS_X-x86_64-64
exist, and adjust these paths according to your setup.
- Restart Hadoop:
$HADOOP_PREFIX/bin/stop-all.sh
$HADOOP_PREFIX/bin/start-all.sh
sudo apt-get install lzop liblzo2-dev
sudo apt-get install ant
- Compile Hadoop LZO library:
git clone https://github.com/twitter/hadoop-lzo.git
cd hadoop-lzo
env JAVA_HOME=$JAVA_HOME C_INCLUDE_PATH=/opt/local/include LIBRARY_PATH=/opt/local/lib ant clean compile-native test tar
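- Place compiled Hadoop LZO library into Hadoop directory:
cp build/hadoop-lzo-0.4.17-SNAPSHOT/hadoop-lzo-0.4.17-SNAPSHOT.jar $HADOOP_PREFIX/lib
- Store native libraries to Hadoop directory:
tar -cBf - -C build/hadoop-lzo-0.4.17-SNAPSHOT/lib/native . | tar -xBvf - -C $HADOOP_PREFIX/lib/native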
- Add the following properties to conf/core-site.xml:
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
- Open conf/hadoop-env.sh:
Replace
# Extra Java CLASSPATH elements. Optional.
# export HADOOP_CLASSPATH=
With:
# Extra Java CLASSPATH elements. Optional.
export HADOOP_CLASSPATH="/usr/local/hadoop/lib/hadoop-lzo-0.4.17-SNAPSHOT.jar"
export JAVA_LIBRARY_PATH="/usr/local/hadoop/lib/native:/usr/local/hadoop/lib/native/Linux-amd64-64"
Note: Make sure that /usr/local/hadoop/lib/hadoop-lzo-0.4.17-SNAPSHOT.jar
and /usr/local/hadoop/lib/native/Linux-amd64-64
exist, and adjust these paths according to your setup.
- Restart Hadoop:
$HADOOP_PREFIX/bin/stop-all.sh
$HADOOP_PREFIX/bin/start-all.sh
lzop Posts.xml
hadoop fs -mkdir input
hadoop fs -put Posts.xml.lzo input
hadoop jar $HADOOP_PREFIX/libexec/lib/hadoop-lzo-0.4.17-SNAPSHOT.jar com.hadoop.compression.lzo.LzoIndexer input/Posts.xml.lzo
When listing the contents of the input directory, you should see the following output:
$ hadoop fs -ls input
Found 2 items
-rw-r--r-- 1 hduser supergroup 217682877 2013-05-24 17:09 /user/hduser/input/Posts.xml.lzo
-rw-r--r-- 1 hduser supergroup 14376 2013-05-24 17:11 /user/hduser/input/Posts.xml.lzo.index