Oct 17, 2012 - See https://github.com/medined/accumulo_stackscript for an even better script. Really ignore the stuff below. Go. Scoot.
Aug 28, 2012 - See http://affy.blogspot.com/2012/08/stackscript-for-accumulo-on-linode.html for a more concise method of configuring Accumulo. I'll leave this gist unchanged for fans of history.
My goal was to get Accumulo running on a VirtualBox Ubuntu instance. I was successful using the following steps. If a line starts with $, it is a command line to execute. Note that you'll need sudo privileges. My username was 'ubuntu'. If you are using a different username, you'll need to change the process a little; I'll try to point out where.
https://issues.apache.org/jira/browse/ACCUMULO
##########
# Start a new VirtualBox instance using the Ubuntu 11.10
# Desktop ISO with at least 4G RAM and at least 10G of
# disk space.
##########
##########
# For verification, you can display the OS release.
##########
$ cat /etc/lsb-release
DISTRIB_ID=Ubuntu
DISTRIB_RELEASE=11.10
DISTRIB_CODENAME=oneiric
DISTRIB_DESCRIPTION="Ubuntu 11.10"
##########
# Download all of the packages you'll need. Hopefully,
# you have a fast download connection.
##########
$ sudo apt-get update
$ sudo apt-get upgrade
$ sudo apt-get install curl
$ sudo apt-get install git
$ sudo apt-get install maven2
$ sudo apt-get install openssh-server openssh-client
$ sudo apt-get install openjdk-7-jdk
##########
# Switch to the new Java. On my system, it was
# the third option (marked '2', naturally).
##########
$ sudo update-alternatives --config java
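##########
# A non-interactive alternative, as a sketch: the alternative
# path below is an assumption for 32-bit Ubuntu 11.10. Run
# update-alternatives --list java first to confirm it.
##########
$ sudo update-alternatives --set java /usr/lib/jvm/java-7-openjdk-i386/jre/bin/java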
##########
# Set the JAVA_HOME variable. I took the
# time to update my .bashrc script.
##########
$ export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-i386
##########
# I stored the Accumulo source code in
# ~/workspace/accumulo. After compilation, you'll
# be working with a second Accumulo directory. By
# placing this 'original source' version into
# workspace, it is nicely segregated.
##########
$ mkdir -p ~/workspace
$ cd ~/workspace
$ git clone https://github.com/apache/accumulo.git
$ cd accumulo
##########
# Now we can compile Accumulo, which creates the
# accumulo-1.5.0-incubating-SNAPSHOT-dist.tar.gz
# file in the src/assemble/target directory.
#
# This step confused me because the Accumulo README
# mentions mvn assembly:single and I tried to use
# that Maven command. It is not needed, at least not
# in this situation.
##########
$ mvn package
##########
# Now we can download Cloudera's version of Hadoop. The
# first step is adding the repository. Note that oneiric
# is not explicitly supported as of 2011-Dec-20, so I am
# using the 'maverick' repository.
##########
# Create a repository list file. Add the two indented lines
# to the new file.
$ sudo vi /etc/apt/sources.list.d/cloudera.list
  deb http://archive.cloudera.com/debian maverick-cdh3 contrib
  deb-src http://archive.cloudera.com/debian maverick-cdh3 contrib
# Add the Cloudera public key.
$ curl -s http://archive.cloudera.com/debian/archive.key | sudo apt-key add -
$ sudo apt-get update
# Install all of the Hadoop components.
$ sudo apt-get install hadoop-0.20
$ sudo apt-get install hadoop-0.20-namenode
$ sudo apt-get install hadoop-0.20-datanode
$ sudo apt-get install hadoop-0.20-secondarynamenode
$ sudo apt-get install hadoop-0.20-jobtracker
$ sudo apt-get install hadoop-0.20-tasktracker
# Install ZooKeeper. It will start automatically.
$ sudo apt-get install hadoop-zookeeper-server
##########
# As an aside, you can use Ubuntu's service
# command to control ZooKeeper like this:
# sudo service hadoop-zookeeper-server start
##########
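##########
# The other standard init-script subcommands should work the
# same way (assuming the CDH3 init script implements them):
# sudo service hadoop-zookeeper-server status
# sudo service hadoop-zookeeper-server restart
##########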
##########
# Now we can configure pseudo-distributed Hadoop.
# These steps were borrowed from
# http://hadoop.apache.org/common/docs/r0.20.2/quickstart.html
##########
# Set some environment variables. I added these to my
# .bashrc file.
$ export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-i386
$ export HADOOP_HOME=/usr/lib/hadoop-0.20
$ export ZOOKEEPER_HOME=/usr/lib/zookeeper
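# A quick way to persist these across logins; a minimal sketch,
# assuming you simply append them to the end of ~/.bashrc:
$ cat >> ~/.bashrc <<'EOF'
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-i386
export HADOOP_HOME=/usr/lib/hadoop-0.20
export ZOOKEEPER_HOME=/usr/lib/zookeeper
EOF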
$ cd $HADOOP_HOME/conf
# Create the hadoop temp directory. It should not
# be in the /tmp directory because that directory
# disappears after each system restart, something
# that happens a lot with virtual machines.
$ sudo mkdir /hadoop_tmp_dir
$ sudo chmod 777 /hadoop_tmp_dir
# Replace the existing file with the indented lines.
$ sudo vi core-site.xml
  <?xml version="1.0"?>
  <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  <configuration>
    <property>
      <name>hadoop.tmp.dir</name>
      <value>/hadoop_tmp_dir</value>
    </property>
    <property>
      <name>fs.default.name</name>
      <value>hdfs://localhost:9000</value>
    </property>
  </configuration>
##########
# Notice that the dfs secondary http address is not
# the default in the XML below. I don't know what
# process was using the default, but I needed to
# change it to avoid the 'port already in use' message.
##########
# Replace the existing file with the indented lines.
$ sudo vi hdfs-site.xml
  <?xml version="1.0"?>
  <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  <configuration>
    <property>
      <name>dfs.secondary.http.address</name>
      <value>0.0.0.0:8002</value>
    </property>
    <property>
      <name>dfs.replication</name>
      <value>1</value>
    </property>
  </configuration>
# Replace the existing file with the indented lines.
$ sudo vi mapred-site.xml
  <?xml version="1.0"?>
  <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  <configuration>
    <property>
      <name>mapred.job.tracker</name>
      <value>localhost:9001</value>
    </property>
  </configuration>
# Format the hadoop filesystem.
$ hadoop namenode -format
##########
# Time to set up password-less ssh to localhost.
##########
$ cd ~
$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
# If you want to test that the ssh works, do this. Then exit.
$ ssh localhost
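# A non-interactive check: with BatchMode, ssh fails with an
# error instead of prompting if key authentication is broken.
$ ssh -o BatchMode=yes localhost 'echo passwordless ssh works'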
# Do some zookeeper configuration.
$ echo "maxClientCnxns=100" | sudo tee -a $ZOOKEEPER_HOME/conf/zoo.cfg
$ cd ~
$ export TAR_DIR=~/workspace/accumulo/src/assemble/target
$ tar xvzf $TAR_DIR/accumulo-1.5.0-incubating-SNAPSHOT-dist.tar.gz
# Add the following to your .bashrc file.
$ export ACCUMULO_HOME=~/accumulo-1.5.0-incubating-SNAPSHOT
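# For example, a one-liner to append it (assuming ~/.bashrc):
$ echo 'export ACCUMULO_HOME=~/accumulo-1.5.0-incubating-SNAPSHOT' >> ~/.bashrc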
$ cd $ACCUMULO_HOME/conf
##########
# I didn't see the metrics file mentioned in the README, but
# a log file complained about it being missing.
##########
$ cp slaves.example slaves
$ cp masters.example masters
$ cp accumulo-env.sh.example accumulo-env.sh
$ cp accumulo-site.xml.example accumulo-site.xml
$ cp accumulo-metrics.xml.example accumulo-metrics.xml
# Create the write-ahead log directory.
$ cd ..
$ mkdir walogs
##########
# Configure for 4G RAM. I definitely recommend using more RAM
# if you have it. Since I am using a VirtualBox instance, I don't
# have much memory to play with.
##########
# Change these two parameters to reduce memory usage.
$ vi conf/accumulo-site.xml
  tserver.memory.maps.max=256M
  tserver.cache.index.size=128M
# Change (or add) the trace.password entry if the root password is
# not the default of "secret".
  <property>
    <name>trace.password</name>
    <value>mypassword_for_root_user</value>
  </property>
# Reduce the JVM memory. I have no real idea what these should be, but these
# settings work. I consider them a magic formula. :)
$ vi conf/accumulo-env.sh
  test -z "$ACCUMULO_TSERVER_OPTS" && export ACCUMULO_TSERVER_OPTS="${POLICY} -Xmx512m -Xms512m -Xss128k"
  test -z "$ACCUMULO_MASTER_OPTS" && export ACCUMULO_MASTER_OPTS="${POLICY} -Xmx512m -Xms128m"
  test -z "$ACCUMULO_MONITOR_OPTS" && export ACCUMULO_MONITOR_OPTS="${POLICY} -Xmx256m -Xms128m"
  test -z "$ACCUMULO_GC_OPTS" && export ACCUMULO_GC_OPTS="-Xmx256m -Xms128m"
  test -z "$ACCUMULO_LOGGER_OPTS" && export ACCUMULO_LOGGER_OPTS="-Xmx128m -Xms64m"
  test -z "$ACCUMULO_GENERAL_OPTS" && export ACCUMULO_GENERAL_OPTS="-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75"
  test -z "$ACCUMULO_OTHER_OPTS" && export ACCUMULO_OTHER_OPTS="-Xmx256m -Xms128m"
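# To sanity-check these heap sizes against what the VM
# actually has available:
$ free -m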
##########
##########
# REPEAT FOR EACH RESTART
#
# Since we are working inside a virtual machine, I found that
# some settings did not survive a shutdown or reboot. From this
# point on, repeat these commands for each instance startup.
##########
# hadoop was installed as root. Therefore we need to
# change the ownership so that your username can
# write. IF YOU ARE NOT USING 'ubuntu', CHANGE THE
# COMMANDS ACCORDINGLY.
$ sudo chown -R ubuntu:ubuntu /usr/lib/hadoop-0.20
$ sudo chown -R ubuntu:ubuntu /var/run/hadoop-0.20
$ sudo chown -R ubuntu:ubuntu /var/log/hadoop-0.20
# Start hadoop. I remove the logs so that I can find errors
# faster when I iterate through configuration settings.
$ cd $HADOOP_HOME
$ rm -rf logs/*
$ bin/start-dfs.sh
$ bin/start-mapred.sh
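##########
# A minimal sketch that bundles the per-restart steps above
# into one script. It assumes the 'ubuntu' user and the
# hadoop-0.20 paths used in this guide; adjust as needed.
##########
$ cat > ~/restart-hadoop.sh <<'EOF'
#!/bin/bash
# Re-take ownership of the hadoop directories (installed as root).
sudo chown -R ubuntu:ubuntu /usr/lib/hadoop-0.20 /var/run/hadoop-0.20 /var/log/hadoop-0.20
cd /usr/lib/hadoop-0.20
# Clear old logs so new errors stand out.
rm -rf logs/*
bin/start-dfs.sh
bin/start-mapred.sh
EOF
$ chmod +x ~/restart-hadoop.sh
$ ~/restart-hadoop.sh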
# If desired, look at the hadoop jobs. Your output should look something
# like the indented lines.
$ jps
  4017 JobTracker
  4254 TaskTracker
  30279 Main
  9808 Jps
  3517 NameNode
  3737 DataNode
##########
# This is an optional step to prove that the NameNode is running.
# Use a web browser like Firefox if you can.
##########
$ wget http://localhost:50070/
$ cat index.html
$ rm index.html
##########
# This is an optional step to prove that the JobTracker is running.
# Use a web browser like Firefox if you can.
##########
$ wget http://localhost:50030/
$ cat index.html
$ rm index.html
##########
# This is an optional step to prove that a map-reduce job
# can be run; in other words, that hadoop is working.
##########
$ hadoop dfs -rmr input
$ hadoop fs -put $HADOOP_HOME/conf input
$ hadoop jar $HADOOP_HOME/hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
$ hadoop fs -cat output/*
##########
# And now, the payoff. Let's get Accumulo to run.
##########
# Provide an instance name (development) and a password (password) when asked.
$ cd $ACCUMULO_HOME
$ bin/accumulo init
# I remove the logs to make debugging easier.
$ rm -rf logs/*
$ bin/start-all.sh
##########
# This is an optional step to prove that Accumulo is running.
# Use a web browser like Firefox if you can.
##########
$ wget http://localhost:50095/
$ cat index.html
$ rm index.html
# Check the logs directory.
$ cd logs
# Look for content in .err or .out files. The file sizes should all be zero.
$ ls -l *.err *.out
# Look for error messages. Ignore messages about the missing libNativeMap file.
$ grep ERROR *
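# A one-liner to surface only the non-empty .err/.out files
# (assuming GNU find):
$ find . \( -name '*.err' -o -name '*.out' \) -size +0c -exec ls -l {} +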
# Start the Accumulo shell. If this works, see the README file for an example
# of how to use the shell.
$ bin/accumulo shell -u root -p password
##########
# Do a little victory dance. You're now an Accumulo user!
##########
##########
# Building Accumulo Documentation
##########
$ sudo apt-get install texlive-latex-base
$ sudo apt-get install texlive-latex-extra
$ rm ./docs/accumulo_user_manual.pdf
$ mvn -Dmaven.test.skip=true prepare-package
$ cd docs/src/developer_manual
$ pdflatex developer_manual && pdflatex developer_manual && pdflatex developer_manual && pdflatex developer_manual
##########
# Reading Documentation
##########
http://incubator.apache.org/accumulo/user_manual_1.4-incubating
docs/src/developer_manual/developer_manual.pdf
$ ls -l docs/examples
##########
# Things to Try
##########
$ bin/accumulo org.apache.accumulo.server.util.ListInstances
# WHY DOES THIS NPE?
$ bin/accumulo org.apache.accumulo.server.util.DumpTable batchtest1
##########
# Running Accumulo Examples
##########
export EXAMPLE_JAR=lib/examples-simple-1.5.0-incubating-SNAPSHOT.jar
export EXAMPLE_PACKAGE=org.apache.accumulo.examples.simple
cd $ACCUMULO_HOME
export AINSTANCE=development
export AZOOKEEPERS=localhost
export AUSER=root
export APASSWORD=password
export AOPTIONS="$AINSTANCE $AZOOKEEPERS $AUSER $APASSWORD"
# ---------------------------
# Examples from README.batch
# ---------------------------
# Start the command-line shell.
bin/accumulo shell -u root -p password
> setauths -u root -s exampleVis
> createtable batchtest1
> exit
export TABLE=batchtest1
export START=0
export NUM=10000
export VALUE_SIZE=50
export MAX_MEMORY=20000000
export MAX_LATENCY=500
export NUM_THREADS=20
export COLUMN_VISIBILITY=exampleVis
bin/accumulo $EXAMPLE_PACKAGE.client.SequentialBatchWriter $AOPTIONS $TABLE $START $NUM $VALUE_SIZE $MAX_MEMORY $MAX_LATENCY $NUM_THREADS $COLUMN_VISIBILITY
export NUM=1000
export MIN=0
export MAX=10000
export EXPECTED_VALUE_SIZE=50
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchScanner $AOPTIONS $TABLE $NUM $MIN $MAX $EXPECTED_VALUE_SIZE $NUM_THREADS $COLUMN_VISIBILITY
# ----------------------------
# Examples from README.bloom
# ----------------------------
### create table without bloom filter.
bin/accumulo shell -u $AUSER -p $APASSWORD
> setauths -u root -s exampleVis
> createtable bloom_test1
bloom_test1> config -t bloom_test1 -s table.compaction.major.ratio=7
bloom_test1> exit
export TABLE=bloom_test1
export NUM=1000000
export MIN=0
export MAX=1000000000
export VALUE_SIZE=50
export MAX_MEMORY=2000000
export MAX_LATENCY=60000
export NUM_THREADS=20
export COLUMN_VISIBILITY=exampleVis
# create a million records
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchWriter -s 7 $AOPTIONS $TABLE $NUM $MIN $MAX $VALUE_SIZE $MAX_MEMORY $MAX_LATENCY $NUM_THREADS $COLUMN_VISIBILITY
bin/accumulo shell -u $AUSER -p $APASSWORD -e 'flush -t bloom_test1 -w'
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchWriter -s 8 $AOPTIONS $TABLE $NUM $MIN $MAX $VALUE_SIZE $MAX_MEMORY $MAX_LATENCY $NUM_THREADS $COLUMN_VISIBILITY
bin/accumulo shell -u $AUSER -p $APASSWORD -e 'flush -t bloom_test1 -w'
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchWriter -s 9 $AOPTIONS $TABLE $NUM $MIN $MAX $VALUE_SIZE $MAX_MEMORY $MAX_LATENCY $NUM_THREADS $COLUMN_VISIBILITY
### create table with bloom filter.
bin/accumulo shell -u $AUSER -p $APASSWORD
> createtable bloom_test2
bloom_test2> config -t bloom_test2 -s table.compaction.major.ratio=7
bloom_test2> config -t bloom_test2 -s table.bloom.enabled=true
bloom_test2> exit
export TABLE=bloom_test2
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchWriter -s 7 $AOPTIONS $TABLE $NUM $MIN $MAX $VALUE_SIZE $MAX_MEMORY $MAX_LATENCY $NUM_THREADS $COLUMN_VISIBILITY
bin/accumulo shell -u $AUSER -p $APASSWORD -e 'flush -t bloom_test2 -w'
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchWriter -s 8 $AOPTIONS $TABLE $NUM $MIN $MAX $VALUE_SIZE $MAX_MEMORY $MAX_LATENCY $NUM_THREADS $COLUMN_VISIBILITY
bin/accumulo shell -u $AUSER -p $APASSWORD -e 'flush -t bloom_test2 -w'
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchWriter -s 9 $AOPTIONS $TABLE $NUM $MIN $MAX $VALUE_SIZE $MAX_MEMORY $MAX_LATENCY $NUM_THREADS $COLUMN_VISIBILITY
bin/accumulo shell -u $AUSER -p $APASSWORD -e 'flush -t bloom_test2 -w'
### read table without bloom filter.
export TABLE=bloom_test1
export NUM=500
# same seed, records are found.
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchScanner -s 7 $AOPTIONS $TABLE $NUM $MIN $MAX $EXPECTED_VALUE_SIZE $NUM_THREADS $COLUMN_VISIBILITY
# different seed, no results.
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchScanner -s 8 $AOPTIONS $TABLE $NUM $MIN $MAX $EXPECTED_VALUE_SIZE $NUM_THREADS $COLUMN_VISIBILITY
### read table with bloom filter.
export TABLE=bloom_test2
bin/accumulo $EXAMPLE_PACKAGE.client.RandomBatchScanner -s 7 $AOPTIONS $TABLE $NUM $MIN $MAX $EXPECTED_VALUE_SIZE $NUM_THREADS $COLUMN_VISIBILITY
### verify the map tables
# display the table ids.
bin/accumulo shell -u $AUSER -p $APASSWORD -e 'tables -l'
# display the hdfs files associated with the table id.
hadoop fs -lsr /accumulo/tables/3
# use PrintInfo to show that the files have a bloom filter.
bin/accumulo org.apache.accumulo.core.file.rfile.PrintInfo /accumulo/tables/4/default_tablet/F000000e.rf
# ----------------------------
# Examples from README.bulkIngest
# ----------------------------
export TABLE=test_bulk
export FIRST_SPLIT=row_00000333
export SECOND_SPLIT=row_00000666
bin/accumulo $EXAMPLE_PACKAGE.mapreduce.bulk.SetupTable $AOPTIONS $TABLE $FIRST_SPLIT $SECOND_SPLIT
export START=0
export END=1000
export BULK_FILE=bulk/test_1.txt
bin/accumulo $EXAMPLE_PACKAGE.mapreduce.bulk.GenerateTestData $START $END $BULK_FILE
# see the file that was just created
hadoop fs -cat $BULK_FILE
export INPUT=bulk
export OUTPUT=tmp/bulkWork
bin/tool.sh lib/accumulo-examples-*[^c].jar $EXAMPLE_PACKAGE.mapreduce.bulk.BulkIngestExample $AOPTIONS $TABLE $INPUT $OUTPUT
bin/accumulo $EXAMPLE_PACKAGE.mapreduce.bulk.VerifyIngest $AOPTIONS $TABLE $START $END
# -------------------------------
# Examples from README.combiner
# -------------------------------
bin/accumulo shell -u $AUSER -p $APASSWORD
> createtable runners
# enter 'stat' and '10' when asked (and 'hstat' and '16' for the hexStats iterator below)
runners> setiter -t runners -p 10 -scan -minc -majc -n decStats -class org.apache.accumulo.examples.combiner.StatsCombiner
runners> setiter -t runners -p 11 -scan -minc -majc -n hexStats -class org.apache.accumulo.examples.combiner.StatsCombiner
runners> insert 123456 name first Joe
runners> insert 123456 stat marathon 240
runners> scan
runners> insert 123456 stat marathon 230
runners> insert 123456 stat marathon 220
# The next scan will show the min, max, sum, and count for the 123456:stat:marathon row.
runners> scan
runners> insert 123456 hstat virtualMarathon 6a
runners> insert 123456 hstat virtualMarathon 6b
# The next scan will show the min, max, sum, and count (in hexadecimal) for the 123456:hstat:virtualMarathon row.
runners> scan
runners> exit
# -------------------------------
# Examples from README.constraints
# -------------------------------
bin/accumulo shell -u $AUSER -p $APASSWORD
> createtable testConstraints
testConstraints> config -t testConstraints -s table.constraint.1=org.apache.accumulo.examples.constraints.NumericValueConstraint
testConstraints> config -t testConstraints -s table.constraint.2=org.apache.accumulo.examples.constraints.AlphaNumKeyConstraint
testConstraints> insert r1 cf1 cq1 1111
testConstraints> insert r1 cf1 cq1 ABC
  Constraint Failures:
    ConstraintViolationSummary(...NumericValueConstraint, ..., violationDescription:Value is not numeric...)
testConstraints> insert r1! cf1 cq1 ABC
  Constraint Failures:
    ConstraintViolationSummary(...NumericValueConstraint, ..., violationDescription:Value is not numeric...)
    ConstraintViolationSummary(...AlphaNumKeyConstraint, ..., violationDescription:Row was not alpha numeric...)
testConstraints> scan
  r1 cf1:cq1 [] 1111
testConstraints> exit
# -------------------------------
# Examples from README.dirlist
# -------------------------------
export DIR_TABLE=dirTable
export INDEX_TABLE=indexTable
export DATA_TABLE=dataTable
export AUTHORIZATION=exampleVis
export COLUMN_VISIBILITY=exampleVis
export DATA_CHUNK_SIZE=100000
export DIR_TO_INDEX=/home/$USER/workspace
# index the directory on local disk
bin/accumulo $EXAMPLE_PACKAGE.dirlist.Ingest $AOPTIONS $DIR_TABLE $INDEX_TABLE $DATA_TABLE $COLUMN_VISIBILITY $DATA_CHUNK_SIZE $DIR_TO_INDEX
export DIR_TO_VIEW=/home/$USER/workspace/accumulo/conf
bin/accumulo $EXAMPLE_PACKAGE.dirlist.Viewer $AOPTIONS $DIR_TABLE $DATA_TABLE $AUTHORIZATION $DIR_TO_VIEW
# display information about a directory.
export DIR_TO_VIEW=/home/$USER/workspace/accumulo/conf
bin/accumulo $EXAMPLE_PACKAGE.dirlist.QueryUtil $AOPTIONS $DIR_TABLE $COLUMN_VISIBILITY $DIR_TO_VIEW
# find files
export FILE_TO_FIND=masters.example
bin/accumulo $EXAMPLE_PACKAGE.dirlist.QueryUtil $AOPTIONS $INDEX_TABLE $COLUMN_VISIBILITY $FILE_TO_FIND -search
export TRAILING_WILDCARD="masters*"
bin/accumulo $EXAMPLE_PACKAGE.dirlist.QueryUtil $AOPTIONS $INDEX_TABLE $COLUMN_VISIBILITY $TRAILING_WILDCARD -search
export LEADING_WILDCARD="*.jar"
bin/accumulo $EXAMPLE_PACKAGE.dirlist.QueryUtil $AOPTIONS $INDEX_TABLE $COLUMN_VISIBILITY $LEADING_WILDCARD -search
export WILDCARD="commons*.jar"
bin/accumulo $EXAMPLE_PACKAGE.dirlist.QueryUtil $AOPTIONS $INDEX_TABLE $COLUMN_VISIBILITY $WILDCARD -search
# count files
export AUTHORIZATION=exampleVis
export COLUMN_VISIBILITY=exampleVis
bin/accumulo $EXAMPLE_PACKAGE.dirlist.FileCount $AOPTIONS $DIR_TABLE $AUTHORIZATION $COLUMN_VISIBILITY
# -------------------------------
# Examples from README.filedata
# -------------------------------
# How is FileDataIngest used?
# * FileDataIngest - takes a list of files and archives them into Accumulo keyed on the SHA1 hashes of the files.
# -------------------------------
# Examples from README.filter
# -------------------------------
bin/accumulo shell -u $AUSER -p $APASSWORD
> createtable filtertest
filtertest> setiter -t filtertest -scan -p 10 -n myfilter -filter
# WAITING FOR JIRA TICKET RESOLUTION.
# -------------------------------
# Examples from README.helloworld
# -------------------------------
bin/accumulo shell -u $AUSER -p $APASSWORD
> createtable hellotable
hellotable> exit
export TABLE=hellotable
bin/accumulo $EXAMPLE_PACKAGE.helloworld.InsertWithBatchWriter $AINSTANCE $AZOOKEEPERS $TABLE $AUSER $APASSWORD
# insert via map-reduce
bin/accumulo $EXAMPLE_PACKAGE.helloworld.InsertWithOutputFormat $AINSTANCE $AZOOKEEPERS $TABLE $AUSER $APASSWORD
# display the records using the shell
bin/accumulo shell -u $AUSER -p $APASSWORD
> table hellotable
> scan
> exit
# display the records
bin/accumulo $EXAMPLE_PACKAGE.helloworld.ReadData $AINSTANCE $AZOOKEEPERS $TABLE $AUSER $APASSWORD
# -------------------------------
# Examples from README.mapred
# -------------------------------
hadoop fs -copyFromLocal $ACCUMULO_HOME/README wc/Accumulo.README
hadoop fs -ls wc
bin/accumulo shell -u $AUSER -p $APASSWORD
> createtable wordCount -a count=org.apache.accumulo.core.iterators.aggregation.StringSummation
> exit
export INPUT=wc
export OUTPUT=wordCount
bin/tool.sh lib/accumulo-examples-*[^c].jar $EXAMPLE_PACKAGE.mapreduce.WordCount $AINSTANCE $AZOOKEEPERS $INPUT $OUTPUT -u $AUSER -p $APASSWORD
# read the count from the accumulo table.
bin/accumulo shell -u $AUSER -p $APASSWORD
> table wordCount
wordCount> scan -b the
wordCount> exit
# -------------------------------
# Examples from README.shard
# -------------------------------
# create accumulo tables
bin/accumulo shell -u $AUSER -p $APASSWORD
> createtable shard
shard> createtable doc2term
doc2term> exit
# index some files
cd $ACCUMULO_HOME
export SHARD_TABLE=shard
export NUM_PARTITIONS=30
find src -name "*.java" | xargs bin/accumulo $EXAMPLE_PACKAGE.shard.Index $AINSTANCE $AZOOKEEPERS $SHARD_TABLE $AUSER $APASSWORD $NUM_PARTITIONS
export TERMS_TO_FIND="foo bar"
bin/accumulo $EXAMPLE_PACKAGE.shard.Query $AINSTANCE $AZOOKEEPERS $SHARD_TABLE $AUSER $APASSWORD $TERMS_TO_FIND
# populate doc2term
export DOC2TERM_TABLE=doc2term
bin/accumulo $EXAMPLE_PACKAGE.shard.Reverse $AINSTANCE $AZOOKEEPERS $SHARD_TABLE $DOC2TERM_TABLE $AUSER $APASSWORD
export NUM_TERMS=5
export ITERATION_COUNT=5
bin/accumulo $EXAMPLE_PACKAGE.shard.ContinuousQuery $AINSTANCE $AZOOKEEPERS $SHARD_TABLE $DOC2TERM_TABLE $AUSER $APASSWORD $NUM_TERMS $ITERATION_COUNT
#####################################################################################
# ---------------------------------------------
# Other programs in the client package
# ---------------------------------------------
bin/accumulo $EXAMPLE_PACKAGE.client.Flush $AOPTIONS $TABLE
# To see all options:
bin/accumulo $EXAMPLE_PACKAGE.client.ReadWriteExample
bin/accumulo $EXAMPLE_PACKAGE.client.ReadWriteExample -i $AINSTANCE -z $AZOOKEEPERS -u $AUSER -p $APASSWORD -t $TABLE -s $COLUMN_VISIBILITY --read
bin/accumulo $EXAMPLE_PACKAGE.client.RowOperations $AOPTIONS
# Other example sources worth reading:
./src/main/java/org/apache/accumulo/examples/constraints/MaxMutationSize.java
./src/main/java/org/apache/accumulo/examples/isolation/InterferenceTest.java