Marcin Cylke (zygm0nt)
@zygm0nt
zygm0nt / CoprocessorTest.java
Created April 12, 2012 10:57
Illustrate github question
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.HRegion;
@zygm0nt
zygm0nt / gist:2928862
Created June 14, 2012 07:51
mongo search object
> db.items.find({_id:ObjectId("4f7f4ae4251735803a942b2c")}).pretty()
{
    "_id" : ObjectId("4f7f4ae4251735803a942b2c"),
    "category" : "",
    "title" : "sample title",
    "permalink" : "http://",
    "source" : "s",
    "description" : "Sample Description",
    "image" : "http://",
    "modificationDate" : ISODate("2012-04-06T19:58:28.448Z"),
@zygm0nt
zygm0nt / MRJob.java
Created July 3, 2012 12:00
MapReduce with HBaseWD
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MRJob extends Configured implements Tool {

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(HBaseConfiguration.create(), new MRJob(), args);
    System.exit(res);
  }

  @Override
  public int run(String[] args) throws Exception {
@zygm0nt
zygm0nt / template-basic
Last active October 7, 2015 03:18 — forked from Gen2ly/template-basic
bash template with colors
#!/bin/bash
# Description of script
# bash strict mode: http://redsymbol.net/articles/unofficial-bash-strict-mode/
set -euo pipefail
IFS=$'\n\t'
# Display usage if no parameters given
if [[ $# -eq 0 ]]; then
  echo " ${0##*/} <input> - description"
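The usage line above relies on the `${0##*/}` parameter expansion, which removes the longest prefix matching `*/` and so yields the script's basename. A quick standalone sketch (the path is a made-up example):

```shell
#!/bin/bash
# ${var##pattern} strips the longest matching prefix,
# so ${0##*/} reduces a full script path to its basename.
path="/usr/local/bin/template-basic"
echo "${path##*/}"   # prints: template-basic
```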
@zygm0nt
zygm0nt / README.md
Created September 7, 2012 12:31
Ease the pain of Hadoop log reading (on multiple machines) with Chrome User-Script

Important!

Remember to launch google-chrome with the following flag while installing this user script:

$ google-chrome --enable-easy-off-store-extension-install
@zygm0nt
zygm0nt / import-blogger.rb
Created October 1, 2012 13:56 — forked from dnagir/import.rb
Import a posts archive to jekyll (octopress version, allows quotes in titles)
require 'rubygems'
require 'nokogiri'
require 'fileutils'
require 'date'
require 'uri'
# usage: ruby import.rb my-blog.xml
# my-blog.xml is a file from Settings -> Basic -> Export in blogger.
data = File.read ARGV[0]
@zygm0nt
zygm0nt / gist:3895721
Created October 15, 2012 21:39
Guide: Writing Testable Code


To keep our code at Google in the best possible shape we provided our software engineers with these constant reminders. Now, we are happy to share them with the world.

Many thanks to these folks for inspiration and hours of hard work getting this guide done:

* Jonathan Wolter
* Russ Ruffer
* Miško Hevery
@zygm0nt
zygm0nt / core-site.xml
Created October 26, 2012 12:43
Changes needed for CDH4 HA - Storage based
<property>
  <name>fs.default.name</name>
  <value>hdfs://example-cluster</value>
</property>
@zygm0nt
zygm0nt / core-site.xml
Created October 29, 2012 12:28
Changes needed for CDH4 HA - Quorum based
<property>
  <name>fs.default.name</name>
  <value>hdfs://example-cluster</value>
</property>
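For context, pointing core-site.xml at a nameservice like this only works if hdfs-site.xml defines that nameservice. A minimal quorum-based sketch is below; the NameNode and JournalNode hostnames are placeholders, and only the nameservice ID `example-cluster` is taken from the value above:

<!-- hdfs-site.xml sketch: quorum-journal HA; hostnames nn1/nn2/jn1-jn3 are assumptions -->
<property>
  <name>dfs.nameservices</name>
  <value>example-cluster</value>
</property>
<property>
  <name>dfs.ha.namenodes.example-cluster</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://jn1:8485;jn2:8485;jn3:8485/example-cluster</value>
</property>
<property>
  <name>dfs.client.failover.proxy.provider.example-cluster</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>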
@zygm0nt
zygm0nt / varz.html
Created November 5, 2012 07:35
impala varz
<h2>Hadoop Configuration</h2><pre>Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml
<table>
<tr><th>Key</th><th>Value</th></tr>
<tr><td>dfs.datanode.data.dir</td><td>file://${hadoop.tmp.dir}/dfs/data</td></tr>
<tr><td>dfs.namenode.checkpoint.txns</td><td>40000</td></tr>
<tr><td>s3.replication</td><td>3</td></tr>
<tr><td>mapreduce.output.fileoutputformat.compress.type</td><td>RECORD</td></tr>
<tr><td>mapreduce.jobtracker.jobhistory.lru.cache.size</td><td>5</td></tr>
<tr><td>dfs.datanode.failed.volumes.tolerated</td><td>0</td></tr>
<tr><td>hadoop.http.filter.initializers</td><td>org.apache.hadoop.http.lib.StaticUserWebFilter</td></tr>
<tr><td>mapreduce.cluster.temp.dir</td><td>${hadoop.tmp.dir}/mapred/temp</td></tr>
<tr><td>mapreduce.reduce.shuffle.memory.limit.percent</td><td>0.25</td></tr>
<tr><td>yarn.nodemanager.keytab</td><td>/etc/krb5.keytab</td></tr>
<tr><td>mapreduce.reduce.skip.maxgroups</td><td>0</td></tr>
<tr><td>dfs.https.