
@sokcuri
sokcuri / key.md
Last active March 6, 2025 03:00
Twitter (un)official Consumer Key

Twitter Official Consumer Key

Twitter for Android

type:            PIN
Consumer key:    3nVuSoBZnx6U4vzUxf5w
Consumer secret: Bcs59EFbbsdF6Sl9Ng71smgStWEGwXXKSjYvPVt7qys

Twitter for iPhone

type:            PIN
Consumer key:    IQKbtAYlXLripLGPWd0HUA

@CMCDragonkai
CMCDragonkai / memory_layout.md
Last active April 7, 2025 13:55
Linux: Understanding the Memory Layout of Linux Executables

Understanding the Memory Layout of Linux Executables

Required tools for playing around with memory:

  • hexdump
  • objdump
  • readelf
  • xxd
  • gcore
@sbcd90
sbcd90 / spark-logical-plan-serializer-deserializer.scala
Created March 10, 2016 02:24
Spark-Logical-Plan-Serializer-Deserializer
package org.apache.spark.sql
import java.io.ByteArrayOutputStream
import com.esotericsoftware.kryo.io.Input
import org.apache.hadoop.io.{NullWritable, BytesWritable}
import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.{SparkContext, SparkConf}
package org.apache.spark.metrics.source
import com.codahale.metrics.{Counter, Gauge, Meter, MetricRegistry}
import org.apache.spark.{Accumulator, SparkEnv}
import org.joda.time.DateTime
import scala.collection.mutable
/**
* <h1>SparkInstrumentation</h1>
@jude90
jude90 / csv2parquet.scala
Created September 17, 2015 05:41
Shuffle a CSV file by some fields and render the results as Parquet
package my.spark
/**
* Created by jude on 15-9-16.
*/
import scala.collection.JavaConversions.seqAsJavaList
import java.io.{FileReader, BufferedReader, File}
import java.util
import java.util.regex.Pattern
import breeze.io.TextReader.FileReader
@silasdavis
silasdavis / MultipleOutputs.scala
Last active January 18, 2022 07:07
Wrapping an OutputFormat to produce multiple outputs with Hadoop MultipleOutputs
/**
* This file contains the core idea of wrapping an underlying OutputFormat with an OutputFormat
* with an augmented key that writes to partitions using MultipleOutputs (or something similar)
*/
package model.hadoop
import model.hadoop.HadoopIO.MultipleOutputer
import model.hadoop.HadoopIO.MultipleOutputer._
import org.apache.hadoop.io.{DataInputBuffer, NullWritable}
@aazout
aazout / Batch External Training
Created June 12, 2015 18:09
This code snippet shows a method of running a third-party batch trainer via a subprocess in Scala on Spark.
package com.aol.advertising.execution
import org.apache.spark.{SparkConf, SparkContext, HashPartitioner}
import scala.sys.process._
import java.io._
import org.apache.hadoop.io._
import org.apache.hadoop.mapred.{FileSplit, TextInputFormat}
import org.apache.spark.rdd.HadoopRDD
@nightscape
nightscape / ConvertUtils.scala
Created March 31, 2015 23:46
Parquet to CSV
import ComparisonChain._
import java.io.BufferedInputStream
import java.io.BufferedOutputStream
import java.io.BufferedReader
import java.io.BufferedWriter
import java.io.Closeable
import java.io.File
import java.io.FileInputStream
import java.io.FilenameFilter

Lucene Fundamentals

A useful set of Lucene fundamentals, good for grokking Elasticsearch.

Jargon Glossary

  • document: a record; the unit of search; the thing returned as search results
  • field: a typed slot in a document for storing and indexing values
  • index: a collection of documents, typically with the same field mappings or schema
  • corpus: the entire set of documents in an index
@btiernay
btiernay / PreProcessLine.java
Last active May 17, 2017 09:13
Example of extracting information from HDFS paths in a Spark transformation
package org.icgc.dcc.etl.staging.function;
import static com.google.common.base.Stopwatch.createStarted;
import static com.google.common.collect.Iterables.toArray;
import static org.icgc.dcc.common.core.util.FormatUtils.formatCount;
import static org.icgc.dcc.common.core.util.FormatUtils.formatPercent;
import static org.icgc.dcc.common.core.util.Splitters.TAB;
import java.io.Serializable;
import java.util.Iterator;