project structure:
.
├── oozie
│ ├── upload.sh
│ ├── combined_queries
│ ├── ...
│ └── simple_reports
│ ├── lib
│ │ ├── avro-1.7.4.jar
[alias]
    # Short forms of everyday commands.
    co = checkout
    ci = commit
    # Short status output plus the current branch line.
    st = status -sb
    # Drop stale remote-tracking refs for branches already deleted on origin.
    cln = remote prune origin
    br = branch
    # One line per commit: abbreviated hash, short date, subject, ref names, author.
    hist = log --pretty=format:\"%h %ad | %s%d [%an]\" --date=short
    # Same as hist, with the ASCII commit graph drawn alongside.
    hist-graph = log --pretty=format:\"%h %ad | %s%d [%an]\" --graph --date=short
    # Usage: git lr <a> <b> -- commits reachable from only one of the two revisions,
    # marked with < or > to show which side they belong to.
    lr = "!f() { git log $1...$2 --left-right --oneline; }; f"
    # Print the type of the given object (commit, tree, blob, tag).
    type = cat-file -t
project structure:
.
├── oozie
│ ├── upload.sh
│ ├── combined_queries
│ ├── ...
│ └── simple_reports
│ ├── lib
│ │ ├── avro-1.7.4.jar
Add the following line to ~/.bash_login or ~/.bash_profile:
export HDP_HOME=$HOME/opt/hadoop-2.0.0-cdh4.4.0
# In-place source rewrites; the patterns target com.twitter.scalding call sites and imports.
# WARNING: `find . -type f` visits EVERY regular file under the current directory
# (including anything under .git); run this only from a clean source tree.
# Requires GNU sed (`gsed`) for `-i`, `\+` and `\b`.

# .sum(...) now needs an explicit type parameter.
find . -type f -print0 | xargs -0 gsed -i 's/\.sum(/.sum[Double](/g'
# .plus[T] is spelled .sum[T].
find . -type f -print0 | xargs -0 gsed -i 's/\.plus\[/.sum[/g'
# Dots in the search pattern are escaped so they match literal dots only.
find . -type f -print0 | xargs -0 gsed -i 's/import com\.twitter\.scalding\.DateOps\.richDateToCalendar/import com.twitter.scalding.RichDate.toCalendar/'
# RichDate("<text>")(<tz>) -> DateParser.default.parse("<text>")(<tz>).get
find . -type f -print0 | xargs -0 gsed -i 's/ RichDate("\([^"]\+\)")(\([^)]\+\))/ com.twitter.scalding.DateParser.default.parse("\1")(\2).get/g'
# Rename .then to .thenDo. `\b` requires a word boundary after "then", so an
# existing ".thenDo" is left alone, ".then(" is converted as well, and the
# character that follows ".then" is no longer consumed by the match.
find . -type f -print0 | xargs -0 gsed -i 's/\.then\b/.thenDo/g'
find . -type f -print0 | xargs -0 gsed -i 's/Mode\.mode/mode/g'
# Must run after the RichDate("...")(...) rewrite above.
find . -type f -print0 | xargs -0 gsed -i 's/new RichDate/RichDate/g'
find . -type f -print0 | xargs -0 gsed -i 's/import scalding\.avro/import com.twitter.scalding.avro/'
# Print the entries of $ALL_BRANCHES one per line, sorted and de-duplicated,
# skipping the "->" and "origin/HEAD" tokens.
#
# Reads:   ALL_BRANCHES - whitespace-separated list of branch names
#          (deliberately left unquoted in the loop so it word-splits).
# Outputs: the filtered branch names on stdout.
remove_head() {
    for BRANCH in $ALL_BRANCHES;
    do
        if [ "$BRANCH" = "->" ] || [ "$BRANCH" = "origin/HEAD" ]; then
            continue
        fi
        # printf with a quoted argument prints the name verbatim
        # (no glob expansion, no echo option parsing).
        printf '%s\n' "$BRANCH"
    done | sort -u
}
// Two ways to count distinct user_id values per (item_id_from, item_id_to) pair.

// 2 m/r jobs :-(
.unique('item_id_from, 'item_id_to, 'user_id) // 1st m/r
.groupBy('item_id_from, 'item_id_to) { _.size('count) } // 2nd m/r

// 1 m/r job but more code
// Wrap every user_id in a one-element Set so that summing the sets per group
// yields the set of distinct ids; its size is the count.
.map('user_id -> 'user_id) { id: String => Set(id) }
.groupBy('item_id_from, 'item_id_to) {
  _.sum[Set[String]]('user_id)
}
.map('user_id -> 'count) { ids: Set[String] => ids.size }
import java.io.PrintWriter | |
import cascading.stats.CascadingStats | |
import com.twitter.scalding._ | |
/** | |
* Writes all custom counters into a tsv file args("counters-file") if this property is set. | |
* | |
* Output format: | |
* counter_name value |
import turtle
t = turtle.Pen()
def line(count, size, alpha, beta):
if count==0:
return
else:
t.forward(size)
t.right(180 - beta)