project structure:
.
├── oozie
│ ├── upload.sh
│ ├── combined_queries
│ ├── ...
│ └── simple_reports
│ ├── lib
│ │ ├── avro-1.7.4.jar
| [alias] | |
| co = checkout | |
| ci = commit | |
| st = status -sb | |
| cln = remote prune origin | |
| br = branch | |
| hist = log --pretty=format:\"%h %ad | %s%d [%an]\" --date=short | |
| hist-graph = log --pretty=format:\"%h %ad | %s%d [%an]\" --graph --date=short | |
| lr = "!f() { git log $1...$2 --left-right --oneline; }; f" | |
| type = cat-file -t |
project structure:
.
├── oozie
│ ├── upload.sh
│ ├── combined_queries
│ ├── ...
│ └── simple_reports
│ ├── lib
│ │ ├── avro-1.7.4.jar
Add the following to ~/.bash_login or ~/.bash_profile:
export HDP_HOME=$HOME/opt/hadoop-2.0.0-cdh4.4.0
| # One-off migration of Scalding API usages: every command rewrites, in place, all regular files under the current directory (requires GNU sed installed as gsed). | |
| find . -type f -print0 | xargs -0 gsed -i 's/\.sum(/.sum[Double](/g' | |
| find . -type f -print0 | xargs -0 gsed -i 's/\.plus\[/.sum[/g' | |
| find . -type f -print0 | xargs -0 gsed -i 's/import com.twitter.scalding.DateOps.richDateToCalendar/import com.twitter.scalding.RichDate.toCalendar/' | |
| find . -type f -print0 | xargs -0 gsed -i 's/ RichDate("\([^"]\+\)")(\([^)]\+\))/ com.twitter.scalding.DateParser.default.parse("\1")(\2).get/g' | |
| # \b (GNU word boundary) renames ".then" without consuming the following character and leaves existing ".thenDo" calls untouched. | |
| find . -type f -print0 | xargs -0 gsed -i 's/\.then\b/.thenDo/g' | |
| find . -type f -print0 | xargs -0 gsed -i 's/Mode\.mode/mode/g' | |
| find . -type f -print0 | xargs -0 gsed -i 's/new RichDate/RichDate/g' | |
| find . -type f -print0 | xargs -0 gsed -i 's/import scalding.avro/import com.twitter.scalding.avro/' |
| # Prints the distinct entries of $ALL_BRANCHES in sorted order, skipping the "->" and "origin/HEAD" tokens. | |
| remove_head() { | |
| for BRANCH in $ALL_BRANCHES; do | |
| case "$BRANCH" in | |
| "->"|"origin/HEAD") continue ;; | |
| esac | |
| echo $BRANCH | |
| done | sort -u | |
| } |
| // Two ways to count distinct user_id values per (item_id_from, item_id_to) pair. | |
| // 2 m/r jobs :-( | |
| .unique('item_id_from, 'item_id_to, 'user_id) // 1st m/r | |
| .groupBy('item_id_from, 'item_id_to) { _.size('count) } // 2nd m/r | |
| // 1 m/r job but more code | |
| // Wrap each id in a one-element Set so the per-group sum can merge them (presumably set union via the Set monoid, which drops duplicates). | |
| .map('user_id -> 'user_id) { id: String => Set(id) } | |
| .groupBy('item_id_from, 'item_id_to) { | |
| _.sum[Set[String]]('user_id) | |
| } | |
| .map('user_id -> 'count) { ids: Set[String] => ids.size } |
| import java.io.PrintWriter | |
| import cascading.stats.CascadingStats | |
| import com.twitter.scalding._ | |
| /** | |
| * Writes all custom counters to the TSV file named by args("counters-file"), if that argument is set. | |
| * | |
| * Output format: | |
| * counter_name value |
import turtle
t = turtle.Pen()
def line(count, size, alpha, beta):
if count==0:
return
else:
t.forward(size)
t.right(180 - beta)