Skip to content

Instantly share code, notes, and snippets.

View kmizumar's full-sized avatar

Kiyoshi Mizumaru kmizumar

  • Tokyo, Japan
  • 17:58 (UTC +09:00)
View GitHub Profile
@kmizumar
kmizumar / prereq-check.sh output
Created June 26, 2017 08:09
prereq-check.sh output on CentOS 7.3 and CentOS 6.8
Output of 453d0bbe62908bcf904b99520fdf7f390f445909
[root@maru-centos73 prereq-checks]# ./prereq-check.sh
Cloudera Manager & CDH Prerequisites Checks v1.2.1
System information
-------------------
FQDN: maru-centos73.gce.cloudera.com
Distro: CentOS Linux 7.3.1611 (Core)
Kernel: 3.10.0-514.21.1.el7.x86_64
import org.apache.spark.sql.types.DataTypes
/** Parses `s` as an `Int` via `s.toInt`.
  *
  * Uses `scala.util.Try` so that only non-fatal failures are turned into `None`
  * instead of catching every `Exception`.
  *
  * @param s the text to convert
  * @return `Some(n)` when `s.toInt` succeeds, `None` when the conversion fails
  */
def toInt(s: String): Option[Int] =
  scala.util.Try(s.toInt).toOption
// Join benchmark over the "-ordered-repartitioned" Parquet inputs; the result is written to "join-2".
// read.parquet is used instead of the deprecated SQLContext.parquetFile.
val cp = sqlContext.read.parquet("cp-ordered-repartitioned")
val pf = sqlContext.read.parquet("pf-ordered-repartitioned")
// Only the join + write statement between the two nanoTime() calls is timed.
val start = System.nanoTime()
// Equi-join on "ac", then drop pf's "ac" column before writing.
cp.join(pf, cp("ac") === pf("ac")).drop(pf.col("ac")).write.parquet("join-2")
val end = System.nanoTime()
// nanoTime() is in nanoseconds; integer division by 1000 reports microseconds.
println(s"Time elapsed: ${(end - start) / 1000} microsecs")
// Rewrites the two source Parquet datasets ordered by "ac" and partitioned on disk by "ac".
// read.parquet is used instead of the deprecated SQLContext.parquetFile.
val src1 = sqlContext.read.parquet("output/1/tfpach/cp")
src1.orderBy("ac").write.partitionBy("ac").parquet("cp-ordered")
val src2 = sqlContext.read.parquet("output/1/tfpach/pf")
src2.orderBy("ac").write.partitionBy("ac").parquet("pf-ordered")
// Join benchmark over the "cp-ordered" / "pf-ordered" Parquet inputs; the result is written to "join-1".
// read.parquet is used instead of the deprecated SQLContext.parquetFile.
val cp = sqlContext.read.parquet("cp-ordered")
val pf = sqlContext.read.parquet("pf-ordered")
// Only the join + write statement between the two nanoTime() calls is timed.
val start = System.nanoTime()
// Equi-join on "ac", then drop pf's "ac" column before writing.
cp.join(pf, cp("ac") === pf("ac")).drop(pf.col("ac")).write.parquet("join-1")
val end = System.nanoTime()
// nanoTime() is in nanoseconds; integer division by 1000 reports microseconds.
println(s"Time elapsed: ${(end - start) / 1000} microsecs")
// Join benchmark reading straight from "output/1/tfpach"; the result is written to "join-0".
// read.parquet is used instead of the deprecated SQLContext.parquetFile.
val cp = sqlContext.read.parquet("output/1/tfpach/cp")
val pf = sqlContext.read.parquet("output/1/tfpach/pf")
// Only the join + write statement between the two nanoTime() calls is timed.
val start = System.nanoTime()
// Equi-join on "ac", then drop pf's "ac" column before writing.
cp.join(pf, cp("ac") === pf("ac")).drop(pf.col("ac")).write.parquet("join-0")
val end = System.nanoTime()
// nanoTime() is in nanoseconds; integer division by 1000 reports microseconds.
println(s"Time elapsed: ${(end - start) / 1000} microsecs")
#!/usr/bin/env gosh
(use file.util)
(use gauche.collection)
(use gauche.parseopt)
(use gauche.process)
(use slib)
(use srfi-13)
(use rfc.json)
(use util.match)
@kmizumar
kmizumar / genfiles
Created January 12, 2015 16:21
traverse a directory tree and apply escm for template files
#!/usr/bin/env gosh
(use file.util)
(use gauche.collection)
(use gauche.parseopt)
(use gauche.process)
(use slib)
(use srfi-13)
(use rfc.json)
(use util.match)
@kmizumar
kmizumar / sblog.scm
Created January 9, 2015 08:09
create report from sss server log
#!/usr/bin/env gosh
;;
;; sss server log output report
;;
(use gauche.parseopt)
(use gauche.record)
(use slib)
(use text.csv)
(use util.match)
@kmizumar
kmizumar / tbcsv.scm
Created January 9, 2015 08:05
create a report from csv
#!/usr/bin/env gosh
;;
;; tokyobench csv output report
;;
(use gauche.parseopt)
(use gauche.record)
(use slib)
(use text.csv)
(use util.match)
-- Pairs each row of a with the rows of b whose row_num equals a's col_num,
-- then sums the value products per (a.row_num, b.col_num) pair.
SELECT
    a.row_num AS row,
    b.col_num AS col,
    SUM(a.value * b.value) AS val
FROM a
INNER JOIN b
    ON a.col_num = b.row_num
GROUP BY
    a.row_num,
    b.col_num
;