This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package w20160827 | |
case class Log(s: String, l: Long, f: String, t: String, p: String) { | |
def usingFixedSize: String = { | |
// 23 = Long.MaxValue.toString.length + ("\t" * 4).length | |
val size = 23 + s.length + f.length + t.length + p.length | |
val sb = new StringBuilder(size, s) | |
sb append "\t"; sb append l | |
sb append "\t"; sb append f |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def topByKey(key: String, orderBy: String, n: Int): DataFrame = { | |
val keyIndex = df.schema.fieldIndex(key) | |
val orderByIndex = df.schema.fieldIndex(orderBy) | |
val ord = df.schema.fields(orderByIndex).dataType match { | |
case o: StringType => Ordering.by[Row, String](_.getString(orderByIndex)) | |
case o: IntegerType => Ordering.by[Row, Int](_.getInt(orderByIndex)) | |
case o: LongType => Ordering.by[Row, Long](_.getLong(orderByIndex)) | |
case o: FloatType => Ordering.by[Row, Float](_.getFloat(orderByIndex)) | |
case o: DoubleType => Ordering.by[Row, Double](_.getDouble(orderByIndex)) | |
case _ => throw new IllegalArgumentException |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Wraps a reflected `Type` in a `TypeTag[T]` bound to `currentMirror`.
 *
 * Nothing here checks that `T` actually corresponds to `tpe`; the caller picks `T`.
 *
 * @param tpe the reflected type the resulting tag should carry
 * @tparam T  the static type the resulting tag claims to describe
 * @return a `TypeTag[T]` whose creator always yields `tpe`
 */
protected def typeToTypeTag[T](tpe: Type): TypeTag[T] = {
  TypeTag(currentMirror, new TypeCreator {
    // The requested mirror `m` is not consulted: `tpe` is handed back unchanged through an
    // unchecked cast, whichever universe asks for it.
    override def apply[U <: Universe with Singleton](m: api.Mirror[U]): U#Type = {
      tpe.asInstanceOf[U#Type]
    }
  })
}
/**
 * Produces a `ClassTag[T]` for a reflected `Type`: the type is first wrapped in a
 * `TypeTag[T]` via `typeToTypeTag`, and that tag is then passed to `typeTagToClassTag`.
 *
 * @param tpe the reflected type to convert
 * @tparam T  the static type the resulting tag claims to describe
 */
protected def typeToClassTag[T](tpe: Type): ClassTag[T] = {
  val typeTag: TypeTag[T] = typeToTypeTag[T](tpe)
  typeTagToClassTag(typeTag)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.util.AccumulatorV2 | |
class MapAccumulator[K] extends AccumulatorV2[Map[K, Long], Map[K, Long]] { | |
var underlying = new scala.collection.mutable.HashMap[K, Long] | |
override def isZero: Boolean = underlying.isEmpty | |
override def copy(): MapAccumulator[K] = { | |
val newAcc = new MapAccumulator[K] | |
newAcc.underlying = this.underlying.clone() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The Namuwiki dump (as of namuwiki_170327) is stored as a single JSON array, which cannot
# be processed in a distributed fashion.
# For distributed processing, each JSON object needs to be stored on its own line.
# input: namuwiki_170327.json
# output: output.json
#
# --stream parses the input incrementally; `1|truncate_stream(inputs)` drops the outermost
# (array) path level and `fromstream` rebuilds each element, written compactly (-c).
jq -nc --stream 'fromstream(1|truncate_stream(inputs))' namuwiki_170327.json > output.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np

# Read a feature file: two big-endian int32 values, two big-endian int16 values, then
# big-endian float32 data reshaped into rows of `fdim` values.
# NOTE(review): this matches the HTK parameter-file header (nSamples, sampPeriod, sampSize,
# parmKind) — confirm against the HTK Book for the files you actually load.
with open('path/to/htk.feat', 'rb') as f:
    # First two header fields (4 bytes each).
    a = np.fromfile(f, dtype='>i4', count = 2)
    # Next two header fields (2 bytes each); b[0] is used below as the bytes-per-frame size.
    b = np.fromfile(f, dtype='>i2', count = 2)
    # Each value is a 4-byte float, so the frame size in bytes divided by 4 is the dimension.
    fdim = int(b[0] / 4)
    # Everything after the header is the payload: one row per frame.
    fea = np.fromfile(f, dtype='>f4').reshape(-1, fdim)

# Number of frames actually read from the file.
nframes = fea.shape[0]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Spreadsheet source:
# http://kssc.kostat.go.kr/ksscNew_web/kssc/common/CommonBoardList.do?gubun=1&strCategoryNameCode=019&strBbsId=kascrr&categoryMenu=014
import pandas as pd

# NOTE: the name `input` shadows the Python builtin; it is kept so that any later code
# referring to it keeps working.
input = pd.ExcelFile("한국행정구역분류_2018.7.1.기준_최종.xls")

# Load the sheet at index 4, taking the column names from row index 1.
sheet = input.parse(sheet_name=4, header=1)

# The CSV is written before the derived columns below are added, so it holds only the
# columns read from the workbook.
sheet.to_csv("loc.csv")

# label: the first two columns joined by "_"; text: all five columns joined by spaces.
sheet["label"] = sheet["시도"] + "_" + sheet["시군구"]
sheet["text"] = sheet["시도"] + " " + sheet["시군구"] + " " + sheet["행정구역명"] + " " + sheet["행정동"] + " " + sheet["법정동"]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
class op:
    """Interface with two hooks, ``eval`` and ``grad``; both are no-ops in this class.

    NOTE(review): presumably a base class whose subclasses override both methods — confirm
    against the rest of the file.
    """

    def eval(self, values):
        """Do nothing and return None.

        ``values`` is accepted but not used here.
        """
        pass

    def grad(self, values, over):
        """Do nothing and return None.

        ``values`` and ``over`` are accepted but not used here.
        """
        pass
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.reflect.ClassTag | |
import scala.util.Random | |
class Reservoir[T: ClassTag]( | |
private val size: Int, | |
private val seed: Long = Random.nextLong()) extends Serializable { | |
private val rand = new Random(seed) | |
private val reservoir = new Array[T](size) | |
private var count = 0L |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object TopByKeyAggregatorProxy { | |
import scala.reflect.runtime.universe._ | |
/** | |
* Works on rows of the form (K1, K2, V) where K1 & K2 are IDs and V is the score value. Finds | |
* the top `num` K2 items based on the given Ordering. | |
*/ | |
def asTypedColumn[K1: TypeTag, K2: TypeTag, V: TypeTag] | |
(num: Int, ord: Ordering[(K2, V)]): TypedColumn[(K1, K2, V), Array[(K2, V)]] = { | |
Class.forName("org.apache.spark.ml.recommendation.TopByKeyAggregator") | |
.getConstructors |
OlderNewer