hiropppe’s gists

hiropppe / nb.py

Last active December 16, 2015 03:52

	# -- coding: utf-8 --

	from abc import ABCMeta, abstractmethod

	import math
	import sys
	from collections import defaultdict

	class NB:

hiropppe / materialized_path_tree.sql

Last active February 2, 2016 15:01

	-- Reset the schema so the script can be re-run from scratch.
	-- IF EXISTS keeps the drops from raising an "unknown table" error on a
	-- fresh database, consistent with the IF NOT EXISTS used on create below.
	drop table if exists cat;
	drop table if exists item;
	drop table if exists item_cat;

	-- Category table, keyed by cid.
	-- NOTE(review): cpath presumably stores the category's materialized path
	-- in the tree (going by the gist name); confirm against the queries.
	create table if not exists cat (
	cid int(11) not null,
	cpath varchar(10) not null,
	cname varchar(100) not null,
	primary key(cid)
	);

hiropppe / pyspark_mllib_misc.py

Last active August 29, 2015 14:25

	# Pipeline(SparkML ml)

	from pyspark.ml import Pipeline
	from pyspark.ml.classification import LogisticRegression
	from pyspark.ml.feature import HashingTF, Tokenizer
	from pyspark.ml.evaluation import BinaryClassificationEvaluator
	from pyspark.ml.tuning import ParamGridBuilder
	from pyspark.ml.tuning import CrossValidator

	# Load the contents of the HDFS directory .../binary_clf/small/1 through
	# sc.wholeTextFiles and keep the result as pos_files.
	# NOTE(review): `sc` is not defined in this excerpt; presumably it is the
	# SparkContext supplied by the pyspark shell, and directory "1" holds the
	# positive-class documents (going by the variable name) -- TODO confirm.
	pos_files = sc.wholeTextFiles("hdfs://hdp1.containers.dev:9000/user/root/data/binary_clf/small/1")

hiropppe / splearn_misc.py

Last active August 29, 2015 14:25

	# Example Pipeline
	# Four toy documents; the first and third contain the token "spark".
	X = ["a b c d e spark", "b d", "spark f g h", "hadoop mapreduce"]
	# NOTE(review): `sc` is not defined in this excerpt (presumably the
	# SparkContext); the second argument 2 is presumably the partition count.
	X_rdd = sc.parallelize(X, 2)

	# One label per document, in the same order as X: 1 for the documents
	# that contain "spark", 0 for the others.
	y = [1, 0, 1, 0]
	y_rdd = sc.parallelize(y, 2)

	# Bundle the document and label RDDs into one DictRDD with columns
	# named 'X' and 'y'.
	Z = DictRDD((X_rdd, y_rdd), columns=('X', 'y'), dtype=[np.ndarray, np.ndarray])

	dist_pipeline = SparkPipeline((

hiropppe / pyspark_mlib_cv.py

Last active September 2, 2015 05:55

	import numpy as np

	from pyspark.sql import Row, SQLContext

	from pyspark.mllib.feature import HashingTF
	from pyspark.mllib.feature import IDF
	from pyspark.mllib.regression import LabeledPoint
	from pyspark.mllib.classification import SVMWithSGD, SVMModel
	from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel

hiropppe / splearn_cv.py

Last active August 29, 2015 14:25

	import numpy as np

	from splearn.rdd import ArrayRDD
	from splearn.rdd import DictRDD

	from splearn.feature_extraction.text import SparkCountVectorizer
	from splearn.feature_extraction.text import SparkHashingVectorizer
	from splearn.feature_extraction.text import SparkTfidfTransformer
	from splearn.naive_bayes import SparkMultinomialNB
	from splearn.naive_bayes import SparkGaussianNB

hiropppe / sklearn_cv.py

Last active September 3, 2015 04:48

	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.feature_extraction.text import HashingVectorizer
	from sklearn.linear_model import SGDClassifier
	from sklearn.svm import SVC
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.pipeline import Pipeline
	from sklearn.cross_validation import cross_val_score
	from jpjplearn.datasets import load_clf_corpus
	from jpjplearn.analyzer import mecab_analyzer

hiropppe / IdWorker.java

Last active February 2, 2016 14:55

	public class IdWorker {

	long workerBits = 5L;
	long datacenterBits = 5L;
	long sequenceBits = 12L;

	long workerIdShift = sequenceBits;
	long datacenterIdShift = sequenceBits + workerBits;
	long timestampLeftShift = sequenceBits + workerBits + datacenterBits;

hiropppe / zip.sql

Last active October 2, 2015 13:52

A MySQL function that behaves like the zip function.

	CREATE FUNCTION `zip`(_first text, _second text, _separator text, _pair_separator text) RETURNS text CHARSET utf8
	BEGIN
	DECLARE _ret text;

	IF 0 < LENGTH(_first) THEN
	SELECT
	GROUP_CONCAT(
	CONCAT_WS(
	_pair_separator,
	REPLACE(SUBSTRING_INDEX(v.`first`, _separator, p.rownum), CONCAT(SUBSTRING_INDEX(v.`first`, _separator, p.rownum - 1), _separator), ''),

hiropppe / timeit_mongo_join_sample.py

Last active October 27, 2015 14:30

	import pymongo
	from bson.dbref import DBRef
	from collections import OrderedDict
	from random import randrange, choice

	# Create a MongoClient with no arguments, i.e. pymongo's default
	# connection settings, and take the `test` attribute of the client.
	# NOTE(review): in pymongo, attribute access on a client yields the
	# database of that name -- confirm the target server/database.
	client = pymongo.MongoClient()
	test = client.test

	# Handles for the `spot` and `play` attributes of `test`
	# (presumably the two collections used by the join timing sample).
	spot = test.spot
	play = test.play