This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from abc import ABCMeta, abstractmethod | |
import math | |
import sys | |
from collections import defaultdict | |
class NB: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
drop table cat; | |
drop table item; | |
drop table item_cat; | |
create table if not exists cat ( | |
cid int(11) not null, | |
cpath varchar(10) not null, | |
cname varchar(100) not null, | |
primary key(cid) | |
); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pipeline(SparkML ml) | |
from pyspark.ml import Pipeline | |
from pyspark.ml.classification import LogisticRegression | |
from pyspark.ml.feature import HashingTF, Tokenizer | |
from pyspark.ml.evaluation import BinaryClassificationEvaluator | |
from pyspark.ml.tuning import ParamGridBuilder | |
from pyspark.ml.tuning import CrossValidator | |
pos_files = sc.wholeTextFiles("hdfs://hdp1.containers.dev:9000/user/root/data/binary_clf/small/1") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example Pipeline | |
X = ["a b c d e spark", "b d", "spark f g h", "hadoop mapreduce"] | |
X_rdd = sc.parallelize(X, 2) | |
y = [1, 0, 1, 0] | |
y_rdd = sc.parallelize(y, 2) | |
Z = DictRDD((X_rdd, y_rdd), columns=('X', 'y'), dtype=[np.ndarray, np.ndarray]) | |
dist_pipeline = SparkPipeline(( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from pyspark.sql import Row, SQLContext | |
from pyspark.mllib.feature import HashingTF | |
from pyspark.mllib.feature import IDF | |
from pyspark.mllib.regression import LabeledPoint | |
from pyspark.mllib.classification import SVMWithSGD, SVMModel | |
from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from splearn.rdd import ArrayRDD | |
from splearn.rdd import DictRDD | |
from splearn.feature_extraction.text import SparkCountVectorizer | |
from splearn.feature_extraction.text import SparkHashingVectorizer | |
from splearn.feature_extraction.text import SparkTfidfTransformer | |
from splearn.naive_bayes import SparkMultinomialNB | |
from splearn.naive_bayes import SparkGaussianNB |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.feature_extraction.text import HashingVectorizer | |
from sklearn.linear_model import SGDClassifier | |
from sklearn.svm import SVC | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.pipeline import Pipeline | |
from sklearn.cross_validation import cross_val_score | |
from jpjplearn.datasets import load_clf_corpus | |
from jpjplearn.analyzer import mecab_analyzer |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class IdWorker { | |
long workerBits = 5L; | |
long datacenterBits = 5L; | |
long sequenceBits = 12L; | |
long workerIdShift = sequenceBits; | |
long datacenterIdShift = sequenceBits + workerBits; | |
long timestampLeftShift = sequenceBits + workerBits + datacenterBits; | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE FUNCTION `zip`(_first text, _second text, _separator text, _pair_separator text) RETURNS text CHARSET utf8 | |
BEGIN | |
DECLARE _ret text; | |
IF 0 < LENGTH(_first) THEN | |
SELECT | |
GROUP_CONCAT( | |
CONCAT_WS( | |
_pair_separator, | |
REPLACE(SUBSTRING_INDEX(v.`first`, _separator, p.rownum), CONCAT(SUBSTRING_INDEX(v.`first`, _separator, p.rownum - 1), _separator), ''), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymongo | |
from bson.dbref import DBRef | |
from collections import OrderedDict | |
from random import randrange, choice | |
client = pymongo.MongoClient() | |
test = client.test | |
spot = test.spot | |
play = test.play |
OlderNewer