Skip to content

Instantly share code, notes, and snippets.

View mitallast's full-sized avatar

Alexey Korchevsky mitallast

View GitHub Profile
@mitallast
mitallast / SimpleApp.scala
Created February 5, 2016 12:07
Example app to classify sales
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.NaiveBayes
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{HashingTF, StringIndexer, Tokenizer}
import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
object SimpleApp {
def main(args: Array[String]) {
@mitallast
mitallast / console output
Created February 2, 2016 10:12
Cross validation spark example
+--------+--------------------+-----+--------------------+--------------------+--------------------+--------------------+----------+
|category| text|label| words| features| rawPrediction| probability|prediction|
+--------+--------------------+-----+--------------------+--------------------+--------------------+--------------------+----------+
| 833|"Чехол-обложка дл...| 1.0|["чехол-обложка, ...|(10000,[514,986,1...|[-379.50617089769...|[3.15784456725782...| 1.0|
| 833|"Чехол-обложка дл...| 1.0|["чехол-обложка, ...|(10000,[290,514,9...|[-395.54097963559...|[3.98362185323457...| 1.0|
| 0|"Держатель для мо...| 0.0|["держатель, для,...|(10000,[34,45,47,...|[-333.85171077966...|[0.88443426309164...| 0.0|
| 9|Шина Nordman RS 1...| 8.0|[шина, nordman, r...|(10000,[1124,1223...|[-70.906588615908...|[5.01470370003123...| 8.0|
| 833|"Набор для зарядк...| 1.0|["набор, для, зар...|(10000,[130,292,5...|[-530.30719860
@mitallast
mitallast / console output
Last active February 2, 2016 09:01
Apache spark NN test
+--------+--------------------+-----+--------------------+--------------------+----------+
|category| text|label| words| features|prediction|
+--------+--------------------+-----+--------------------+--------------------+----------+
| 0|"Мышь беспроводна...| 0.0|["мышь, беспровод...|(10000,[372,634,6...| 3.0|
| 9|покрышка Данлоп 2...| 8.0|[покрышка, данлоп...|(10000,[118,1828,...| 0.0|
| 0|"Стилус для Nokia...| 0.0|["стилус, для, no...|(10000,[45,290,57...| 1.0|
| 9|покрышка Континен...| 8.0|[покрышка, контин...|(10000,[50,121,18...| 0.0|
| 833|Alcatel OT-890 St...| 1.0|[alcatel, ot-890,...|(10000,[971,1031,...| 0.0|
| 833|"Nokia Asha 200 G...| 1.0|["nokia, asha, 20...|(10000,[544,548,1...| 0.0|
| 833|"Samsung Champ Ne...| 1.0|["samsung, champ,...|(10000,[182,325,6...| 0.0|
@mitallast
mitallast / console output
Last active October 16, 2019 11:50
Example Naive Bayes Classifier with Apache Spark Pipeline
+--------+--------------------+-----+--------------------+--------------------+--------------------+--------------------+----------+
|category| text|label| words| features| rawPrediction| probability|prediction|
+--------+--------------------+-----+--------------------+--------------------+--------------------+--------------------+----------+
| 3001|Плойки и наборы V...| 24.0|[плойки, и, набор...|(10000,[326,796,1...|[-174.67716870697...|[6.63481663197049...| 24.0|
| 833|"Чехол-обложка дл...| 1.0|["чехол-обложка, ...|(10000,[514,986,1...|[-379.37151502387...|[5.32678001676623...| 1.0|
| 833|"Чехол-обложка дл...| 1.0|["чехол-обложка, ...|(10000,[514,986,1...|[-379.84825219376...|[2.15785456821554...| 1.0|
| 833|"Чехол-обложка дл...| 1.0|["чехол-обложка, ...|(10000,[290,514,9...|[-395.42735009477...|[6.44323423370500...| 1.0|
| 833|"Чехол-обложка дл...| 1.0|["чехол-обложка, ...|(10000,[290,514,9...|[-396.10251348
package org.questions;
import com.google.common.collect.Iterators;
import com.google.common.primitives.SignedBytes;
import com.google.common.primitives.UnsignedBytes;
import java.util.*;
public class Test {
@mitallast
mitallast / classifier.py
Created November 28, 2015 10:05
Apache spark text classifier
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.classification import NaiveBayes
from pyspark.mllib.feature import HashingTF
# Train and apply a Naive Bayes text classifier on lines read from "sells.csv".
# NOTE(review): `sc` is not defined in this snippet — presumably the SparkContext
# that the pyspark shell provides; confirm how this script is launched.
textFile = sc.textFile("sells.csv")
# Term-frequency hasher; 100000 is passed as the first positional argument
# (the feature-vector size per the HashingTF API — verify against the Spark version in use).
htf = HashingTF(100000)
# Each line is split once on the first comma into [label, text]; the text is split
# on single spaces and hashed into a feature vector paired with the label.
# NOTE(review): a line without a comma would make parts[1] raise IndexError.
data = textFile.map(lambda line: line.split(',', 1)).map(lambda parts: LabeledPoint(parts[0], htf.transform(parts[1].split(" "))))
# Random split using weights 0.6 (training) and 0.4 (test).
d_train, d_test = data.randomSplit([0.6, 0.4])
model = NaiveBayes.train(d_train)
# Pair each test point's predicted label with its actual label.
prediction_and_labels = d_test.map(lambda point: (model.predict(point.features), point.label))
@mitallast
mitallast / gist:6076a96d89a1b533ca21
Created October 26, 2015 10:45
Merge JSON objects in PostgreSQL with plpythonu
CREATE LANGUAGE PLPYTHONU;
CREATE OR REPLACE
FUNCTION merge_json(left JSON, right JSON)
RETURNS JSON AS $$
import simplejson as json
def merge(source, destination):
for key, value in source.items():
if isinstance(value, dict):
# get node or create one
@mitallast
mitallast / guitarplayer.py
Created October 2, 2015 12:30
parse guitarplayer.ru commerce.guitars for prs
# -*- coding: utf-8 -*-
from grab import Grab
import logging
import pickledb
from urlparse import urlparse, parse_qs
# logging.basicConfig(level=logging.DEBUG)
# Open the pickledb store backed by the file 'guitarplayer.db'.
# NOTE(review): the second argument (False) is presumably pickledb's auto-dump flag,
# meaning changes must be dumped explicitly — confirm against the pickledb version used.
db = pickledb.load('guitarplayer.db', False)
# Search terms (Latin and Cyrillic spellings); how they are matched is not visible
# in this excerpt.
keywords = ['prs', 'paul', 'reed', 'smith', u'прс']
@mitallast
mitallast / riemann.clj
Created September 24, 2015 15:41
sum by person example
; -*- mode: clojure; -*-
; vim: filetype=clojure
(logging/init {:file "/var/log/riemann/riemann.log"})
; Listen on the local interface over TCP (5555), UDP (5555), and websockets
; (5556)
(let [host "127.0.0.1"]
(tcp-server {:host host})
(udp-server {:host host})
@mitallast
mitallast / retrieve_metric_avg.lua
Last active April 19, 2017 20:54
Average metrics using redis operations
-- retrieve_metric_avg.lua $operation_type
--
-- Computes the average metric for one operation type as
--   operations_total[operation_type] / operations_count[operation_type]
-- where both values are read with HGET from the hashes "operations_total"
-- and "operations_count".
--
-- ARGV[1]: the hash field (operation type) to look up in both hashes.
--
-- Returns:
--   0 (number)  when either field is missing, or when the stored count is zero;
--   otherwise the average converted with tostring().
--
-- The header uses "--" because "#" is not comment syntax in Lua; a leading
-- "#" line is only tolerated by the standalone file loader, not when the
-- script body is sent through EVAL.
local operation_type = ARGV[1];
local count = redis.call("HGET", "operations_count", operation_type)
local total = redis.call("HGET", "operations_total", operation_type)

-- A missing field comes back as a falsy value, so there is nothing to average.
if not count or not total then
  return 0
end

local count_num = tonumber(count)

-- Guard against dividing by a zero count, which would otherwise yield
-- "inf" or "nan" as the reply string.
if count_num == 0 then
  return 0
end

-- Non-numeric stored values still raise an arithmetic error here, exactly as
-- before, so corrupted data is not silently masked.
local avg = tonumber(total) / count_num
return tostring(avg)