This article introduces how to find outliers using Local Outlier Factor (LOF) on Hivemall.
create database lof;
use lof;
create external table hundred_balls (
rowid int, This article introduces how to find outliers using Local Outlier Factor (LOF) on Hivemall.
create database lof;
use lof;
create external table hundred_balls (
rowid int, | /* | |
| * Hivemall: Hive scalable Machine Learning Library | |
| * | |
| * Copyright (C) 2015 Makoto YUI | |
| * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * |
| create table similarities | |
| as | |
| SELECT | |
| each_top_k( | |
| 10, t2.id, angular_similarity(t2.features, t1.features), | |
| t2.id, | |
| t1.id, | |
| t1.y | |
| ) as (rank, similarity, base_id, neighbor_id, y) | |
| FROM |
| create table similarities | |
| as | |
| WITH test_rnd as ( | |
| select | |
| rand(31) as rnd, | |
| id, | |
| features | |
| from | |
| test_hivemall | |
| ), |
| /* | |
| * Hivemall: Hive scalable Machine Learning Library | |
| * | |
| * Copyright (C) 2015 Makoto YUI | |
| * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * |
| set hivevar:k=11; | |
| create table similarities | |
| as | |
| WITH test_rnd as ( | |
| select | |
| rand(31) as rnd, | |
| id, | |
| features | |
| from |
| /* | |
| * Hivemall: Hive scalable Machine Learning Library | |
| * | |
| * Copyright (C) 2015 Makoto YUI | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| _______1_______ | |
| ___2_______2___ | |
| _4___4___4___4_ | |
| 8_8_8_8_8_8_8_8 |
| push x[10]; push 10.0; ifeq 205; push x[5]; push 275.5; ifle 68; push x[1]; push 7.0; ifeq 11; push 1; goto last; push x[15]; push 2.0; ifeq 26; push x[14]; push 2.5; ifle 24; push x[0]; push 49.5; ifle 22; push 0; goto last; push 1; goto last; push 1; goto last; push x[11]; push 327.5; ifle 66; push x[11]; push 265.5; ifle 64; push x[1]; push 6.0; ifeq 37; push 0; goto last; push x[11]; push 87.5; ifle 42; push 0; goto last; push x[11]; push 190.0; ifle 62; push x[9]; push 15.0; ifle 60; push x[3]; push 0.0; ifeq 58; push x[13]; push 264.0; ifle 56; push 1; goto last; push 0; goto last; push 0; goto last; push 1; goto last; push 0; goto last; push 1; goto last; push 0; goto last; push x[9]; push 18.5; ifle 128; push x[0]; push 25.0; ifle 76; push 1; goto last; push x[2]; push 2.0; ifeq 96; push x[11]; push 619.0; ifle 94; push x[6]; push 0.0; ifeq 87; push 1; goto last; push x[14]; push 3.5; ifle 92; push 0; goto last; push 1; goto last; push 0; goto last; push x[11]; push 153.0; ifle 101; push 0; goto last; |
| % 1. Title: Iris Plants Database | |
| % | |
| % 2. Sources: | |
| % (a) Creator: R.A. Fisher | |
| % (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov) | |
| % (c) Date: July, 1988 | |
| % | |
| % 3. Past Usage: | |
| % - Publications: too many to mention!!! Here are a few. | |
| % 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems" |