This article introduces how to find outliers using Local Outlier Factor (LOF) on Hivemall.
create database lof;
use lof;
create external table hundred_balls (
rowid int,
This article introduces how to find outliers using Local Outlier Factor (LOF) on Hivemall.
create database lof;
use lof;
create external table hundred_balls (
rowid int,
/* | |
* Hivemall: Hive scalable Machine Learning Library | |
* | |
* Copyright (C) 2015 Makoto YUI | |
* Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* |
create table similarities | |
as | |
SELECT | |
each_top_k( | |
10, t2.id, angular_similarity(t2.features, t1.features), | |
t2.id, | |
t1.id, | |
t1.y | |
) as (rank, similarity, base_id, neighbor_id, y) | |
FROM |
create table similarities | |
as | |
WITH test_rnd as ( | |
select | |
rand(31) as rnd, | |
id, | |
features | |
from | |
test_hivemall | |
), |
/* | |
* Hivemall: Hive scalable Machine Learning Library | |
* | |
* Copyright (C) 2015 Makoto YUI | |
* Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* |
set hivevar:k=11; | |
create table similarities | |
as | |
WITH test_rnd as ( | |
select | |
rand(31) as rnd, | |
id, | |
features | |
from |
/* | |
* Hivemall: Hive scalable Machine Learning Library | |
* | |
* Copyright (C) 2015 Makoto YUI | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 |
_______1_______ | |
___2_______2___ | |
_4___4___4___4_ | |
8_8_8_8_8_8_8_8 |
push x[10]; push 10.0; ifeq 205; push x[5]; push 275.5; ifle 68; push x[1]; push 7.0; ifeq 11; push 1; goto last; push x[15]; push 2.0; ifeq 26; push x[14]; push 2.5; ifle 24; push x[0]; push 49.5; ifle 22; push 0; goto last; push 1; goto last; push 1; goto last; push x[11]; push 327.5; ifle 66; push x[11]; push 265.5; ifle 64; push x[1]; push 6.0; ifeq 37; push 0; goto last; push x[11]; push 87.5; ifle 42; push 0; goto last; push x[11]; push 190.0; ifle 62; push x[9]; push 15.0; ifle 60; push x[3]; push 0.0; ifeq 58; push x[13]; push 264.0; ifle 56; push 1; goto last; push 0; goto last; push 0; goto last; push 1; goto last; push 0; goto last; push 1; goto last; push 0; goto last; push x[9]; push 18.5; ifle 128; push x[0]; push 25.0; ifle 76; push 1; goto last; push x[2]; push 2.0; ifeq 96; push x[11]; push 619.0; ifle 94; push x[6]; push 0.0; ifeq 87; push 1; goto last; push x[14]; push 3.5; ifle 92; push 0; goto last; push 1; goto last; push 0; goto last; push x[11]; push 153.0; ifle 101; push 0; goto last; |
% 1. Title: Iris Plants Database | |
% | |
% 2. Sources: | |
% (a) Creator: R.A. Fisher | |
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov) | |
% (c) Date: July, 1988 | |
% | |
% 3. Past Usage: | |
% - Publications: too many to mention!!! Here are a few. | |
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems" |