Skip to content

Instantly share code, notes, and snippets.

@wyukawa
Created October 13, 2012 14:14
Show Gist options
  • Select an option

  • Save wyukawa/3884759 to your computer and use it in GitHub Desktop.

Select an option

Save wyukawa/3884759 to your computer and use it in GitHub Desktop.
exist例
-- 全ての教科が50点以上
$ cat TestScores.csv
100,math,100
100,Japanese,80
100,science,80
200,math,80
200,Japanese,95
300,math,40
300,Japanese,90
300,society,55
400,math,80
$ cat TestScores.pig
test = LOAD 'TestScores.csv' USING PigStorage(',') AS(student_id:int,subject:chararray,score:int);
grp = GROUP test BY student_id;
f = FOREACH grp {
fil = FILTER test BY score < 50;
GENERATE group, (IsEmpty(fil) ? 0 : 1) AS condition;
}
result = FILTER f BY condition == 0;
DUMP result;
$ cat TestScores2.csv
student_id,subject,score
100,math,100
100,Japanese,80
100,science,80
200,math,80
200,Japanese,95
300,math,40
300,Japanese,90
300,society,55
400,math,80
$ cat TestScores.sql
CREATE TABLE test(student_id INT, subject VARCHAR(255), score INT)
AS SELECT *
FROM CSVREAD('TestScores2.csv');
SELECT DISTINCT student_id FROM test ts1
WHERE NOT EXISTS
(SELECT *
FROM test ts2
WHERE ts2.STUDENT_ID = ts1.STUDENT_ID
AND ts2.score < 50);
-- mathが80点以上でJapaneseが50点以上
$ cat TestScores2.pig
test = LOAD 'TestScores.csv' USING PigStorage(',') AS(student_id:int,subject:chararray,score:int);
test_fil = FILTER test BY subject == 'math' OR subject == 'Japanese';
grp = GROUP test_fil BY student_id;
f = FOREACH grp {
fil_math = FILTER test_fil BY subject == 'math' AND score < 80;
fil_japanese = FILTER test_fil BY subject == 'Japanese' AND score < 50;
GENERATE group, COUNT(test_fil.subject) AS cnt, (IsEmpty(fil_math) AND IsEmpty(fil_japanese) ? 0 : 1) AS condition;
}
result = FILTER f BY cnt == 2 AND condition == 0;
DUMP result;
$ cat TestScores2.sql
CREATE TABLE test(student_id INT, subject VARCHAR(255), score INT)
AS SELECT *
FROM CSVREAD('TestScores2.csv');
SELECT student_id
FROM test ts1
WHERE subject IN ('math', 'Japanese')
AND NOT EXISTS
(SELECT *
FROM test ts2
WHERE ts2.student_id = ts1.student_id
AND 1 = CASE WHEN subject = 'math' AND score < 80 THEN 1
WHEN subject = 'Japanese' AND score < 50 THEN 1
ELSE 0 END)
GROUP BY student_id
HAVING COUNT(*) = 2;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment