wyukawa · October 13, 2012 14:14
diff --git a/gistfile1.txt b/gistfile1.txt
 -- 全ての教科が50点以上

 $ cat TestScores.csv
 100,math,100
 100,Japanese,80
 100,science,80
 200,math,80
 200,Japanese,95
 300,math,40
 300,Japanese,90
 300,society,55
 400,math,80

 $ cat TestScores.pig 
 test = LOAD 'TestScores.csv' USING PigStorage(',') AS(student_id:int,subject:chararray,score:int);

 grp = GROUP test BY student_id;

 f = FOREACH grp {

  fil = FILTER test BY score < 50;
  GENERATE group, (IsEmpty(fil) ? 0 : 1) AS condition;

 }

 result = FILTER f BY condition == 0;

 DUMP result;

 $ cat TestScores2.csv 
 student_id,subject,score
 100,math,100
 100,Japanese,80
 100,science,80
 200,math,80
 200,Japanese,95
 300,math,40
 300,Japanese,90
 300,society,55
 400,math,80

 $ cat TestScores.sql 
 CREATE TABLE test(student_id INT, subject VARCHAR(255), score INT)
 	AS SELECT * 
 		FROM CSVREAD('TestScores2.csv');
    
 SELECT DISTINCT student_id FROM test ts1
 WHERE NOT EXISTS
 	(SELECT * 
 		FROM test ts2
 		WHERE ts2.STUDENT_ID = ts1.STUDENT_ID
 		AND ts2.score < 50);

 -- mathが80点以上でJapaneseが50点以上

 $ cat TestScores2.pig 
 test = LOAD 'TestScores.csv' USING PigStorage(',') AS(student_id:int,subject:chararray,score:int);

 test_fil = FILTER test BY subject == 'math' OR subject == 'Japanese';

 grp = GROUP test_fil BY student_id;

 f = FOREACH grp {

  fil_math = FILTER test_fil BY subject == 'math' AND score < 80;
  fil_japanese = FILTER test_fil BY subject == 'Japanese' AND score < 50;
  GENERATE group, COUNT(test_fil.subject) AS cnt, (IsEmpty(fil_math) AND IsEmpty(fil_japanese) ? 0 : 1) AS condition;

 }

 result = FILTER f BY cnt == 2 AND condition == 0;

 DUMP result;

 $ cat TestScores2.sql 
 CREATE TABLE test(student_id INT, subject VARCHAR(255), score INT)
 	AS SELECT * 
 		FROM CSVREAD('TestScores2.csv');
    

 SELECT student_id
 	FROM test ts1
 	WHERE subject IN ('math', 'Japanese')
 	AND NOT EXISTS
 		(SELECT *
 			FROM test ts2
 			WHERE ts2.student_id = ts1.student_id
 			AND 1 = CASE WHEN subject = 'math' AND score < 80 THEN 1
 			             WHEN subject = 'Japanese' AND score < 50 THEN 1
 				     ELSE 0 END)
 	GROUP BY student_id
 	HAVING COUNT(*) = 2;
	-- 全ての教科が50点以上

	$ cat TestScores.csv
	100,math,100
	100,Japanese,80
	100,science,80
	200,math,80
	200,Japanese,95
	300,math,40
	300,Japanese,90
	300,society,55
	400,math,80

	$ cat TestScores.pig
	test = LOAD 'TestScores.csv' USING PigStorage(',') AS(student_id:int,subject:chararray,score:int);

	grp = GROUP test BY student_id;

	f = FOREACH grp {

	fil = FILTER test BY score < 50;
	GENERATE group, (IsEmpty(fil) ? 0 : 1) AS condition;

	}

	result = FILTER f BY condition == 0;

	DUMP result;

	$ cat TestScores2.csv
	student_id,subject,score
	100,math,100
	100,Japanese,80
	100,science,80
	200,math,80
	200,Japanese,95
	300,math,40
	300,Japanese,90
	300,society,55
	400,math,80

	$ cat TestScores.sql
	CREATE TABLE test(student_id INT, subject VARCHAR(255), score INT)
	AS SELECT *
	FROM CSVREAD('TestScores2.csv');

	SELECT DISTINCT student_id FROM test ts1
	WHERE NOT EXISTS
	(SELECT *
	FROM test ts2
	WHERE ts2.STUDENT_ID = ts1.STUDENT_ID
	AND ts2.score < 50);

	-- mathが80点以上でJapaneseが50点以上

	$ cat TestScores2.pig
	test = LOAD 'TestScores.csv' USING PigStorage(',') AS(student_id:int,subject:chararray,score:int);

	test_fil = FILTER test BY subject == 'math' OR subject == 'Japanese';

	grp = GROUP test_fil BY student_id;

	f = FOREACH grp {

	fil_math = FILTER test_fil BY subject == 'math' AND score < 80;
	fil_japanese = FILTER test_fil BY subject == 'Japanese' AND score < 50;
	GENERATE group, COUNT(test_fil.subject) AS cnt, (IsEmpty(fil_math) AND IsEmpty(fil_japanese) ? 0 : 1) AS condition;

	}

	result = FILTER f BY cnt == 2 AND condition == 0;

	DUMP result;

	$ cat TestScores2.sql
	CREATE TABLE test(student_id INT, subject VARCHAR(255), score INT)
	AS SELECT *
	FROM CSVREAD('TestScores2.csv');


	SELECT student_id
	FROM test ts1
	WHERE subject IN ('math', 'Japanese')
	AND NOT EXISTS
	(SELECT *
	FROM test ts2
	WHERE ts2.student_id = ts1.student_id
	AND 1 = CASE WHEN subject = 'math' AND score < 80 THEN 1
	WHEN subject = 'Japanese' AND score < 50 THEN 1
	ELSE 0 END)
	GROUP BY student_id
	HAVING COUNT(*) = 2;
No results found