Skip to content

Instantly share code, notes, and snippets.

@clairvy
Created June 13, 2010 00:31
Show Gist options
  • Select an option

  • Save clairvy/436219 to your computer and use it in GitHub Desktop.

Select an option

Save clairvy/436219 to your computer and use it in GitHub Desktop.
#!/usr/bin/env php
<?php
/**
 * Hadoop Streaming mapper.
 *
 * Reads fixed-width records from stdin and writes "<year>\t<temperature>"
 * lines to stdout. Fields are taken by byte offset:
 *   - year:        4 characters starting at offset 15
 *   - temperature: 5 characters starting at offset 87 (sign included)
 *   - flag:        1 character at offset 92
 * A record is emitted only when the temperature is not '+9999' and the flag
 * is one of 0, 1, 4, 5, 9. Records too short to contain these fields fail
 * the flag test and are skipped.
 *
 * NOTE(review): the offsets look like the NCDC weather-record layout, with
 * '+9999' as the missing-value marker and the flag as a quality code --
 * confirm against the data specification.
 */
$fp = fopen('php://stdin', 'r');
if (! $fp) {
    // Report on stderr with a failing status; exit('...') would print the
    // message on stdout (mixing it into the mapper output) and exit with 0.
    fwrite(STDERR, "cannot open stdin\n");
    exit(1);
}

$accepted_flags = array('0', '1', '4', '5', '9');

// fgets() returns false at end of input; testing its result directly avoids
// the extra pass with $line === false that a feof()-guarded loop performs.
while (($line = fgets($fp)) !== false) {
    $year = substr($line, 15, 4);
    $temp = substr($line, 87, 5);
    $q = substr($line, 92, 1);
    if ($temp !== '+9999' and in_array($q, $accepted_flags, true)) {
        echo "$year\t$temp\n";
    }
}
fclose($fp);
#!/usr/bin/env php
<?php
/**
 * Hadoop Streaming reducer.
 *
 * Reads "<key>\t<value>" lines from stdin and, for every run of consecutive
 * lines sharing the same key, writes "<key>\t<max value>" to stdout. The
 * input is expected to arrive grouped by key; a key that reappears after a
 * different key starts a new run.
 *
 * Values are compared with max(); numeric strings such as "+0022" and
 * "-0011" are compared numerically by PHP, and the winning value is echoed
 * exactly as it was read.
 */
$fp = fopen('php://stdin', 'r');
if (! $fp) {
    // Report on stderr with a failing status; exit('...') would print the
    // message on stdout (mixing it into the reducer output) and exit with 0.
    fwrite(STDERR, "cannot open stdin\n");
    exit(1);
}

$last_key = null;
// null means "no value seen yet for the current key". Seeding with 0 would
// report 0 for a key whose values are all negative.
$max_val = null;

// fgets() returns false at end of input; testing its result directly avoids
// the extra pass with an empty pseudo-line that a feof()-guarded loop performs.
while (($line = fgets($fp)) !== false) {
    $fields = explode("\t", rtrim($line));
    if (count($fields) < 2) {
        // Blank or malformed line: skip it rather than treating it as a key change.
        continue;
    }
    list($key, $val) = $fields;

    // Compare against null explicitly so that a key of "0" is still flushed.
    if ($last_key !== null && $last_key !== $key) {
        echo "$last_key\t$max_val\n";
        $max_val = null;
    }

    $last_key = $key;
    $max_val = ($max_val === null) ? $val : max($max_val, $val);
}
fclose($fp);

// Flush the final run, if any input was seen.
if ($last_key !== null) {
    echo "$last_key\t$max_val\n";
}
#!/bin/sh
# Runs the max-temperature map/reduce scripts through Hadoop Streaming and
# prints the first output partition.
#
# Environment:
#   HADOOP_INSTALL  Hadoop installation directory. Taken from the environment
#                   when set; otherwise the placeholder below is used and
#                   must be edited.
#
# Stop at the first failing command (so the result is not printed after a
# failed job) and treat unset variables as errors.
set -eu

# Extension of the mapper/reducer scripts to submit.
ext=php
export HADOOP_INSTALL="${HADOOP_INSTALL:-/path/to/hadoop-0.20.2}"

# Clear any previous output directory. This fails when there is nothing to
# remove, which is fine on a first run, so the failure is deliberately ignored.
hadoop fs -rmr -skipTrash output || true

# The glob is left outside the quotes so the versioned jar name still expands.
hadoop jar "$HADOOP_INSTALL"/contrib/streaming/hadoop-*-streaming.jar \
  -input input/ncdc/sample.txt \
  -output output \
  -mapper "./max_temprature_map.$ext" \
  -reducer "./max_temprature_reduce.$ext" \
  -file "./max_temprature_map.$ext" \
  -file "./max_temprature_reduce.$ext"

hadoop fs -cat output/part-00000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment