Last active
December 16, 2015 06:09
-
-
Save polyfractal/5389101 to your computer and use it in GitHub Desktop.
Code samples to accompany this article: http://euphonious-intuition.com/2013/04/not-just-for-search-using-elasticsearch-with-machine-learning-algorithms/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require 'vendor/autoload.php'; | |
use Sherlock\Sherlock; | |
/**
 * Dump a value in print_r() format, followed by a CRLF line break.
 *
 * @param mixed $value Value to display
 */
function pprint($value) {
    echo print_r($value, true), "\r\n";
}
// Bulk-load the raw readings from data/Dodgers.data into the 'dodgers' index.
// Each input line is expected to be "timestamp,value".
$sherlock = new Sherlock();
$sherlock->addNode("localhost");

$indexName = 'dodgers';
$typeName = 'data';

$events = file_get_contents('data/Dodgers.data');
if ($events === false) {
    // Nothing to index; stop before creating an empty index
    echo "Unable to read data/Dodgers.data\r\n";
    exit(1);
}
$events = explode("\n", $events);

// Create the index with a date field and a float value field
$mappings = array();
$mappings[] = Sherlock::mappingBuilder($typeName)->Date()->field('timestamp');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->type('float')->field('value');
$index = $sherlock->index($indexName)->mappings($mappings)->create();

// Queue every row on a single document request, then send it in one go
$docs = $sherlock->document()->index($indexName)->type($typeName);
foreach ($events as $event) {
    $data = explode(",", $event);

    // Skip blank lines (e.g. the trailing newline) and rows without a value column
    if (trim($data[0]) == '' || !isset($data[1])) {
        continue;
    }

    // Skip rows whose timestamp cannot be parsed instead of indexing `false`
    $timestamp = strtotime(trim($data[0]));
    if ($timestamp === false) {
        continue;
    }

    $docData = array();
    $docData['timestamp'] = $timestamp;
    $docData['value'] = trim($data[1]);
    $docs->document($docData);
}
$response = $docs->execute();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require 'vendor/autoload.php'; | |
use Sherlock\Sherlock; | |
error_reporting(E_ERROR | E_WARNING | E_PARSE); | |
/**
 * Print a value with print_r() and terminate the output with "\r\n".
 *
 * @param mixed $value Value to print
 */
function pprint($value) {
    print_r($value);
    print("\r\n");
}
//Snagged from here:
//http://cad.cx/blog/2008/06/30/single-pass-standard-deviation-in-php/
//modified slightly to return mean + variance
/**
 * Single-pass mean / variance / standard deviation of a list of numbers.
 *
 * The variance is the population variance (the sum of squared deviations is
 * divided by n, not n - 1).
 *
 * @param array $array Numeric samples
 * @return array Keys "mean", "stddev" and "variance"; all zero for empty input
 */
function stddev($array){
    //Don Knuth is the $deity of algorithms
    //http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#III._On-line_algorithm
    $n = 0;
    $mean = 0;
    $M2 = 0;
    foreach($array as $x){
        $n++;
        $delta = $x - $mean;
        $mean = $mean + $delta/$n;
        //uses the *updated* mean, which is what keeps the running sum numerically stable
        $M2 = $M2 + $delta*($x - $mean);
    }

    //No samples: avoid dividing by zero and report neutral statistics
    if ($n === 0) {
        return array("mean" => 0, "stddev" => 0, "variance" => 0);
    }

    $variance = $M2/$n;
    return array("mean" => $mean, "stddev" => sqrt($variance), "variance" => $variance);
}
/**
 * Build the facet-derived part of a feature line for one point in time.
 *
 * Runs two searches against index 'dodgers' / type 'data': one with date
 * histogram facets over the 24 hours before $time (5m/10m/30m/1h buckets) and
 * one over the 7 days before $time (1h/2h/6h/12h/24h buckets). Every facet
 * becomes its own " |<facet name>" namespace, and every bucket metric is
 * emitted as a log2 feature, a linear feature and two categorical features
 * (rounded and raw).
 *
 * If the resulting string contains "inf" or "nan" it is printed and the
 * script exits.
 *
 * @param \Sherlock\Sherlock $sherlock Connected Sherlock client
 * @param int $time Unix timestamp marking the end of both windows
 * @return string Feature string, starting with " |"
 */
function vwFacets($sherlock, $time) {
    $query = Sherlock::queryBuilder()->MatchAll();

    //Builds one date histogram facet per interval, all restricted to [$time + $window, $time]
    $buildFacets = function ($window, $intervals) use ($time) {
        $timeFrom = date("Y-m-d\TH:i:sO", strtotime($window, $time));
        $timeTo = date("Y-m-d\TH:i:sO", $time);
        $filter = Sherlock::filterBuilder()->Range()->field('timestamp')->from($timeFrom)->to($timeTo);

        $built = array();
        foreach ($intervals as $facetName => $interval) {
            $built[] = Sherlock::facetBuilder()->DateHistogram()->facetname($facetName)
                ->key_field('timestamp')
                ->value_field('value')
                ->interval($interval)
                ->facet_filter($filter);
        }
        return $built;
    };

    //Turns the buckets of one facet into a " |namespace feature:value ..." string
    $encodeFacet = function ($response, $facetName, $metrics) {
        //namespace
        $encoded = " |".$facetName;
        $c = 0;
        foreach ($response->facets[$facetName]['entries'] as $entry) {
            foreach ($metrics as $metric) {
                $raw = $entry[$metric];
                //Make sure we don't log(0); absolute value so we don't get complex results
                $logValue = ($raw == 0) ? 0 : log(abs($raw), 2);
                $prefix = " ".$facetName."_".$metric."_";

                //log feature
                $encoded .= $prefix."log_".$c.":".$logValue;
                //linear feature
                $encoded .= $prefix.$c.":".$raw;
                //categorical feature, rounded so more examples share the same category
                $encoded .= $prefix.$c."_".round($raw).":1";
                //categorical feature, no rounding so there are potentially very sparse features
                $encoded .= $prefix.$c."_".$raw.":1";
            }
            $c += 1;
        }
        return $encoded;
    };

    $allMetrics = array("total", "min", "max", "mean");
    $out = "";

    //---- last 24 hours ----
    $dayIntervals = array(
        'time_24hr_5m_bucket' => '5m',
        'time_24hr_10m_bucket' => '10m',
        'time_24hr_30m_bucket' => '30m',
        'time_24hr_1h_bucket' => '1h',
    );
    $f = $buildFacets("-24 hour", $dayIntervals);
    $request = $sherlock->search()->index('dodgers')->type('data')->query($query)->facets($f[0], $f[1], $f[2], $f[3])->size(0);
    $response = $request->execute();

    foreach (array_keys($dayIntervals) as $facetName) {
        //The data granularity is 5m, so we only need to check "total" on the
        //5min bucket, since the other metrics will equal "total"
        $metrics = ($facetName === 'time_24hr_5m_bucket') ? array("total") : $allMetrics;
        $out .= $encodeFacet($response, $facetName, $metrics);
    }

    //---- last 7 days ----
    $weekIntervals = array(
        'time_7d_1hr_bucket' => '1h',
        'time_7d_2hr_bucket' => '2h',
        'time_7d_6hr_bucket' => '6h',
        'time_7d_12hr_bucket' => '12h',
        'time_7d_24hr_bucket' => '24h',
    );
    $f = $buildFacets("-7 days", $weekIntervals);
    $request = $sherlock->search()->index('dodgers')->type('data')->query($query)->facets($f[0], $f[1], $f[2], $f[3], $f[4])->size(0);
    $response = $request->execute();

    //all of these facets are >5min, so we can use all combinations for each facet
    foreach (array_keys($weekIntervals) as $facetName) {
        $out .= $encodeFacet($response, $facetName, $allMetrics);
    }

    //strpos() returns 0 for a match at the start, so compare against false explicitly
    $lowered = strtolower($out);
    if (strpos($lowered, "inf") !== false || strpos($lowered, "nan") !== false) {
        pprint($out);
        exit;
    }
    return $out;
}
// Replays data/Dodgers.test through a VW daemon on localhost:26542 and stores
// each prediction, with running residual statistics, in Elasticsearch.
$minAttendance = 1000000000;
$maxAttendance = 0;
$label = -1;
$history = array();
$mse = 0;
$eventData = array();
$eventEnd = 0;
$eventAttendance = 0;

$sherlock = new \Sherlock\Sherlock();
$sherlock->addNode("localhost");
$indexName = 'dodgerstestc';
$typeName = 'data';

//create the ES index to house our prediction data
$mappings = array();
$mappings[] = Sherlock::mappingBuilder($typeName)->Date()->field('timestamp');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->field('value')->type('float');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->field('baseballgame')->type('integer');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->field('prediction')->type('float');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->field('residual')->type('float');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->field('stdresidual')->type('float');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->field('rmse')->type('float');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->field('variance')->type('float');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->field('stddev')->type('float');
$mappings[] = Sherlock::mappingBuilder($typeName)->Number()->field('mean')->type('float');
$index = $sherlock->index($indexName)->mappings($mappings)->create();

/**
 * Load the baseball game event data. Could also load this out of ES
 */
$events = file_get_contents('data/Dodgers.events');
$events = explode("\n", $events);
foreach ($events as $event) {
    $data = explode(",", $event);

    //Skip blank lines (e.g. the trailing newline) and rows without an attendance column
    if (count($data) < 4) {
        continue;
    }
    $attendance = (int)trim($data[3]);

    //The label window is keyed on the game's end time (column 2), rounded to
    //5 minutes and moved 20 minutes earlier; it then stays open for two hours.
    //This shifts the label onto the traffic spike, instead of the game
    $endTime = strtotime("-20 minutes",round(strtotime($data[0].' '.$data[2])/300)*300);
    $eventData[$endTime]['end'] = strtotime("+2 hours",$endTime);
    $eventData[$endTime]['attendance'] = $attendance;

    if ($attendance < $minAttendance) {
        $minAttendance = $attendance;
    }
    if ($attendance > $maxAttendance) {
        $maxAttendance = $attendance;
    }
}

/**
 * Load up the test data
 */
//name => date() format of the calendar features sent with every example
$dateFeatures = array(
    'year' => 'Y',
    'month' => 'm',
    'day' => 'd',
    'dayweek' => 'N',
    'weekyear' => 'W',
    'dayyear' => 'z',
    'monthyear' => 'n',
    'hour' => 'H',
    'minute' => 'i',
);

$events = file_get_contents('data/Dodgers.test');
$events = explode("\n", $events);
foreach ($events as $event) {
    $data = explode(",", $event);

    //Skip blank lines and rows without a value column
    if (trim($data[0]) == '' || !isset($data[1])) {
        continue;
    }
    $time = strtotime(trim($data[0]));
    if ($time === false) {
        continue;
    }

    //if this datapoint coincides with an event timestamp, set the label to one
    //and leave it that way until we move out of the event time range
    //This works because all timestamps are rounded to five minute intervals
    if (isset($eventData[$time]['end'])) {
        $label = 1;
        $eventEnd = $eventData[$time]['end'];
        $eventAttendance = $eventData[$time]['attendance'];
    }
    if ($label == 1 && $time > $eventEnd) {
        $label = -1;
    }

    //docData holds the document we are going to insert into ES
    $docData = array();
    $docData['value'] = (int)trim($data[1]) / 100;
    $docData['timestamp'] = date('Y-m-d\TH:i:sO', $time);
    $docData['baseballgame'] = $label;

    //facetData contains the large feature set derived from the faceted time buckets
    $facetData = vwFacets($sherlock, $time);

    //The namespace must be spelled exactly like the one in the training data
    //("dategeneral"): VW hashes feature names together with their namespace
    $vwData = "|dategeneral ";
    foreach ($dateFeatures as $name => $format) {
        $part = date($format, $time);
        //numeric feature followed by its one-hot categorical twin
        $vwData .= "date_".$name.":".$part." date_".$name."_".$part.":1 ";
    }
    $vwData .= "date_ampm_".date("a", $time).":1 ";
    $vwData .= $facetData."\n";

    $fp = fsockopen("localhost", 26542, $errno, $errstr, 3);
    if (!$fp) {
        //No connection, so there is no handle to close; report and move on to the next row
        pprint("Could not connect to VW: ".$errstr." (".$errno.")");
        continue;
    }

    //write to VW's socket, get data and close
    fwrite($fp, $vwData);
    $returnData = trim((string)fgets($fp, 256));
    fclose($fp);

    $returnData = explode(" ", $returnData);
    if (!is_numeric($returnData[0])) {
        //Empty or malformed reply: nothing meaningful to store for this row
        pprint("Unexpected reply from VW for ".$docData['timestamp']);
        continue;
    }
    $docData['prediction'] = (float)$returnData[0];

    //residual is Actual - Predicted
    $docData['residual'] = $docData['value'] - $docData['prediction'];

    //history is used to calculate residual mean/variance/stddev
    $history[] = $docData['residual'];
    $stats = stddev($history);
    $docData['mean'] = $stats['mean'];
    $docData['variance'] = $stats['variance'];
    $docData['stddev'] = $stats['stddev'];
    $docData['stdresidual'] = ($docData['stddev'] == 0) ? 0 :($docData['residual'] - $docData['mean']) / $docData['stddev'];

    //Divide by the number of residuals accumulated so far (never zero here)
    $mse += pow($docData['residual'],2);
    $docData['rmse'] = sqrt($mse / count($history));

    pprint($docData);

    //insert into ES
    $docs = $sherlock->document()->index($indexName)->type($typeName)->document($docData)->execute();
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
error_reporting(E_ERROR | E_WARNING | E_PARSE); | |
require 'vendor/autoload.php'; | |
use Sherlock\Sherlock; | |
/**
 * Write the print_r() representation of a value plus a trailing CRLF.
 *
 * @param mixed $value Value to output
 */
function pprint($value) {
    echo print_r($value, true) . "\r\n";
}
/**
 * Build the facet-derived part of a training example for one point in time.
 *
 * Runs two searches against index 'dodgers' / type 'data': one with date
 * histogram facets over the 24 hours before $time (5m/10m/30m/1h buckets) and
 * one over the 7 days before $time (1h/2h/6h/12h/24h buckets). Every facet
 * becomes its own " |<facet name>" namespace, and every bucket metric is
 * emitted as a log2 feature, a linear feature and two categorical features
 * (rounded and raw).
 *
 * If the resulting string contains "inf" or "nan" it is printed and the
 * script exits.
 *
 * @param \Sherlock\Sherlock $sherlock Connected Sherlock client
 * @param int $time Unix timestamp marking the end of both windows
 * @return string Feature string, starting with " |"
 */
function vwFacets($sherlock, $time) {
    $query = Sherlock::queryBuilder()->MatchAll();

    //Builds one date histogram facet per interval, all restricted to [$time + $window, $time]
    $buildFacets = function ($window, $intervals) use ($time) {
        $timeFrom = date("Y-m-d\TH:i:sO", strtotime($window, $time));
        $timeTo = date("Y-m-d\TH:i:sO", $time);
        $filter = Sherlock::filterBuilder()->Range()->field('timestamp')->from($timeFrom)->to($timeTo);

        $built = array();
        foreach ($intervals as $facetName => $interval) {
            $built[] = Sherlock::facetBuilder()->DateHistogram()->facetname($facetName)
                ->key_field('timestamp')
                ->value_field('value')
                ->interval($interval)
                ->facet_filter($filter);
        }
        return $built;
    };

    //Turns the buckets of one facet into a " |namespace feature:value ..." string
    $encodeFacet = function ($response, $facetName, $metrics) {
        //namespace
        $encoded = " |".$facetName;
        $c = 0;
        foreach ($response->facets[$facetName]['entries'] as $entry) {
            foreach ($metrics as $metric) {
                $raw = $entry[$metric];
                //Make sure we don't log(0); absolute value so we don't get complex results
                $logValue = ($raw == 0) ? 0 : log(abs($raw), 2);
                $prefix = " ".$facetName."_".$metric."_";

                //log feature
                $encoded .= $prefix."log_".$c.":".$logValue;
                //linear feature
                $encoded .= $prefix.$c.":".$raw;
                //categorical feature, rounded so more examples share the same category
                $encoded .= $prefix.$c."_".round($raw).":1";
                //categorical feature, no rounding so there are potentially very sparse features
                $encoded .= $prefix.$c."_".$raw.":1";
            }
            $c += 1;
        }
        return $encoded;
    };

    $allMetrics = array("total", "min", "max", "mean");
    $out = "";

    //---- last 24 hours ----
    $dayIntervals = array(
        'time_24hr_5m_bucket' => '5m',
        'time_24hr_10m_bucket' => '10m',
        'time_24hr_30m_bucket' => '30m',
        'time_24hr_1h_bucket' => '1h',
    );
    $f = $buildFacets("-24 hour", $dayIntervals);
    $request = $sherlock->search()->index('dodgers')->type('data')->query($query)->facets($f[0], $f[1], $f[2], $f[3])->size(0);
    $response = $request->execute();

    foreach (array_keys($dayIntervals) as $facetName) {
        //The data granularity is 5m, so we only need to check "total" on the
        //5min bucket, since the other metrics will equal "total"
        $metrics = ($facetName === 'time_24hr_5m_bucket') ? array("total") : $allMetrics;
        $out .= $encodeFacet($response, $facetName, $metrics);
    }

    //---- last 7 days ----
    $weekIntervals = array(
        'time_7d_1hr_bucket' => '1h',
        'time_7d_2hr_bucket' => '2h',
        'time_7d_6hr_bucket' => '6h',
        'time_7d_12hr_bucket' => '12h',
        'time_7d_24hr_bucket' => '24h',
    );
    $f = $buildFacets("-7 days", $weekIntervals);
    $request = $sherlock->search()->index('dodgers')->type('data')->query($query)->facets($f[0], $f[1], $f[2], $f[3], $f[4])->size(0);
    $response = $request->execute();

    //all of these facets are >5min, so we can use all combinations for each facet
    foreach (array_keys($weekIntervals) as $facetName) {
        $out .= $encodeFacet($response, $facetName, $allMetrics);
    }

    //strpos() returns 0 for a match at the start, so compare against false explicitly
    $lowered = strtolower($out);
    if (strpos($lowered, "inf") !== false || strpos($lowered, "nan") !== false) {
        pprint($out);
        exit;
    }
    return $out;
}
// Generates two training files from data/Dodgers.train:
//   data/train.real   - label is the scaled sensor value
//   data/train.binary - label is -1/1 (inside a game window or not), importance weighted
$sherlock = new \Sherlock\Sherlock();
$sherlock->addNode("localhost");

$eventData = array();
$minAttendance = 1000000000;
$maxAttendance = 0;

$events = file_get_contents('data/Dodgers.events');
$events = explode("\n", $events);
foreach ($events as $event) {
    $data = explode(",", $event);

    //Skip blank lines (e.g. the trailing newline) and rows without an attendance column
    if (count($data) < 4) {
        continue;
    }
    $attendance = (int)trim($data[3]);

    //The label window is keyed on the game's end time (column 2), rounded to
    //5 minutes and moved 20 minutes earlier; it then stays open for two hours
    $endTime = strtotime("-20 minutes",round(strtotime($data[0].' '.$data[2])/300)*300);
    $eventData[$endTime]['end'] = strtotime("+2 hours",$endTime);
    $eventData[$endTime]['attendance'] = $attendance;

    if ($attendance < $minAttendance) {
        $minAttendance = $attendance;
    }
    if ($attendance > $maxAttendance) {
        $maxAttendance = $attendance;
    }
}

//name => date() format of the calendar features written with every example
$dateFeatures = array(
    'year' => 'Y',
    'month' => 'm',
    'day' => 'd',
    'dayweek' => 'N',
    'weekyear' => 'W',
    'dayyear' => 'z',
    'monthyear' => 'n',
    'hour' => 'H',
    'minute' => 'i',
);

$events = file_get_contents('data/Dodgers.train');
$events = explode("\n", $events);

$eventAttendance = 0;
$eventEnd = 0;
$labelBinary = -1;
$labelReal = 0;
$counter = 0;
$outReal = "";
$outBinary = "";

foreach ($events as $event) {
    $data = explode(",", $event);

    //Skip blank lines and rows without a value column, so no bogus example is written
    if (trim($data[0]) == '' || !isset($data[1])) {
        continue;
    }
    $time = strtotime(trim($data[0]));
    if ($time === false) {
        continue;
    }

    /**
     * if this time is a key in the Dodgers event list, a labelled
     * window has just opened. Set label = 1 and don't change it until
     * the timestamp is after the end of that window
     */
    if (isset($eventData[$time]['end'])) {
        $labelBinary = 1;
        $eventEnd = $eventData[$time]['end'];
        $eventAttendance = $eventData[$time]['attendance'];
    }

    /**
     * Window has ended, set label back to -1
     */
    if ($labelBinary == 1 && $time > $eventEnd) {
        $labelBinary = -1;
    }

    //labelReal is used for the real-valued model, where we want the value of the cars
    //and not baseball game
    $labelReal = (int)trim($data[1]) / 100;

    //obtain the various time buckets
    $facetData = vwFacets($sherlock, $time);

    //the timestamp is written directly in front of the first "|";
    //the label (and weight) are prepended below, then the features follow
    $out = " ".$time."|dategeneral ";
    foreach ($dateFeatures as $name => $format) {
        $part = date($format, $time);
        //numeric feature followed by its one-hot categorical twin
        $out .= "date_".$name.":".$part." date_".$name."_".$part.":1 ";
    }
    $out .= "date_ampm_".date("a", $time).":1 ";
    $out .= $facetData."\n";

    $outReal .= $labelReal.$out;

    //for the binary classification, we label things -1 or 1
    //and importance weight the positive examples
    if ($labelBinary == 1) {
        $weight = ($eventAttendance / $maxAttendance);
        $outBinary .= $labelBinary." ".$weight.$out;
    } else {
        $outBinary .= $labelBinary." 0.01".$out;
    }

    echo ".";

    //flush the buffers to disk every 1000 examples to keep memory use flat
    if ($counter % 1000 == 0) {
        file_put_contents("data/train.real", $outReal, FILE_APPEND);
        $outReal = "";
        file_put_contents("data/train.binary", $outBinary, FILE_APPEND);
        $outBinary = "";
    }
    $counter += 1;
}

//write whatever is left in the buffers
file_put_contents("data/train.real", $outReal, FILE_APPEND);
file_put_contents("data/train.binary", $outBinary, FILE_APPEND);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@kolalait Maybe it's not relevant anymore, but I found the broken site in the archive: https://web.archive.org/web/20140626120232/http://euphonious-intuition.com/2013/04/not-just-for-search-using-elasticsearch-with-machine-learning-algorithms/