Skip to content

Instantly share code, notes, and snippets.

@dmonllao
Last active April 19, 2017 06:42
Show Gist options
  • Save dmonllao/a938c77d6230ccdf6285d84615223e1f to your computer and use it in GitHub Desktop.
Save dmonllao/a938c77d6230ccdf6285d84615223e1f to your computer and use it in GitHub Desktop.
php-ml linear classifier performance (partial training vs full training in 1 batch) (https://github.com/php-ai/php-ml/pull/78)
RESULTS (memory deltas in bytes, as reported by memory_get_usage()):
➜ php-ml git:(partial-train) ✗ clear ; php partial-vs-full.php
Classifier perceptron full train memory usage:
Usage from before data reading to end of training: 231704
Classifier adaline full train memory usage:
Usage from before data reading to end of training: 265160
Classifier logistic full train memory usage:
Usage from before data reading to end of training: 248408
Classifier perceptron partial train memory usage:
Usage from before data reading to end of training: 9176
Classifier adaline partial train memory usage:
Usage from before data reading to end of training: 86136
Classifier logistic partial train memory usage:
Usage from before data reading to end of training: 17336
SCRIPT (check out the partial-train branch before running it)
<?php
require __DIR__ . '/vendor/autoload.php';
use Phpml\Classification\Linear\Perceptron;
use Phpml\Classification\Linear\Adaline;
use Phpml\Classification\Linear\LogisticRegression;
// Number of samples accumulated before each partialTrain() call in partialRun().
const BATCH_SIZE = 10;

// First pass: one fresh instance of every classifier, each trained on the
// whole dataset with a single train() call.
$classifiers = [
    'perceptron' => new Perceptron(),
    'adaline' => new Adaline(),
    'logistic' => new LogisticRegression(),
];
foreach ($classifiers as $label => $model) {
    fullRun($label, $model);
}

// Second pass: brand new instances (so the batched run starts from untrained
// classifiers), each fed the dataset in BATCH_SIZE-sized chunks.
$classifiers = [
    'perceptron' => new Perceptron(),
    'adaline' => new Adaline(),
    'logistic' => new LogisticRegression(),
];
foreach ($classifiers as $label => $model) {
    partialRun($label, $model);
}
/**
 * Reads the whole wine dataset into memory, trains the classifier on it with a
 * single train() call and prints the memory growth of each phase.
 *
 * Every column except the last is used as a feature; the last column is the
 * target.
 *
 * @param string $name       Label used in the printed report.
 * @param object $classifier Object exposing train($samples, $targets).
 *
 * @throws \RuntimeException When the dataset file cannot be opened.
 */
function fullRun($name, $classifier) {
    $filepath = __DIR__ . '/data/wine.csv';
    $handle = fopen($filepath, 'rb');
    if ($handle === false) {
        // Without this guard fgetcsv() would be called on `false`: a TypeError
        // on PHP 8 and a never-ending warning loop on PHP 7.
        throw new \RuntimeException('Unable to open dataset file: ' . $filepath);
    }

    // The first row is treated as the header row: read it and throw it away.
    fgetcsv($handle, 1000);

    echo 'Classifier ' . $name . ' full train memory usage: ' . PHP_EOL;
    $membefore = memory_get_usage(false);

    $samples = [];
    $targets = [];
    $features = null;
    while (($data = fgetcsv($handle, 1000)) !== false) {
        if ($data === [null]) {
            // fgetcsv() reports a blank line as a single null field; it holds
            // neither features nor a target, so skip it.
            continue;
        }
        if ($features === null) {
            // Column count is taken from the first data row.
            $features = count($data) - 1;
        }
        $samples[] = array_slice($data, 0, $features);
        $targets[] = $data[$features];
    }
    fclose($handle);

    $mem1 = memory_get_usage(false);
    echo ' Usage during data reading: ' . ($mem1 - $membefore) . PHP_EOL;

    $classifier->train($samples, $targets);

    $mem2 = memory_get_usage(false);
    echo ' Usage during training: ' . ($mem2 - $mem1) . PHP_EOL;
    echo ' Usage from before data reading to end of training: ' . ($mem2 - $membefore) . PHP_EOL;
}
/**
 * Streams the wine dataset in batches of BATCH_SIZE rows, calling
 * partialTrain() on each batch, and prints the memory growth of each step.
 *
 * Every column except the last is used as a feature; the last column is the
 * target. Rows left over after the last full batch are trained as a final,
 * smaller batch so the classifier sees every row of the file.
 *
 * @param string $name       Label used in the printed report.
 * @param object $classifier Object exposing partialTrain($samples, $targets, $labels).
 * @param array  $labels     Labels passed as the third argument of every
 *                           partialTrain() call; defaults to the values the
 *                           script originally hard-coded.
 *
 * @throws \RuntimeException When the dataset file cannot be opened.
 */
function partialRun($name, $classifier, array $labels = [1, 2, 3]) {
    $filepath = __DIR__ . '/data/wine.csv';
    $handle = fopen($filepath, 'rb');
    if ($handle === false) {
        // Without this guard fgetcsv() would be called on `false`: a TypeError
        // on PHP 8 and a never-ending warning loop on PHP 7.
        throw new \RuntimeException('Unable to open dataset file: ' . $filepath);
    }

    // The first row is treated as the header row: read it and throw it away.
    fgetcsv($handle, 1000);

    echo 'Classifier ' . $name . ' partial train memory usage: ' . PHP_EOL;
    $membefore = memory_get_usage(false);

    $samples = [];
    $targets = [];
    $features = null;
    while (($data = fgetcsv($handle, 1000)) !== false) {
        if ($data === [null]) {
            // fgetcsv() reports a blank line as a single null field; it holds
            // neither features nor a target, so skip it.
            continue;
        }
        if ($features === null) {
            // Column count is taken from the first data row.
            $features = count($data) - 1;
        }
        $samples[] = array_slice($data, 0, $features);
        $targets[] = $data[$features];

        if (count($samples) >= BATCH_SIZE) {
            trainBatchAndReport($classifier, $samples, $targets, $labels, $membefore);
            // Drop the batch so only one batch is held in memory at a time.
            $samples = [];
            $targets = [];
        }
    }
    fclose($handle);

    if ($samples !== []) {
        // The row count is not necessarily a multiple of BATCH_SIZE: train on
        // the trailing rows instead of silently discarding them.
        trainBatchAndReport($classifier, $samples, $targets, $labels, $membefore);
        $samples = [];
        $targets = [];
    }

    echo ' Usage from before data reading to end of training: ' . (memory_get_usage(false) - $membefore) . PHP_EOL;
}

/**
 * Helper for partialRun(): trains the classifier on one batch and prints the
 * memory used up to the batch being read and the growth caused by training.
 *
 * @param object $classifier Object exposing partialTrain($samples, $targets, $labels).
 * @param array  $samples    Feature rows of the batch.
 * @param array  $targets    Target values of the batch, aligned with $samples.
 * @param array  $labels     Labels forwarded to partialTrain().
 * @param int    $membefore  memory_get_usage() value taken before reading began.
 */
function trainBatchAndReport($classifier, array $samples, array $targets, array $labels, $membefore) {
    $mem1 = memory_get_usage(false);
    echo ' Usage reading data batch: ' . ($mem1 - $membefore) . PHP_EOL;

    $classifier->partialTrain($samples, $targets, $labels);

    $mem2 = memory_get_usage(false);
    echo ' Usage during partial training: ' . ($mem2 - $mem1) . PHP_EOL;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment