Skip to content

Instantly share code, notes, and snippets.

@72squared
Created May 31, 2012 16:16
Show Gist options
  • Select an option

  • Save 72squared/2844501 to your computer and use it in GitHub Desktop.

Select an option

Save 72squared/2844501 to your computer and use it in GitHub Desktop.
load generator for elasticsearch ... needs php5.3 with curl, and git://github.com/gaiaops/gaia_core_php.git
#!/usr/bin/env php
<?php
# to get the included library, you need php5.3 with curl installed.
# then do:
# git clone git://github.com/gaiaops/gaia_core_php.git
# this gives you the http library used in this test script:
# https://github.com/gaiaops/gaia_core_php/blob/master/lib/gaia/http/request.php
# https://github.com/gaiaops/gaia_core_php/blob/master/lib/gaia/http/pool.php
#
# This include path should point to the git repo cloned in the instructions above.
# The autoloader is presumably what makes the \Gaia\Http\Pool, \Gaia\Http\Request and
# \Gaia\Exception classes used further down resolvable -- confirm against the library.
include __DIR__ . '/gaia_core_php/autoload.php';
# long running script, potentially.
# make sure it doesn't run into a script timeout issue:
# a limit of 0 removes PHP's execution time limit.
set_time_limit(0);
# set up the options for the script.
# keys are getopt() long-option specs: a single trailing colon means the option
# requires a value, a double colon means the value is optional.
# values are the help text printed when the script is run without arguments.
$longopts = array(
    'host:'         => 'example: 127.0.0.1:9200',
    'prefix:'       => 'test',
    'store::'       => 'how many keys to store',
    'fetch::'       => 'how many keys to fetch',
    'fetchowner::'  => 'how many keys to fetch for owner 2i',
    'testdata::'    => 'a string of data to add to each record',
    'keyrange::'    => 'generate the keys from this range. if it is a scalar value == sequential, then it will create them in serial order starting with 1' . "\n\t" . 'defaults to 1-1000000',
    'parallel::'    => 'how many http requests to run in parallel. default is 1, but should probably do 50 or 100',
    'amountrange::' => 'range of amount values in the data. default 1,100',
    # the help text used to claim a default of 1,10000; the default actually
    # applied when --ownerrange is omitted is 1,1000.
    'ownerrange::'  => 'range of owner values in the data. default 1,1000',
    'verbose::'     => 'print debug ... if not passed, prints a period for every 1000 requests made. otherwise it prints the record fetched or stored. if you pass --verbose=2, it prints the http request and response information.',
);
# parse the command line; only long options are supported.
$_OPTS = getopt($shortopts = '', array_keys($longopts));
# no options, print help
# getopt() yields an empty array (or FALSE on failure) when nothing usable was
# passed; in that case show the argument list plus a few example invocations
# and stop with a non-zero exit status.
if( ! $_OPTS ){
    $file = './' . basename(__FILE__);
    echo "\n" . 'ARGUMENTS';
    # the keys of $longopts carry getopt's trailing ':' markers; strip them so
    # the user sees the option exactly as it must be typed.
    foreach( $longopts as $arg => $desc ) print "\n --" . rtrim( $arg, ':' ) . ": $desc";
    print "\nEXAMPLES:\n";
    print "\n $file --host=elasticsearchcluster1:9200 --store=10000 --keyrange=1,10000000 --amountrange=1,1000 --testdata='hello world' --parallel=10 --verbose=1";
    print "\n stores 10K keys randomly picking from the key range 1 thru 10 million, populates test data using the amount range.";
    print "\n $file --host=elasticsearchcluster1:9200 --store=1000 --keyrange=sequential --amountrange=1,1000 --testdata='hello world' --parallel=10";
    print "\n stores 1K keys by writing them in sequential order";
    # a space was missing between the script name and --host in the next two
    # examples, which made the printed command lines unusable as-is.
    print "\n $file --host=elasticsearchcluster1:9200 --fetch=10000 --keyrange=1,10000 --parallel=10";
    print "\n grabs 10K keys, 10 at a time, randomly from the key range 1-10000. some duplicate requests bound to happen.";
    print "\n $file --host=elasticsearchcluster1:9200 --fetchowner=10000 --ownerrange=1,100 --parallel=10";
    print "\n grabs 10K keys, 10 at a time reading from the 2i index on owner. uses ownerrange to determine which 2i index to read from.";
    echo "\n";
    exit(1);
}
# populate defaults for the arguments passed into the script.
#
# getopt() reports an option that was given without a value (for example a bare
# --verbose) as boolean FALSE, and a repeated option as an array of values.
# $optvalue normalizes both cases:
#   $opts    - the array returned by getopt()
#   $name    - long option name, without dashes or colons
#   $default - returned when the option was not passed at all
#   $bare    - returned when the option was passed without a value
#              (falls back to $default when NULL)
$optvalue = function ( $opts, $name, $default, $bare = NULL ) {
    if( ! is_array( $opts ) || ! isset( $opts[$name] ) ) return $default;
    $value = $opts[$name];
    # a repeated option: the last occurrence wins.
    if( is_array( $value ) ) $value = end( $value );
    if( $value === FALSE ) return ( $bare === NULL ) ? $default : $bare;
    return $value;
};
$passed = ( isset( $_OPTS ) && is_array( $_OPTS ) ) ? $_OPTS : array();
# reads a "min,max" style option, returning $default when it is absent or bare.
$rangeopt = function ( $name, array $default ) use ( $optvalue, $passed ) {
    $raw = $optvalue( $passed, $name, NULL );
    return ( $raw === NULL ) ? $default : explode( ',', $raw );
};
$host = $optvalue( $passed, 'host', '127.0.0.1:9200' );
$prefix = $optvalue( $passed, 'prefix', 'test' );
# counters are compared numerically later on, so store them as integers.
$store = (int) $optvalue( $passed, 'store', 0 );
$fetch = (int) $optvalue( $passed, 'fetch', 0 );
$fetchowner = (int) $optvalue( $passed, 'fetchowner', 0 );
$testdata = $optvalue( $passed, 'testdata', '' );
# 'sequential' is not among the declared long options, so getopt never sets it;
# sequential key generation is selected with --keyrange=sequential instead.
$sequential = isset( $passed['sequential'] ) ? 1: 0;
# a bare --verbose means level 1 rather than "not verbose".
$verbose = (int) $optvalue( $passed, 'verbose', 0, 1 );
# at least one request slot is needed, otherwise the fill loops never make progress.
$parallel = max( 1, (int) $optvalue( $passed, 'parallel', 1 ) );
$keyrange = $rangeopt( 'keyrange', array(1,1000000) );
$amountrange = $rangeopt( 'amountrange', array(1,100) );
$ownerrange = $rangeopt( 'ownerrange', array(1,1000) );
# running error count and the number of errors tolerated before giving up.
$errors = 0;
$error_limit = 10;
# request counter shared (by reference) with the closures below.
$i = 0;
# http pool object for creating parallel requests.
$pool = new \Gaia\Http\Pool;
# attach a global handler for the http response.
# responses with a status from 200 through 499 are accepted as-is. anything else
# (5xx, or a status below 200 such as a non-response) counts as an error: it is
# printed and followed by a one second pause, and once more than $error_limit
# errors have accumulated the exception is thrown instead.
$pool->attach( function( $http ) use ( $verbose, & $errors, $error_limit ) {
    $res = $http->response;
    if( $verbose > 1 ) {
        print_r( $res );
    }
    if( $res->http_code >= 200 && $res->http_code <= 499 ) return;
    $e = new \Gaia\Exception('error', $http );
    $errors++;
    if( $errors > $error_limit ) throw $e;
    echo $e;
    # print_r() prints on its own; echoing its return value added a stray "1".
    print_r( $e->getDebug() );
    # back off briefly before more requests are issued.
    sleep(1);
} );
# builds one PUT request that writes a generated record.
# bumps the shared counter $i, picks the key (the counter itself in sequential
# mode, otherwise a random value from $keyrange) and fills the record with a
# random owner, a random amount and the configured test data.
# returns the http request so the caller can add it to a request pool.
$storeentry = function () use (&$i, $host, $prefix, $keyrange, $ownerrange, $amountrange, $testdata, $verbose ) {
    $i++;
    if( $keyrange[0]=='sequential' ) {
        $key = $i;
    } else {
        $key = mt_rand($keyrange[0],$keyrange[1]);
    }
    $request = new \Gaia\Http\Request("http://$host/test/$prefix/$key");
    $record = array(
        'owner'    => mt_rand($ownerrange[0],$ownerrange[1]),
        'amount'   => mt_rand($amountrange[0], $amountrange[1]),
        'testdata' => $testdata,
    );
    $request->post = json_encode($record);
    $request->method = 'PUT';
    if( $verbose ) {
        # verbose: one line per stored record.
        printf("\n$key -- owner: %d,\tamount: %d\n", $record['owner'], $record['amount']);
    } elseif( $i % 1000 == 1 ) {
        # quiet: a progress dot every 1000 requests.
        print ".";
    }
    return $request;
};
# closure for reading an entry
# returns an http request that can be added to a request pool.
#   $key - key to fetch; when NULL a key is generated (the shared counter $i in
#          sequential mode, otherwise a random value from $keyrange).
# every call bumps the shared counter $i, whether or not a key was supplied.
$fetchentry = function ($key = NULL ) use (&$i, $host, $prefix, $keyrange, $verbose ) {
    $i++;
    if( $key === NULL ) $key = ( $keyrange[0]=='sequential' ) ? $i : mt_rand($keyrange[0],$keyrange[1]);
    $http = new \Gaia\Http\Request("http://$host/test/$prefix/$key");
    if( $verbose ) {
        # verbose: print the owner/amount of each record as its response arrives.
        $http->handle = function ( $response ) use ( $key ){
            $data = json_decode($response->body, TRUE );
            if( ! is_array( $data ) ) return;
            # a response without a _source section (e.g. the key does not exist)
            # used to trigger undefined-index notices and a misleading "owner: 0" line.
            if( ! isset( $data['_source'] ) || ! is_array( $data['_source'] ) ) {
                printf("\n%s -- no document in response\n", $key );
                return;
            }
            $source = $data['_source'];
            # the key is passed as an argument so a '%' in it cannot corrupt the format.
            printf(
                "\n%s -- owner: %d,\tamount: %d\n",
                $key,
                isset( $source['owner'] ) ? $source['owner'] : 0,
                isset( $source['amount'] ) ? $source['amount'] : 0
            );
        };
    }
    # quiet: a progress dot every 1000 requests.
    if( ! $verbose && $i % 1000 == 1) print ".";
    return $http;
};
# closure for reading the owner index.
# picks a random owner from $ownerrange, builds a search request for it and
# installs a curl write callback that, while the search response is still
# streaming in, queues a $fetchentry request on the pool for every document id
# it sees. the transfer is aborted as soon as $i reaches $fetchowner.
# returns an http request.
$fetchownerentry = function () use (&$i, $host, $prefix, $fetchowner, $fetchentry, $ownerrange, $verbose, $pool, $parallel ) {
    $key = mt_rand($ownerrange[0],$ownerrange[1]);
    $request = new \Gaia\Http\Request("http://$host/test/$prefix/_search?q=owner:$key");
    # $regex is defined inside this callback; it used to be listed in use() as
    # well, which captured an undefined variable.
    $request->build = function ( $request, array & $opts ) use ( &$i, $fetchowner, $fetchentry, $pool, $parallel ){
        # NOTE(review): the original pattern matched a {"keys":[...]} payload,
        # which is not what an Elasticsearch _search call returns. ids are now
        # taken from the "_id" fields of the hits -- confirm against the
        # Elasticsearch version under test.
        $regex = '#"_id"\s*:\s*"([^"]+)"#';
        $r = $request->response;
        $opts[CURLOPT_WRITEFUNCTION] = function ( $ch, $data ) use( &$i, $fetchowner, $fetchentry, $r, $regex, $pool, $parallel) {
            static $ok;
            if( ! isset( $ok )){
                # inspect only the first header line; explode() also copes with
                # a header block that consists of a single line.
                $lines = explode( "\n", trim( $r->response_header ), 2 );
                $ok = preg_match("#200 OK#i", $lines[0] );
            }
            if( $ok ) {
                $r->body .= $data;
                # an id cut in half by a chunk boundary has no closing quote yet,
                # so it only matches once the next chunk has been appended.
                while( preg_match( $regex, $r->body, $matches, PREG_OFFSET_CAPTURE ) ) {
                    # drop everything up to and including the match so the same
                    # id is never queued twice.
                    $r->body = (string) substr( $r->body, $matches[0][1] + strlen( $matches[0][0] ) );
                    $pool->add( $fetchentry( $matches[1][0] ) );
                    if( count( $pool->requests() ) > $parallel ) $pool->finish();
                    # returning something other than the chunk length makes
                    # curl abort the transfer.
                    if( $i >= $fetchowner ) return FALSE;
                }
            }
            return strlen( $data );
        };
    };
    return $request;
};
# this is the main program.
# runs up to three phases, each driven by the shared counter $i:
#   store      - PUT generated records
#   fetch      - GET records by key
#   fetchowner - search by owner, then GET every key found
try {
    # never run with fewer than one slot, otherwise the fill loops below
    # would spin without ever adding a request.
    $slots = max( 1, (int) $parallel );
    $i = 0;
    if( $store > 0 ){
        while( $i < $store ){
            # top the pool up, but never create more requests than were asked
            # for (the fill loop used to overshoot by up to parallel - 1).
            while( $i < $store && count( $pool->requests() ) < $slots ) $pool->add( $http = $storeentry() );
            $pool->select($secs = 1);
        }
        $pool->finish();
        print "\nSTORED: $i\n";
    }
    $i = 0;
    if( $fetch > 0 ){
        while( $i < $fetch ){
            while( $i < $fetch && count( $pool->requests() ) < $slots ) $pool->add( $http = $fetchentry() );
            $pool->select($secs = 1);
        }
        $pool->finish();
        print "\nFETCHED: $i\n";
    }
    $i = 0;
    if( $fetchowner > 0 ){
        # NOTE(review): $i only advances when a search yields keys, so this loop
        # does not terminate against an index with no matching documents.
        while( $i < $fetchowner ) $fetchownerentry()->send();
        # the write callback queues fetches on the pool; complete the ones
        # still pending before reporting.
        $pool->finish();
        print "\nFETCHED OWNERS: $i\n";
    }
} catch( Exception $e ){
    echo $e;
    # getDebug() is not part of the base Exception class; only call it when the
    # caught exception provides it. print_r() prints on its own, so its return
    # value is no longer echoed (that added a stray "1").
    if( method_exists( $e, 'getDebug' ) ) print_r( $e->getDebug() );
}
print "\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment