-
-
Save Randommood/1368072 to your computer and use it in GitHub Desktop.
Description here: http://blog.zawodny.com/2011/03/06/mongodb-pre-splitting-for-faster-data-loading-and-importing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;

# This script will generate the commands used to preshard a collection
# in mongodb.  It prints mongo shell commands; nothing is sent to a server.
#
# Every setting below is a default that can be overridden on the command
# line, for example:
#
#   --db=archive --collection=postings --num-shards=3
#
# Run without options to get the built-in defaults.
#
# See:
#   http://www.mongodb.org/display/DOCS/Splitting+Chunks
#   http://www.mongodb.org/display/DOCS/Moving+Chunks

# ranges
my $min_id     = 1;
my $max_id     = 2**31;
my $num_shards = 3;

# namespace stuff (read by split_chunk, move_chunk and shard_to_name)
my $db           = 'archive';
my $collection   = 'postings';
my $shard_key    = 'PostingID';
my $shard_prefix = 'archive';

# sizes
my $avg_doc_size = 2200;                 # bytes
my $chunk_size   = 200 * 1024 * 1024;    # bytes, 200MB default in mongo

# let's plan for the future a bit too (increase max id by 20%)
my $growth_factor = 1.20;

GetOptions(
    'min-id=i'       => \$min_id,
    'max-id=i'       => \$max_id,
    'num-shards=i'   => \$num_shards,
    'db=s'           => \$db,
    'collection=s'   => \$collection,
    'shard-key=s'    => \$shard_key,
    'shard-prefix=s' => \$shard_prefix,
    'avg-doc-size=i' => \$avg_doc_size,
    'chunk-size=i'   => \$chunk_size,
    'growth=f'       => \$growth_factor,
) or die "Usage: $0 [--min-id=N] [--max-id=N] [--num-shards=N] [--db=NAME]\n"
       . "       [--collection=NAME] [--shard-key=FIELD] [--shard-prefix=STR]\n"
       . "       [--avg-doc-size=BYTES] [--chunk-size=BYTES] [--growth=FACTOR]\n";

# Guard against values that would divide or take a modulus by zero.
die "--num-shards must be at least 1\n"   if $num_shards < 1;
die "--avg-doc-size must be at least 1\n" if $avg_doc_size < 1;

# figure out num items per chunk based on above
my $ids_per_chunk = int($chunk_size / $avg_doc_size);

# A step below 1 would never advance past $max_id, so the loop would not end.
die "--chunk-size must be at least --avg-doc-size\n" if $ids_per_chunk < 1;

# spit out info
print "// $db.$collection sharded on $shard_key, $ids_per_chunk per chunk\n";
print "use admin;\n";

# apply the growth headroom before emitting anything
$max_id = int($max_id * $growth_factor);

# emit the commands: one split point every $ids_per_chunk ids, with the
# chunk found at each split point assigned to the shards round-robin
my $count = 0;
for (my $id = $min_id + $ids_per_chunk; $id <= $max_id; $id += $ids_per_chunk) {
    split_chunk($id);
    move_chunk($id, $count % $num_shards);
    $count++;
}

# $count is the number of split/move pairs emitted above
# (27,033 with the built-in defaults)
print "// $count total chunks\n";
exit;
# Print the mongo shell command that splits "$db.$collection" at the given
# shard-key value:
#
#   db.runCommand({ split: "$db.$collection",
#                   middle: { $shard_key: $id } })
#
# Takes the shard-key value to split at.  Reads the file-level $db,
# $collection and $shard_key settings and prints one line.
sub split_chunk {
    my $split_at  = shift;
    my $namespace = "$db.$collection";
    my $command   = join '',
        qq[db.runCommand({split: "$namespace", ],
        qq[middle: { $shard_key: $split_at } })];
    print $command . ";\n";
}
# Print the mongo shell command that moves the chunk found at the given
# shard-key value onto a shard:
#
#   db.runCommand({ moveChunk: "$db.$collection",
#                   find: { $shard_key: $id },
#                   to: "$shard" })
#
# Takes the shard-key value used to find the chunk and a shard number, which
# is converted to a shard name by shard_to_name().  Reads the file-level $db,
# $collection and $shard_key settings and prints one line.
sub move_chunk {
    my $find_id     = shift;
    my $shard_index = shift;
    my $target      = shard_to_name($shard_index);
    printf qq[db.runCommand({moveChunk: "%s", find: { %s: %s }, to: "%s"});\n],
        "$db.$collection", $shard_key, $find_id, $target;
}
# Build a shard name from a shard number: the file-level $shard_prefix
# followed by the number plus one, zero-padded to at least three digits
# (so 0 becomes "<prefix>001").
sub shard_to_name {
    my $shard_index = shift;
    return sprintf('%s%03d', $shard_prefix, $shard_index + 1);
}
__END__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment