Delete an S3 bucket (and all of its contents), from http://stuff.mit.edu/~jik/software/delete-s3-bucket.pl
#!/usr/bin/perl

# Copyright (c) 2010 Jonathan Kamens.
# Released under the GNU General Public License, Version 3.
# See <http://www.gnu.org/licenses/>.

# $Id: delete-s3-bucket.pl,v 1.5 2010/10/26 14:11:09 jik Exp $

# Deleting an Amazon S3 bucket is hard.
#
# * You can't delete the bucket unless it is empty.
#
# * There is no API for telling Amazon to empty the bucket, so you have to
# delete all of the objects one by one yourself.
#
# * If you've recently added a lot of large objects to the bucket, then they
# may not all be visible yet on all S3 servers. This means that even after the
# server you're talking to thinks all the objects are deleted and lets you
# delete the bucket, additional objects can continue to propagate around the S3
# server network. If you then recreate the bucket with the same name, those
# additional objects will magically appear in it!
#
# It is not clear to me whether the bucket delete will eventually propagate to
# all of the S3 servers and cause all the objects in the bucket to go away, but
# I suspect it won't. I also suspect that you may end up continuing to be
# charged for these phantom objects even though the bucket they're in is no
# longer even visible in your S3 account.
#
# * If there's a CR, LF, or CRLF in an object name, then it's sent just that
# way in the XML that gets sent from the S3 server to the client when the
# client asks for a list of objects in the bucket. Unfortunately, the XML
# parser on the client will probably convert it to the local line-ending
# character, and if it's different from the character that's actually in the
# object name, you then won't be able to delete it. Ugh! This is a bug in the
# S3 protocol; it should be enclosing the object names in CDATA tags or
# something to protect them from being munged by the XML parser.
#
# Note that this bug even affects the AWS Web Console provided by Amazon!
#
# * If you've got a whole lot of objects and you serialize the delete process,
# it'll take a long, long time to delete them all.
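#
# Example invocation (the access key, secret, and bucket name below are
# placeholder values; substitute your own):
#
#     ./delete-s3-bucket.pl --access-key-id=AKIAXXXXXXXXXXXXXXXX \
#         --secret-access-key=YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY \
#         --bucket=my-doomed-bucket --processes=16 --wait=300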
use threads;
use strict;
use warnings;

# Keys can have newlines in them, which screws up the communication
# between the parent and child processes, so use URL encoding to deal
# with that.
use CGI qw(escape unescape); # Easiest place to get this functionality.

use File::Basename;
use Getopt::Long;
use Net::Amazon::S3;
my $whoami = basename $0;
my $usage = "Usage: $whoami [--help] --access-key-id=id --secret-access-key=key
--bucket=name [--processes=#] [--wait=#] [--nodelete]

Specify --processes to indicate how many deletes to perform in
parallel. You're limited by RAM (to hold the parallel threads) and
bandwidth for the S3 delete requests.

Specify --wait to indicate seconds to require the bucket to be verified
empty. This is necessary if you create a huge number of objects and then
try to delete the bucket before they've all propagated to all the S3
servers (I've seen a huge backlog of newly created objects take *hours* to
propagate everywhere). See the comment at the top of the script for more
information about this issue.

Specify --nodelete to empty the bucket without actually deleting it.\n";
my($aws_access_key_id, $aws_secret_access_key, $bucket_name, $wait);
my $procs = 1;
my $delete = 1;

die if (! GetOptions(
    "help" => sub { print $usage; exit; },
    "access-key-id=s" => \$aws_access_key_id,
    "secret-access-key=s" => \$aws_secret_access_key,
    "bucket=s" => \$bucket_name,
    "processes=i" => \$procs,
    "wait=i" => \$wait,
    "delete!" => \$delete,
));
die if (! ($aws_access_key_id && $aws_secret_access_key && $bucket_name));

my $increment = 0;

print "Incrementally deleting the contents of $bucket_name\n";

$| = 1;
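# Spin up the worker threads. Each worker gets a pair of pipes: the parent
# writes URL-escaped key names (one per line) to the worker, and the worker
# writes a blank line back when asked to synchronize. Both ends are set to
# autoflush so lines are delivered immediately.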
my(@procs, $current);
for (1..$procs) {
    my($read_from_parent, $write_to_child);
    my($read_from_child, $write_to_parent);
    pipe($read_from_parent, $write_to_child) or die;
    pipe($read_from_child, $write_to_parent) or die;
    threads->create(sub {
        close($read_from_child);
        close($write_to_child);
        my $old_select = select $write_to_parent;
        $| = 1;
        select $old_select;
        &child($read_from_parent, $write_to_parent);
    }) or die;
    close($read_from_parent);
    close($write_to_parent);
    my $old_select = select $write_to_child;
    $| = 1;
    select $old_select;
    push(@procs, [$read_from_child, $write_to_child]);
}
my $s3 = Net::Amazon::S3->new({aws_access_key_id => $aws_access_key_id,
                               aws_secret_access_key => $aws_secret_access_key,
                               retry => 1,
                              });
my $bucket = $s3->bucket($bucket_name);

my $deleted = 1;
my $total_deleted = 0;
my $last_start = time;
my($start, $waited);
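# Main loop: list a batch of keys, hand them out to the workers round-robin,
# then synchronize with the workers before listing again. When a listing
# comes back empty, optionally wait --wait seconds and re-check before
# concluding that the bucket really is empty.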
while ($deleted > 0) {
    $start = time;
    print "\nLoading ", ($increment ? "up to $increment" :
                         "as many as possible"), " keys...\n";
    my $response = $bucket->list({$increment ? ('max-keys' => $increment) : ()})
        or die $s3->err . ": " . $s3->errstr . "\n";
    $deleted = scalar(@{ $response->{keys} });
    if (! $deleted) {
        if ($wait and ! $waited) {
            my $delta = $wait - ($start - $last_start);
            if ($delta > 0) {
                print "Waiting $delta second(s) to confirm bucket is empty\n";
                sleep($delta);
                $waited = 1;
                $deleted = 1;
                next;
            }
            else {
                last;
            }
        }
        else {
            last;
        }
    }
    else {
        $waited = undef;
    }
    $total_deleted += $deleted;
    print "\nDeleting $deleted keys ($total_deleted total)...\n";
    $current = 0;
    foreach my $key ( @{ $response->{keys} } ) {
        my $key_name = $key->{key};
        while (! &send(escape($key_name) . "\n")) {
            print "Thread $current died\n";
            die "No threads left\n" if (@procs == 1);
            if ($current == @procs-1) {
                pop @procs;
                $current = 0;
            }
            else {
                $procs[$current] = pop @procs;
            }
        }
        $current = ($current + 1) % @procs;
        threads->yield();
    }
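    # Synchronize: send each worker a blank line and wait for it to echo one
    # back, which means it has finished every delete queued so far. Workers
    # whose pipes have died are dropped from the pool.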
    print "Sending sync message\n";
    for ($current = 0; $current < @procs; $current++) {
        if (! &send("\n")) {
            print "Thread $current died sending sync\n";
            if ($current == @procs-1) {
                pop @procs;
                last;
            }
            $procs[$current] = pop @procs;
            $current--;
        }
        threads->yield();
    }
    print "Reading sync response\n";
    for ($current = 0; $current < @procs; $current++) {
        if (! &receive()) {
            print "Thread $current died reading sync\n";
            if ($current == @procs-1) {
                pop @procs;
                last;
            }
            $procs[$current] = pop @procs;
            $current--;
        }
        threads->yield();
    }
}
continue {
    $last_start = $start;
}

if ($delete) {
    print "Deleting bucket...\n";
    $bucket->delete_bucket or die $s3->err . ": " . $s3->errstr;
    print "Done.\n";
}
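# send() writes a line to the pipe of the worker selected by $current, and
# receive() reads one line back; a false return value means that worker's
# pipe is gone (i.e. the thread died), and the caller drops it from the pool.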
sub send {
    my($str) = @_;
    my $fh = $procs[$current]->[1];
    print($fh $str);
}

sub receive {
    my $fh = $procs[$current]->[0];
    scalar <$fh>;
}
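# Worker body: each thread opens its own S3 connection, then loops reading
# escaped key names from the parent, unescaping and deleting them. A bare
# blank line is a sync request and is echoed straight back to the parent.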
sub child {
    my($read, $write) = @_;
    threads->detach();
    my $s3 = Net::Amazon::S3->new({aws_access_key_id => $aws_access_key_id,
                                   aws_secret_access_key => $aws_secret_access_key,
                                   retry => 1,
                                  });
    my $bucket = $s3->bucket($bucket_name);
    while (my $key = <$read>) {
        if ($key eq "\n") {
            print($write "\n") or die;
            next;
        }
        chomp $key;
        $key = unescape($key);
        if ($key =~ /[\r\n]/) {
            # The listing XML may have munged CR/LF sequences in the key name
            # (see the comment at the top of the script), so try every
            # combination of line endings until one matches a real object.
            my(@parts) = split(/\r\n|\r|\n/, $key, -1);
            my(@guesses) = shift @parts;
            foreach my $part (@parts) {
                @guesses = (map(($_ . "\r\n" . $part,
                                 $_ . "\r" . $part,
                                 $_ . "\n" . $part), @guesses));
            }
            foreach my $guess (@guesses) {
                if ($bucket->get_key($guess)) {
                    $key = $guess;
                    last;
                }
            }
        }
        $bucket->delete_key($key) or
            die $s3->err . ": " . $s3->errstr . "\n";
        print ".";
        threads->yield();
    }
    return;
}