-
-
Save chrisjm/1487695 to your computer and use it in GitHub Desktop.
s3-delete-files.rb -- Quickly delete a large number of Amazon S3 files that share a given prefix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# s3-delete-files.rb
# Fog-based script for deleting a large number of Amazon S3 files that share
# a key prefix (~100 files/second).
# Forked from: https://gist.github.com/1383301
#
# Usage: ./s3-delete-files.rb my-bucket 'prefix/directory'
# Credentials are read from the AMAZON_ACCESS_KEY_ID and
# AMAZON_SECRET_ACCESS_KEY environment variables.
#
# Design: one producer thread lists the matching files and pushes them onto a
# queue; `thread_count` consumer threads pop files off the queue and delete
# them. The producer pushes one :EOF sentinel per consumer to shut them down.
require 'rubygems'
require 'thread'
require 'fog'

# Ensure a bucket and a prefix are specified. `abort` writes the message to
# stderr and exits with a non-zero status, without a backtrace.
if ARGV.count < 2
  abort "Specify a bucket and prefix (eg ./s3-delete-files.rb my-bucket 'prefix/directory')"
end

# Fail fast on missing credentials instead of sending empty strings to S3.
access_key_id, secret_access_key = %w[AMAZON_ACCESS_KEY_ID AMAZON_SECRET_ACCESS_KEY].map do |name|
  ENV.fetch(name) { abort "Missing environment variable #{name}" }
end

# Set up threads and variables.
bucket_name = ARGV[0]
prefix = ARGV[1]
thread_count = 20
threads = []
queue = Queue.new
semaphore = Mutex.new # guards total_listed and total_deleted
total_listed = 0
total_deleted = 0

puts "== Deleting files in '#{bucket_name}' from '#{prefix}' =="

# Create new Fog S3 connection.
s3 = Fog::Storage::AWS.new(:aws_access_key_id => access_key_id,
                           :aws_secret_access_key => secret_access_key)

# Fetch the files for the bucket.
threads << Thread.new do
  Thread.current[:name] = "get files"
  puts "...started thread '#{Thread.current[:name]}'...\n"
  begin
    # NOTE(review): Fog appears to return nil here for a missing bucket --
    # confirm against the installed fog version.
    directory = s3.directories.get(bucket_name)
    raise "Bucket '#{bucket_name}' not found" unless directory

    # Get all the files from this bucket. Fog handles pagination internally.
    directory.files.all(:prefix => prefix).each do |file|
      # Add this file to the queue.
      queue.enq(file)
      semaphore.synchronize { total_listed += 1 }
    end
  ensure
    # Always signal the deletion threads to stop, even if listing failed;
    # otherwise they would block on the empty queue forever.
    thread_count.times { queue.enq(:EOF) }
  end
end

# Delete all the files in the queue until EOF with N threads.
thread_count.times do |count|
  threads << Thread.new(count) do |number|
    Thread.current[:name] = "delete files(#{number})"
    puts "...started thread '#{Thread.current[:name]}'...\n"
    loop do
      # Dequeue the next file. (Blocks until an item is available.)
      file = queue.deq
      break if file == :EOF

      file.destroy
      # Count only real deletions (never the :EOF sentinel), and snapshot both
      # counters under the lock so the progress line is consistent.
      deleted, listed = semaphore.synchronize { [total_deleted += 1, total_listed] }
      # Print progress for roughly 1% of deletions to keep output manageable.
      puts "Deleted #{deleted} out of #{listed}\n" if rand(100) == 1
    end
  end
end

# Wait for the threads to finish. Thread#join re-raises an exception that
# terminated the thread; rescue StandardError (not just RuntimeError) so that
# one failed thread does not stop us from waiting on the remaining ones.
threads.each do |t|
  begin
    t.join
  rescue StandardError => e
    puts "Failure on thread #{t[:name]}: #{e.message}"
  end
end

# All threads have finished, so the counters can be read without the lock.
puts "== Done: deleted #{total_deleted} out of #{total_listed} listed files =="
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment