@dewey
Last active November 15, 2025 12:44
Quick Go script to delete vaults from Amazon Glacier. More info: https://blog.notmyhostna.me/posts/cleaning-up-amazon-glacier
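A hypothetical invocation (the account ID, region, and vault name are placeholders; the flags are defined in main below):

    go run main.go -account-id 123456789012 -region us-east-1 -vault-name my-vault -input output.json -workers 5

Interrupted runs can be resumed: archive IDs already written to deleted-archive-ids.txt are skipped on the next start.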
package main

import (
	"bufio"
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"sync"
	"sync/atomic"
	"time"

	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/glacier"
)
// InventoryOutput represents the structure of the AWS Glacier inventory output
type InventoryOutput struct {
	VaultARN      string `json:"VaultARN"`
	InventoryDate string `json:"InventoryDate"`
	ArchiveList   []struct {
		ArchiveId          string `json:"ArchiveId"`
		ArchiveDescription string `json:"ArchiveDescription"`
		CreationDate       string `json:"CreationDate"`
		Size               int64  `json:"Size"`
		SHA256TreeHash     string `json:"SHA256TreeHash"`
	} `json:"ArchiveList"`
}
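// A hypothetical inventory snippet matching the struct above (all values are
// placeholders, not taken from a real vault):
//
//	{
//	  "VaultARN": "arn:aws:glacier:us-east-1:123456789012:vaults/example",
//	  "InventoryDate": "2025-01-01T00:00:00Z",
//	  "ArchiveList": [
//	    {
//	      "ArchiveId": "abc...",
//	      "ArchiveDescription": "",
//	      "CreationDate": "2024-01-01T00:00:00Z",
//	      "Size": 1048576,
//	      "SHA256TreeHash": "deadbeef..."
//	    }
//	  ]
//	}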
func main() {
	// Command line flags
	accountID := flag.String("account-id", "", "AWS Account ID")
	region := flag.String("region", "", "AWS Region")
	vaultName := flag.String("vault-name", "", "Glacier Vault Name")
	inputFile := flag.String("input", "output.json", "Input JSON file with inventory")
	workers := flag.Int("workers", 3, "Number of concurrent workers")
	delayMs := flag.Int("delay", 100, "Delay between requests in milliseconds")
	flag.Parse()

	// Fall back to environment variables before validating; checking them
	// after the validation below would make the fallback unreachable.
	if *accountID == "" {
		*accountID = os.Getenv("AWS_ACCOUNT_ID")
	}
	if *region == "" {
		*region = os.Getenv("AWS_REGION")
	}
	if *vaultName == "" {
		*vaultName = os.Getenv("AWS_VAULT_NAME")
	}

	// Validate required parameters
	if *accountID == "" || *region == "" || *vaultName == "" {
		fmt.Println("Missing required parameters!")
		fmt.Println("Usage:")
		flag.PrintDefaults()
		fmt.Println("\nEnvironment variables can also be used:")
		fmt.Println("  AWS_ACCOUNT_ID")
		fmt.Println("  AWS_REGION")
		fmt.Println("  AWS_VAULT_NAME")
		os.Exit(1)
	}
log.Printf("Starting AWS Glacier cleanup at %s\n", time.Now().Format(time.RFC3339))
log.Printf("Account ID: %s, Region: %s, Vault: %s\n", *accountID, *region, *vaultName)
// Extract archive IDs from input file or use existing list
archiveIDFile := "output-archive-ids.txt"
archiveIDs, err := getArchiveIDs(*inputFile, archiveIDFile)
if err != nil {
log.Fatalf("Failed to get archive IDs: %v", err)
}
log.Printf("Total archives to delete: %d\n", len(archiveIDs))
if len(archiveIDs) == 0 {
log.Println("No archives to delete. Exiting.")
return
}
// Initialize AWS SDK
ctx := context.Background()
cfg, err := config.LoadDefaultConfig(ctx, config.WithRegion(*region))
if err != nil {
log.Fatalf("Failed to load AWS config: %v", err)
}
client := glacier.NewFromConfig(cfg)
// Create a progress file to track deleted archives
progressFile := "deleted-archive-ids.txt"
deletedSet := loadDeletedArchives(progressFile)
// Filter out already deleted archives
remainingArchives := []string{}
for _, id := range archiveIDs {
if !deletedSet[id] {
remainingArchives = append(remainingArchives, id)
}
}
log.Printf("Archives already deleted: %d\n", len(archiveIDs)-len(remainingArchives))
log.Printf("Archives remaining: %d\n", len(remainingArchives))
if len(remainingArchives) == 0 {
log.Println("All archives already deleted. Exiting.")
return
}
// Open progress file for appending
progress, err := os.OpenFile(progressFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Fatalf("Failed to open progress file: %v", err)
}
defer progress.Close()
	// Set up concurrent deletion
	var wg sync.WaitGroup
	archiveChan := make(chan string, *workers)
	var completed, failed atomic.Int64
	var progressMutex sync.Mutex

	// Calculate 5% threshold for progress logging
	progressThreshold := len(remainingArchives) / 20 // 5%
	if progressThreshold == 0 {
		progressThreshold = 1
	}

	// Start worker goroutines
	for i := 0; i < *workers; i++ {
		wg.Add(1)
		go func(workerID int) {
			defer wg.Done()
			for archiveID := range archiveChan {
				// Retry with quadratic backoff (1s, 4s, 9s, 16s between attempts)
				var err error
				maxRetries := 5
				for attempt := 0; attempt < maxRetries; attempt++ {
					if attempt > 0 {
						backoff := time.Duration(attempt*attempt) * time.Second
						time.Sleep(backoff)
					}
					err = deleteArchive(ctx, client, *accountID, *vaultName, archiveID)
					if err == nil {
						break
					}
				}
				if err != nil {
					failed.Add(1)
				} else {
					completed.Add(1)
					// Write to progress file; the mutex serializes appends
					// from concurrent workers
					progressMutex.Lock()
					fmt.Fprintln(progress, archiveID)
					progressMutex.Unlock()
					// Log progress every 5%
					current := completed.Load()
					if current%int64(progressThreshold) == 0 {
						log.Printf("Progress: %d/%d deleted, %d failed (%.1f%% complete) - %s\n",
							current,
							len(remainingArchives),
							failed.Load(),
							float64(current)/float64(len(remainingArchives))*100,
							time.Now().Format(time.RFC3339))
					}
				}
				// Add delay between requests to avoid rate limiting
				if *delayMs > 0 {
					time.Sleep(time.Duration(*delayMs) * time.Millisecond)
				}
			}
		}(i)
	}

	// Feed archives to workers; closing the channel ends each worker's
	// range loop once the queue drains
	for _, archiveID := range remainingArchives {
		archiveChan <- archiveID
	}
	close(archiveChan)
	wg.Wait()

	log.Printf("Finished at %s\n", time.Now().Format(time.RFC3339))
	log.Printf("Total deleted: %d, Failed: %d\n", completed.Load(), failed.Load())
	log.Printf("Deleted archive IDs are in %s\n", progressFile)
}
// getArchiveIDs extracts archive IDs from the inventory JSON file.
// If a cached list exists, it uses that instead.
func getArchiveIDs(inputFile, cacheFile string) ([]string, error) {
	// Check if cached list exists
	if _, err := os.Stat(cacheFile); err == nil {
		log.Printf("Using existing archive ID list from %s\n", cacheFile)
		return readArchiveIDsFromFile(cacheFile)
	}
	log.Printf("Extracting archive IDs from %s...\n", inputFile)
	data, err := os.ReadFile(inputFile)
	if err != nil {
		return nil, fmt.Errorf("failed to read input file: %w", err)
	}
	var inventory InventoryOutput
	if err := json.Unmarshal(data, &inventory); err != nil {
		return nil, fmt.Errorf("failed to parse JSON: %w", err)
	}
	archiveIDs := make([]string, 0, len(inventory.ArchiveList))
	for _, archive := range inventory.ArchiveList {
		archiveIDs = append(archiveIDs, archive.ArchiveId)
	}
	// Save to cache file
	if err := writeArchiveIDsToFile(cacheFile, archiveIDs); err != nil {
		log.Printf("Warning: Failed to write cache file: %v\n", err)
	}
	return archiveIDs, nil
}
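// The inventory JSON itself has to be requested from Glacier first. A sketch
// using the same SDK (hypothetical snippet, not called anywhere in this
// script; it would also need the glacier/types subpackage imported):
//
//	jobType := "inventory-retrieval"
//	out, err := client.InitiateJob(ctx, &glacier.InitiateJobInput{
//		AccountId:     &accountID,
//		VaultName:     &vaultName,
//		JobParameters: &types.JobParameters{Type: &jobType},
//	})
//
// Inventory jobs typically take hours; once the job completes, GetJobOutput
// returns the JSON document that -input expects.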
// readArchiveIDsFromFile reads archive IDs from a text file (one per line)
func readArchiveIDsFromFile(filename string) ([]string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	var ids []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		if line != "" {
			ids = append(ids, line)
		}
	}
	return ids, scanner.Err()
}
// writeArchiveIDsToFile writes archive IDs to a text file (one per line)
func writeArchiveIDsToFile(filename string, ids []string) error {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}
	defer file.Close()
	writer := bufio.NewWriter(file)
	for _, id := range ids {
		fmt.Fprintln(writer, id)
	}
	return writer.Flush()
}
// loadDeletedArchives loads the set of already deleted archives from the progress file
func loadDeletedArchives(filename string) map[string]bool {
	deleted := make(map[string]bool)
	file, err := os.Open(filename)
	if err != nil {
		// File doesn't exist yet, return empty set
		return deleted
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		if line != "" {
			deleted[line] = true
		}
	}
	return deleted
}
// deleteArchive deletes a single archive from the vault
func deleteArchive(ctx context.Context, client *glacier.Client, accountID, vaultName, archiveID string) error {
	input := &glacier.DeleteArchiveInput{
		AccountId: &accountID,
		VaultName: &vaultName,
		ArchiveId: &archiveID,
	}
	_, err := client.DeleteArchive(ctx, input)
	return err
}
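
// deleteVault is a hypothetical final step, not called by this script:
// Glacier refuses to delete a non-empty vault, and only treats it as empty
// after its next (roughly daily) inventory has run. Once that happens, a
// call like this removes the vault itself.
func deleteVault(ctx context.Context, client *glacier.Client, accountID, vaultName string) error {
	input := &glacier.DeleteVaultInput{
		AccountId: &accountID,
		VaultName: &vaultName,
	}
	_, err := client.DeleteVault(ctx, input)
	return err
}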