Created
June 18, 2012 16:42
-
-
Save preaction/2949336 to your computer and use it in GitHub Desktop.
MooseX::Runnable module to archive/delete files in a directory based on size, age, and/or total size rules.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package CleanupDir; | |
| use Moose; | |
| with 'MooseX::Getopt'; | |
| with 'MooseX::Runnable'; | |
| use File::Find; | |
| use File::stat; | |
| use autodie qw( :system ); | |
| use Log::Log4perl qw( :easy ); | |
| Log::Log4perl->easy_init; | |
| use Moose::Util::TypeConstraints; | |
| my %AGE_TYPES = (); | |
| $AGE_TYPES{d} = 60*60*24; | |
| $AGE_TYPES{w} = $AGE_TYPES{d} * 7; | |
| $AGE_TYPES{m} = $AGE_TYPES{d} * 30; | |
| $AGE_TYPES{y} = $AGE_TYPES{w} * 52; | |
| subtype 'FileAge' => ( | |
| as 'Int', # File age is always stored in seconds | |
| ); | |
| coerce 'FileAge' => ( | |
| from 'Str' => via { | |
| my $age = lc $_; | |
| my ( $int, $type ) = $age =~ /(\d+)(d|m|w|y)/; | |
| die "Invalid age type '$type'" unless exists $AGE_TYPES{$type}; | |
| return $int * $AGE_TYPES{$type}; | |
| }, | |
| ); | |
| my %SIZE_TYPES = ( | |
| k => 1024 ** 1, | |
| m => 1024 ** 2, | |
| g => 1024 ** 3, | |
| t => 1024 ** 4, | |
| ); | |
| subtype 'FileSize' => ( | |
| as 'Int', # File size is always stored in bytes | |
| ); | |
| coerce 'FileSize' => ( | |
| from 'Str' => via { | |
| my $size = lc $_; | |
| my ( $int, $type ) = $size =~ /(\d+)(k|m|g|t)/; | |
| die "Invalid size type '$type'" unless exists $SIZE_TYPES{$type}; | |
| return $int * $SIZE_TYPES{$type}; | |
| }, | |
| ); | |
| MooseX::Getopt::OptionTypeMap->add_option_type_to_map( | |
| 'FileAge' => '=s', | |
| ); | |
| MooseX::Getopt::OptionTypeMap->add_option_type_to_map( | |
| 'FileSize' => '=s', | |
| ); | |
| has 'delete' => ( | |
| is => 'ro', | |
| isa => 'Bool', | |
| trigger => exclusive_with( 'delete', 'compress' ), | |
| ); | |
| has 'compress' => ( | |
| is => 'ro', | |
| isa => 'Bool', | |
| trigger => exclusive_with( 'delete', 'compress' ), | |
| ); | |
| has 'stdin' => ( | |
| is => 'ro', | |
| isa => 'Bool', | |
| ); | |
| has 'age' => ( | |
| is => 'ro', | |
| isa => 'FileAge', | |
| coerce => 1, | |
| ); | |
| has 'size' => ( | |
| is => 'ro', | |
| isa => 'FileSize', | |
| coerce => 1, | |
| ); | |
| has 'total_size' => ( | |
| is => 'ro', | |
| isa => 'FileSize', | |
| coerce => 1, | |
| ); | |
| sub run { | |
| my ( $self, @argv ) = @_; | |
| die "Must specify one of --delete or --compress" | |
| unless $self->delete || $self->compress; | |
| # Create an iterator for all files passed in | |
| my $iter; $iter = sub { | |
| # Execute a coderef to get files one at a time, like <> | |
| if ( ref $argv[0] eq 'CODE' ) { | |
| my $output = $argv[0]->(); | |
| if ( defined $output ) { | |
| return $output; | |
| } | |
| else { | |
| shift @argv; # Done with the sub | |
| # ... and repeat | |
| return $iter->(); | |
| } | |
| } | |
| return shift @argv; | |
| }; | |
| # Add a coderef to the iterator to get files from STDIN one at a time | |
| if ( $self->stdin ) { | |
| push @argv, sub { my $file = <STDIN>; return unless $file; chomp $file; return $file; }; | |
| } | |
| # Scan inputs and gather data | |
| my $now = time; | |
| my $total_size = 0; | |
| my @files; | |
| while ( my $file = $iter->() ) { | |
| next if $file =~ /^[.]/; # Do not do hidden files (this may be optional later) | |
| next if $file =~ /^[.]nfs/; # Absolutely do not do NFS special files... | |
| if ( -d $file ) { | |
| # Add all the regular files from this directory | |
| find( sub { -f && push @argv, $File::Find::name }, $file ); | |
| } | |
| elsif ( -f $file ) { | |
| # Gather our desired data | |
| my $size = -s $file; | |
| my $age = $now - stat( $file )->mtime; | |
| push @files, { | |
| name => $file, | |
| size => $size, | |
| age => $age, | |
| }; | |
| $total_size += $size; | |
| } | |
| } | |
| INFO "Total size: $total_size"; | |
| # Perform the desired actions | |
| # First age and size | |
| my @actionable = grep { ( $self->size && $_->{size} >= $self->size ) || ( $self->age && $_->{age} >= $self->age ) } @files; | |
| for my $file ( @actionable ) { | |
| $total_size += $self->_cleanup( $file ); | |
| } | |
| # Now, if we're still over the total size, start acting on old files | |
| if ( $self->total_size && $total_size > $self->total_size ) { | |
| my @old_files = sort { $b->{age} <=> $a->{age} } @files; | |
| my $old_idx = 0; | |
| while ( $total_size > $self->total_size && $old_idx < @old_files ) { | |
| $total_size += $self->_cleanup( $old_files[$old_idx] ); | |
| $old_idx++; | |
| } | |
| } | |
| return 0; | |
| } | |
| # Do the desired cleanup action, return the difference in total size after | |
| # the action | |
| sub _cleanup { | |
| my ( $self, $file ) = @_; | |
| my $size_adjust = 0; | |
| if ( $self->delete ) { | |
| INFO "Deleting " . $file->{name}; | |
| unlink $file->{name} or die sprintf "Could not unlink '%s': %s", $file->{name}, $!; | |
| $size_adjust -= $file->{size}; | |
| } | |
| elsif ( $self->compress && $file->{name} !~ /[.]gz$/ ) { | |
| INFO "Compressing " . $file->{name}; | |
| system( "gzip", $file->{name} ); | |
| $size_adjust -= $file->{size}; | |
| $size_adjust += -s ($file->{name}.".gz"); | |
| } | |
| return $size_adjust; | |
| } | |
| # Dies if any more than one of the given attrs have been set | |
| sub exclusive_with { | |
| my ( @attrs ) = @_; | |
| return sub { | |
| my ( $self ) = @_; | |
| my $found = scalar grep { defined $self->$_ } @attrs; | |
| die sprintf( "Cannot set more than one of %s", join( ", ", @attrs ) ) if $found > 1; | |
| }; | |
| } | |
| 1; | |
| __END__ | |
| =head1 NAME | |
| CleanupDir - Clean up a directory by compressing or deleting files based on rules | |
| =head1 SYNOPSIS | |
| mx-run CleanupDir <--delete|--compress> [--age <age>] [--size <size>] [--total_size <size>] <file|dir>... --stdin | |
| mx-run -h | |
| =head1 DESCRIPTION | |
| This module will clean up a directory (or a list of files) by compressing or | |
| deleting files if they are too old, if they are too big, or if the total size | |
| of all the files exceeds a certain amount. | |
| A good use-case is to run this as two entries in a crontab. Once to compress | |
| files at certain thresholds, another time to delete them (with higher | |
| thresholds). | |
| =head1 ARGUMENTS | |
| =head2 file | |
| A file to check. | |
| =head2 dir | |
| A directory to descend into recursively, checking all regular files inside. | |
| =head1 OPTIONS | |
| =head2 stdin | |
| Read the list of file/dir names from stdin. Useful for piping output from C<find>. | |
| =head2 age | |
| The allowable age of the file, either in seconds or with units | |
| 1y - 1 year | |
| 14d - 14 days | |
| 2w - 2 weeks | |
| 4m - 4 months | |
| =head2 size | |
| The allowable size of a single file, either in bytes or with units | |
| 1m - 1 megabyte | |
| 10k - 10 kilobytes | |
| 2g - 2 gigabytes | |
| =head2 total_size | |
| The allowable size of all files in all directories passed in. Will | |
| compress/delete the oldest files until under this C<total_size>. Takes the same | |
| format as C<size>. | |
| =head2 compress|gzip|z | |
| Compress any files above the thresholds. Exclusive with C<delete>. Either this | |
| or C<delete> is required. | |
| =head2 delete|d | |
| Delete any files above the thresholds. Exclusive with C<compress>. Either this | |
| or C<compress> is required. | |
| =head1 AUTHOR | |
| Doug Bell | |
| =head1 LICENSE | |
| Copyright 2012 Doug Bell (preaction) | |
| This module is free software; you can redistribute it and/or modify it under | |
| the same terms as Perl 5.14.2. | |
| This program is distributed in the hope that it will be | |
| useful, but without any warranty; without even the implied | |
| warranty of merchantability or fitness for a particular purpose. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment