Created
October 14, 2013 17:25
-
-
Save Util/6979020 to your computer and use it in GitHub Desktop.
CDR parser without outer hash to group records.
Tested, working code.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# I think that this algorithm for handling the gzip'ed output files
# will be faster, and vastly better in memory use.
use Modern::Perl; | |
# Number of buffered records that triggers writing one gzip'ed records file
# (the last file written may hold a different number of records).
my $divider = 65000;

# $ARGV[0] supplies the stamps used in output file names. The unpack template
# skips 6 characters, takes 8 as $fileDate, skips 1, and takes 4 as $fileMinute.
# unpack croaks with "'x' outside of string" when the argument is missing or
# shorter than 15 characters, so fail early with a message that says what is
# actually wrong.
die "Usage: $0 NAME  (NAME needs an 8-character date at offset 6 and a "
    . "4-character time at offset 15; records are read from STDIN)\n"
    if !defined $ARGV[0] || length( $ARGV[0] ) < 15;
my ( $fileDate, $fileMinute ) = unpack 'x6 a8 x1 a4', $ARGV[0];
# Render a records-file counter as a decimal string, zero-padded on the left
# to at least two digits (0 -> '00', 7 -> '07', 100 -> '100').
sub format_a_records_file_counter {
    my ($counter) = @_;
    return sprintf( '%02d', $counter );
}
# Sequence number used for the next gzip'ed records file; always numeric.
my $RECORDS_FILE_COUNTER = 0;

# The same counter rendered by format_a_records_file_counter(); always a
# formatted string of digits.
my $SUFFIX = format_a_records_file_counter( $RECORDS_FILE_COUNTER );
# write_a_records_file( $fileDate, $fileMinute, \@records )
#
# Pipes every element of @records, one per line, through `gzip -5 -c` into
#   /data/sonus/cdr/$fileDate/SONUS_<fileDate>_<fileMinute><NN>.gz
# where <NN> is $RECORDS_FILE_COUNTER as rendered by
# format_a_records_file_counter().
#
# This sub also handles *all* updates to $RECORDS_FILE_COUNTER and $SUFFIX,
# besides initialization: both advance after each close of a gzip'ed records
# file. The array passed in by reference is emptied before returning.
#
# Dies when not called with exactly three arguments, when $fileDate or
# $fileMinute contains anything other than [A-Za-z0-9_-], or when the gzip
# pipeline cannot be started. A failure reported by close() only warns; the
# counter still advances and the array is still cleared in that case.
#
# Returns nothing.
sub write_a_records_file {
    die 'write_a_records_file() expects ( $fileDate, $fileMinute, \@records )'
        if @_ != 3;
    my ( $fileDate, $fileMinute, $records_aref ) = @_;

    # Both values are interpolated into a shell command line and a path
    # below, so refuse anything the shell could interpret or that could
    # escape the target directory.
    for my $part ( $fileDate, $fileMinute ) {
        die "unsafe file name component '" . ( $part // 'undef' ) . "'"
            if !defined $part || $part !~ /\A[A-Za-z0-9_-]+\z/;
    }

    my $filename = sprintf 'SONUS_%s_%s%s.gz',
        $fileDate, $fileMinute,
        format_a_records_file_counter($RECORDS_FILE_COUNTER);
    my $path = "/data/sonus/cdr/$fileDate/$filename";

    open my $gzipFH, '|-', "gzip -5 -c > $path"
        or die "error starting gzip for $path: $!";

    say {$gzipFH} $_ for @{$records_aref};

    # With a piped open, a failed redirect or a gzip error is only reported
    # here. close() sets $! to 0 when the sole problem is a non-zero child
    # status, which is then available in $?.
    if ( !close $gzipFH ) {
        warn $!
            ? "error closing gzip pipe for $path: $!"
            : "gzip for $path failed with wait status $?";
    }

    $RECORDS_FILE_COUNTER++;
    $SUFFIX = format_a_records_file_counter($RECORDS_FILE_COUNTER);

    @{$records_aref} = ();    # Clears the array that was passed in.
    return;
}
# Records read from STDIN that have not been written to a records file yet.
my @temp_records;

while (<STDIN>) {
    chomp;

    # Buffer the current line for the next records file.
    my $record = $_;
    push @temp_records, $record;

    # ... and you do a lot of other things here, but only with this one record...
    # The omitted code uses $SUFFIX, and relies on it being formatted in a
    # way that matches the number in the gzip'ed output record file.

    # Flush a full batch. Once $RECORDS_FILE_COUNTER has reached 99 nothing
    # more is flushed here: the remaining records simply accumulate and are
    # written after the loop ends.
    write_a_records_file( $fileDate, $fileMinute, \@temp_records )
        if $RECORDS_FILE_COUNTER < 99 && @temp_records == $divider;
}

# Write whatever is left: either the line count was not an exact multiple of
# $divider, or $RECORDS_FILE_COUNTER maxed out at 99 and records piled up.
write_a_records_file( $fileDate, $fileMinute, \@temp_records )
    if @temp_records;

# @temp_records is always empty and no longer used from here on, so the
# counter state can be dropped as well.
undef $_ for $RECORDS_FILE_COUNTER, $SUFFIX;
# ... lots of unaffected processing not shown ... |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment