Last active
August 29, 2015 14:10
-
-
Save gullevek/43378a8c5230acfcf84b to your computer and use it in GitHub Desktop.
perl and python module for progress print out in percent, rows, lines per second, etc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package Progress; | |
# AUTHOR: Clemens Schwaighofer | |
# DATE CREATED: 2009/6/16 | |
# DESCRIPTION: progress percent class | |
# METHODS | |
# * init | |
# my $prg = Progress->new(); | |
# will init a new progress class in the var $prg | |
# the following parameters can be set directly during a new call | |
# - verbose (1/0) | |
# - precision (-1~10) | |
# - wide_time (0/1) | |
# - microtime (0/1) | |
# setting is done via | |
# my $prg = Progress->new(verbose => 1, microtime => 1);
# * setting methods | |
# verbose($level int) | |
# $level has to be an int; if it is not set, no output is shown. At least 1 has to be given to see visible output
# precision($decimals int) | |
# $decimals has to be int, if set to -1 then the steps are done in 10 increase, else it sets how many decimals are visible, 0 for no decimals | |
# wide_time(0/1 int) | |
# sets the flag for wide time, if set to 1 the estimated time to end and time run is left prefixed with 15 chars | |
# microtime(0/1 int) | |
# sets the flag to always show microtime (1) or only if the previous time was the same (0) | |
# reset() | |
# resets all the internal vars for another new run | |
# SetStartTime(optional timestamp) | |
# sets the start times for this progress run, the overall start/end time is set, and the time used for the actual progress | |
# in case there is some processing done before the run starts, it is highly recommended to call SetETAStartTime before the actual processing starts | |
# if no timestamp is given, internal timestamp is used (this is recommended) | |
# SetETAStartTime(optional timestamp) | |
# only sets the start/end time for the actual "estimated time" calculation. It is recommended to call this right before the processing loop starts | |
# eg if there is a big query running that takes a lot of time, this method should be called before the reading loop | |
# as with SetStartTime a timestamp can be given, if not then the internal timestamp is used (this is recommended) | |
# SetEndTime(optional timestamp) | |
# sets the end time for the overall processing. This should be called at the very end of the script before any final stat data is printed | |
# linecount($lines int) | |
# sets the maximum lines that will be processed, used for percentage calculation. If a non int is given, it is set to 1. This can only be set once; to
# set it again, call the reset() method first.
# Either this or filesize NEED to be set | |
# filesize($bytes int) | |
# filesize in bytes, if non valid data is given, then it is set to 1. | |
# filesize() and linecount() can both be set, but at least one of them has to be set. | |
# if filesize is set a byte data output is added, if only linecount is given, only the linecount output will be given (no bytes per second, etc) | |
# ShowPosition(optional current byte position int) | |
# this is the main processing and has to be called at the end of the loop where the data is processed. If no bytes are given the internal counter (linecount) | |
# is used. | |
# for bytes it is recommended to use IO::File and $FH->tell to pass on the bytes | |
# | |
# VARIABLES | |
# * internal set | |
# change: flagged 1 if output is given or would be given. can be used for any post processing after the ShowPosition is called | |
# precision_ten_step: flagged 1 if the precision was set to -1 | |
# start: overall start time | |
# end: overall end time | |
# count: count of processed lines | |
# [TODO: describe the others too, at the moment only below in %fields] | |
use strict; | |
use warnings; | |
use utf8; | |
BEGIN | |
{ | |
use POSIX; | |
use Carp; | |
use Time::HiRes qw(time); | |
use File::Basename; | |
use Number::Format qw(format_number); | |
use vars qw($AUTOLOAD); | |
push(@INC, File::Basename::dirname($0).'/'); | |
} | |
# important includes | |
use functions; | |
# variable declaration for access (every key listed here gets an AUTOLOAD accessor)
# * can be set
# = only for read
# unmarked are internal only, but can be read if they are needed in further processing in the script
my %fields = (
    linecount           => 0,     # * max lines in input
    filesize            => 0,     # * max file size
    precision           => 1,     # * comma after percent
    wide_time           => 0,     # * if flagged 1, then the wide 15 char left bound format is used
    verbose             => 0,     # * verbose status from outside
    microtime           => 0,     # * microtime output for last run time (1 for enable, 0 for auto)
    change              => 0,     # = flag if output was given
    start               => undef, # = global start for the full script running time
    start_run           => undef, # = for the eta time, can be set after a query or long read in, to not create a wrong ETA time
    start_time          => undef, # loop start
    end                 => undef, # = global end
    end_time            => undef, # loop end
    count_size          => undef, # = filesize current
    count               => 0,     # = position current
    current_count       => 0,     # last count (position)
    file_pos            => 0,     # position used for the last percent calculation (set by ShowPosition)
    lines_processed     => 0,     # lines processed in the last run
    last_group          => 0,     # time in seconds for the last group run (until percent change)
    lines_in_last_group => 0,     # float value, lines processed per second in the last group run
    lines_in_global     => 0,     # float value, lines processed per second over the complete run
    bytes_in_last_group => 0,     # float value, bytes processed per second in the last group run
    bytes_in_global     => 0,     # float value, bytes processed per second over the complete run
    size_in_last_group  => 0,     # bytes processed in last run (in bytes)
    current_size        => 0,     # current file position (size)
    last_percent        => 0,     # last percent position
    precision_ten_step  => 0,     # 0 for normal %, else the step size (10 or 5) for stepped output
    percent_print       => 5,     # the default size, this is precision + 4
    percent_precision   => 1,     # this is 1 if it is 1 or 0 for precision, or precision size
    eta                 => undef, # estimated time to finish
    full_time_needed    => undef, # run time since start
    lg_microtime        => 0      # last group microtime, this is auto set during process.
);
{ | |
# SUB: new
# PARAMS: optional key => value list; recognised keys are verbose, precision, microtime, wide_time
# RETURN: new blessed object, pre-filled with the defaults from %fields
# DESC: class init; values that fail their format check are ignored and the default is kept
sub new
{
    my ($proto, %data) = @_;
    my $class = ref($proto) || $proto;
    my $self = bless({ _permitted => \%fields, %fields }, $class);
    # verbose is stored directly, only a single digit greater than zero is taken
    if ($data{'verbose'} && $data{'verbose'} =~ /^\d{1}$/)
    {
        $self->{verbose} = $data{'verbose'};
    }
    # precision goes through its setter so the print widths are updated too
    if (exists($data{'precision'}))
    {
        my $precision = $data{'precision'};
        if (($precision || $precision == 0) && $precision =~ /^\-?\d{1,2}$/)
        {
            $self->precision($precision);
        }
    }
    # the two on/off flags share the same check and are passed to their setters
    for my $flag (qw(microtime wide_time))
    {
        next unless ($data{$flag} && $data{$flag} =~ /^(0|1)$/);
        $self->$flag($data{$flag});
    }
    return $self;
}
# SUB: AUTOLOAD
# PARAMS: optional new value for the field
# RETURN: the (new) value of the field named like the called method
# DESC: generic get/set accessor for every key listed in _permitted,
#       croaks if called on a non object or for an unknown field
sub AUTOLOAD
{
    my ($self, @args) = @_;
    my $type = ref($self) || croak "$self is not an object";
    # strip the package part, only the method name is of interest
    (my $name = $AUTOLOAD) =~ s/.*://;
    croak "Can't access '$name' field in class $type"
        unless (exists $self->{_permitted}->{$name});
    # with an argument this is a setter, without one a getter
    return @args ? ($self->{$name} = $args[0]) : $self->{$name};
}
# SUB: DESTROY
# PARAMS: none
# DESC: destructor; declared explicitly so object destruction does not fall
#       through to AUTOLOAD. There is nothing to close or finish.
sub DESTROY
{
    return;
}
# SUB: reset
# PARAMS: none
# DESC: puts all counters, rates and the maximum values (linecount, filesize) back to 0
#       and clears the loop start/end times, so another run can be tracked.
#       The settings (verbose, precision, ...) and the global start/end are not touched.
sub reset
{
    my $self = shift;
    # everything that counts or measures starts again at zero
    my @zeroed = qw(
        count current_count linecount lines_processed last_group
        lines_in_last_group lines_in_global bytes_in_last_group bytes_in_global
        size_in_last_group filesize current_size last_percent eta full_time_needed
    );
    $self->{$_} = 0 for (@zeroed);
    # positions and loop times are unknown until the next run sets them
    $self->{$_} = undef for (qw(count_size start_run start_time));
    return $self->{end_time} = undef;
}
# SUB: microtime
# PARAMS: 1/0 (optional)
# RETURN: the current microtime flag
# DESC: flag to set microtime on or off in the time output
#       if the given value is not exactly 1 or 0 (undef, text, other numbers), set to 0
sub microtime
{
    my $self = shift;
    if (@_)
    {
        my $microtime = shift;
        # validate as a string: a numeric compare would treat any non numeric
        # text as 0 and store the text itself in the flag
        my $valid = defined($microtime) && $microtime =~ /\A[01]\z/;
        $self->{microtime} = $valid ? $microtime + 0 : 0;
    }
    return $self->{microtime};
}
# SUB: wide_time
# PARAMS: 1/0 (optional)
# RETURN: the current wide_time flag
# DESC: flag to set wide_time (15 char spacer).
#       if the given value is not exactly 1 or 0 (undef, text, other numbers), set to 0
sub wide_time
{
    my $self = shift;
    if (@_)
    {
        my $wide = shift;
        # validate as a string: a numeric compare would treat any non numeric
        # text as 0 and store the text itself in the flag
        my $valid = defined($wide) && $wide =~ /\A[01]\z/;
        $self->{wide_time} = $valid ? $wide + 0 : 0;
    }
    return $self->{wide_time};
}
# SUB: precision
# PARAMS: precision in int (optional)
# RETURN: the current precision (decimals of the percent value)
# DESC: sets the output percent precision calculation and printf width
#       -1 switches to 10 percent steps, anything below -1 to 5 percent steps,
#       values bigger than 10 are set to 10, non int values are treated as 0
sub precision
{
    my $self = shift;
    if (@_)
    {
        my $comma = shift;
        $comma = 0 if (!defined($comma) || $comma !~ /^\-?\d{1,}$/);
        # normalise things like "-0" or "007" to a plain number
        $comma = int($comma);
        if ($comma < 0)
        {
            # -1 is 10 step, -2 (and below) is 5 step
            $self->{precision_ten_step} = $comma < -1 ? 5 : 10;
            $self->{precision} = 0; # no comma
            $self->{percent_precision} = 0; # no print precision
            $self->{percent_print} = 3; # max 3 length
        }
        else
        {
            # the width base depends on the requested value, before it is capped
            my $base = $comma == 0 ? 3 : 4;
            $comma = 10 if ($comma > 10);
            # leave step mode again, or a former negative setting stays active
            $self->{precision_ten_step} = 0;
            $self->{precision} = $comma;
            $self->{percent_precision} = $comma;
            $self->{percent_print} = $base + $comma;
        }
    }
    return $self->{precision};
}
# SUB: linecount
# PARAMS: max number of lines to be processed (optional)
# RETURN: the current max line count
# DESC: sets the max number of lines for the percent calculation,
#       if negative or not a plain int, set to 1
#       can only be set ONCE (as long as the stored value is 0), use reset() to set it again
sub linecount
{
    my $self = shift;
    if (!$self->{linecount} && @_)
    {
        my $linecount = shift;
        # anchored check: the whole value has to be digits, so "12abc", "-5" or undef end up as 1
        my $valid = defined($linecount) && $linecount =~ /\A\d+\z/;
        $self->{linecount} = $valid ? $linecount : 1;
    }
    return $self->{linecount};
}
# SUB: filesize
# PARAMS: max filesize of the data to be processed (optional)
# RETURN: the current max filesize
# DESC: sets the max filesize of the data to be processed,
#       if negative or not a plain int, set to 1
#       input data has to be in bytes without any suffix (no b, kb, etc)
#       can only be set ONCE (as long as the stored value is 0), use reset() to set it again
sub filesize
{
    my $self = shift;
    if (!$self->{filesize} && @_)
    {
        my $filesize = shift;
        # anchored check: the whole value has to be digits, so "10kb", "-5" or undef end up as 1
        my $valid = defined($filesize) && $filesize =~ /\A\d+\z/;
        $self->{filesize} = $valid ? $filesize : 1;
    }
    return $self->{filesize};
}
# SUB: SetStartTime
# PARAMS: timestamp (optional), without one the current time is taken
# RETURN: the timestamp that was set
# DESC: sets the global start and, from it, the loop start and the ETA run start
sub SetStartTime
{
    my ($self, @given) = @_;
    # an explicitly passed value always wins, even if it is undef
    my $stamp = @given ? $given[0] : time();
    $self->{start} = $stamp;
    $self->{start_time} = $stamp;
    return $self->{start_run} = $stamp;
}
# SUB: SetETAStartTime
# PARAMS: timestamp (optional), without one the current time is taken
# RETURN: the timestamp that was set
# DESC: sets the loop start and the run start for a correct ETA calculation,
#       the global start is not changed
sub SetETAStartTime
{
    my ($self, @given) = @_;
    # an explicitly passed value always wins, even if it is undef
    my $stamp = @given ? $given[0] : time();
    $self->{start_time} = $stamp;
    return $self->{start_run} = $stamp;
}
# SUB: SetEndTime
# PARAMS: timestamp (optional), without one the current time is taken
# RETURN: the timestamp that was set
# DESC: sets the global end time for the running time calculation
sub SetEndTime
{
    my ($self, @given) = @_;
    # an explicitly passed value always wins, even if it is undef
    return $self->{end} = @given ? $given[0] : time();
}
# SUB: ShowPosition
# PARAMS: optional; current file position in bytes (eg from $FH->tell)
# RETURN: string for percent position output; empty string if the percent value
#         did not change or if verbose is below 1
# DESC: counts the call as one processed line and calculates the current percent position
#       based on the passed position, or on the internal line counter if nothing is passed.
#       On a percent change the run times, lines/bytes per second and the ETA are updated
#       and the "change" flag is set to 1, else "change" is set to 0.
#       Croaks if neither linecount nor filesize has been set (nothing to calculate against).
sub ShowPosition
{
    my $self = shift;
    # set local vars
    my $percent; # current percent
    my $full_time_needed; # complete process time
    my $full_time_per_line; # time per line (or per byte, if there is no linecount)
    my $eta; # estimated end time
    my $string = ''; # percent string that gets output
    my $show_filesize = 1;
    # microtime flags
    my ($eta_microtime, $ftn_microtime, $lg_microtime) = (0, 0, 0);
    # percent precision calc
    my $_p_spf = "%.".$self->{precision}."f";
    # output format for percent
    my $_pr_p_spf = "%".$self->{percent_print}.".".$self->{percent_precision}."f";
    # set the linecount width based on the final linecount, if not set, leave it open
    my $_pr_lc = "%s";
    $_pr_lc = "%".length(format_number($self->{linecount}))."s" if ($self->{linecount});
    # time format, if flag is set, the wide format is used
    my $_pr_tf = $self->{'wide_time'} ? "%-15s" : "%s";
    # increase position by one
    $self->{count} ++;
    # see if we get anything from IO tell
    if (@_)
    {
        $self->{file_pos} = shift;
    }
    else
    {
        # we did not, so we set internal value
        $self->{file_pos} = $self->{count};
        # without a filesize the linecount is the maximum to calculate against
        $self->{filesize} = $self->{linecount} if (!$self->{filesize});
        # set ignore filesize output (no data)
        $show_filesize = 0;
    }
    # set the count size based on the file pos, is only used if we have filesize
    $self->{count_size} = $self->{file_pos};
    # without any maximum the percent calculation would be a division by zero
    croak "ShowPosition needs linecount() or filesize() to be set before it is called"
        if (!$self->{filesize});
    # do normal or down to 10 (0, 10, ...) %
    if ($self->{precision_ten_step})
    {
        # calc 0 comma precision, so just do a floor
        my $_percent = sprintf("%d", ($self->{file_pos} / $self->{filesize}) * 100);
        # only full steps are reported, in between the old value is kept
        my $mod = $_percent % $self->{precision_ten_step};
        $percent = $mod == 0 ? $_percent : $self->{last_percent};
    }
    else
    {
        $percent = sprintf($_p_spf, ($self->{file_pos} / $self->{filesize}) * 100);
    }
    if ($percent == $self->{last_percent})
    {
        # nothing visible changed, so there is nothing to report
        $self->{change} = 0;
        return $string;
    }
    $self->{end_time} = time();
    # for from the beginning
    $full_time_needed = $self->{end_time} - $self->{start_run}; # how long from the start
    $self->{last_group} = $self->{end_time} - $self->{start_time};
    $self->{lines_processed} = $self->{count} - $self->{current_count};
    # lines in last group
    $self->{lines_in_last_group} = $self->{last_group} ? ($self->{lines_processed} / $self->{last_group}) : 0;
    # lines in global
    $self->{lines_in_global} = $full_time_needed ? ($self->{count} / $full_time_needed) : 0;
    # a zero run time is counted as one second for the per unit time
    my $time_base = $full_time_needed ? $full_time_needed : 1;
    if (!$self->{linecount})
    {
        # no linecount: estimate on the byte position, a zero position gives no estimate
        $full_time_per_line = $self->{count_size} ? ($time_base / $self->{count_size}) : 0;
        $eta = $full_time_per_line * ($self->{filesize} - $self->{count_size}); # estimate for the rest
    }
    else
    {
        $full_time_per_line = $time_base / $self->{count}; # how long for all
        $eta = $full_time_per_line * ($self->{linecount} - $self->{count}); # estimate for the rest
    }
    # just in case ...
    $eta = '0' if ($eta < 0);
    # check if to show microtime
    # ON: if microtime is flagged as one
    $eta_microtime = $ftn_microtime = $lg_microtime = 1 if ($self->{microtime} == 1);
    # AUTO: microtime only for times below one second
    if ($self->{microtime} == 0)
    {
        $eta_microtime = 1 if ($eta > 0 && $eta < 1);
        $ftn_microtime = 1 if ($full_time_needed > 0 && $full_time_needed < 1);
        $lg_microtime = 1 if ($self->{last_group} > 0 && $self->{last_group} < 1);
    }
    # print out
    if ($show_filesize)
    {
        # last group size
        $self->{size_in_last_group} = $self->{count_size} - $self->{current_size};
        # calc bytes/s, last group
        $self->{bytes_in_last_group} = $self->{last_group} ? ($self->{size_in_last_group} / $self->{last_group}) : 0;
        # global
        $self->{bytes_in_global} = $full_time_needed ? ($self->{count_size} / $full_time_needed) : 0;
        # only used if we run with file size for the next check
        $self->{current_size} = $self->{count_size};
        if ($self->{verbose} >= 1)
        {
            $string = sprintf(
                "Processed ".$_pr_p_spf."%% [%s / %s] | ".$_pr_lc." / ".$_pr_lc." Lines | ETA: ".$_pr_tf." / TR: ".$_pr_tf." / LR: %s lines (%s) in %s, %s (%s) lines/s, %s (%s) b/s\n",
                $percent,
                function::convert_number($self->{count_size}),
                function::convert_number($self->{filesize}),
                format_number($self->{count}),
                format_number($self->{linecount}),
                function::convert_time($eta, $eta_microtime),
                function::convert_time($full_time_needed, $ftn_microtime),
                format_number($self->{lines_processed}),
                function::convert_number($self->{size_in_last_group}),
                function::convert_time($self->{last_group}, $lg_microtime),
                format_number($self->{lines_in_global}, 2, 1),
                format_number($self->{lines_in_last_group}, 2, 1),
                function::convert_number($self->{bytes_in_global}),
                function::convert_number($self->{bytes_in_last_group})
            );
        }
    }
    elsif ($self->{verbose} >= 1)
    {
        $string = sprintf(
            "Processed ".$_pr_p_spf."%% | ".$_pr_lc." / ".$_pr_lc." Lines | ETA: ".$_pr_tf." / TR: ".$_pr_tf." / LR: %s lines in %s, %s (%s) lines/s\n",
            $percent,
            format_number($self->{count}),
            format_number($self->{linecount}),
            function::convert_time($eta, $eta_microtime),
            function::convert_time($full_time_needed, $ftn_microtime),
            format_number($self->{lines_processed}),
            function::convert_time($self->{last_group}, $lg_microtime),
            format_number($self->{lines_in_global}, 2, 1),
            format_number($self->{lines_in_last_group}, 2, 1)
        );
    }
    # write back vars
    $self->{last_percent} = $percent;
    $self->{eta} = $eta;
    $self->{full_time_needed} = $full_time_needed;
    $self->{lg_microtime} = $lg_microtime;
    # for the next run, check data
    $self->{start_time} = time();
    $self->{current_count} = $self->{count};
    # trigger if this is a change
    $self->{change} = 1;
    return $string;
}
} # end block | |
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# AUTHOR: Clemens Schwaighofer | |
# DATE CREATED: 2009/7/24 | |
# DESCRIPTION: progress percent class (perl -> python) | |
from functions import FormatBytes, FormatNumber, ConvertTimestamp, CreateTime; | |
import time; | |
import re; | |
from math import floor; | |
class Progress(object):
    """Progress tracker: percent done, run time, ETA and lines/bytes per second.

    Usage as visible from the methods below: set the maximum with SetLinecount()
    and/or SetFilesize(), optionally call SetETAStartTime() right before the
    processing loop, and call ShowPosition() once per processed line.
    """

    def __init__(self, verbose = 0, microtime = 0, precision = 1, wide_time = 0):
        """Set all defaults, apply the given settings and take the start time.

        verbose   -- values above 0 turn the output on
        microtime -- 1 to always show microtime, anything else for auto
        precision -- decimals of the percent value (0-10), -1 for 10 percent
                     steps, below -1 for 5 percent steps
        wide_time -- 1 to use the 15 char left bound time format
        """
        # set default var stuff
        self.linecount = 0            # max lines in input
        self.filesize = 0             # max file size
        self.precision = 0            # * comma after percent
        self.wide_time = 0            # * if flagged 1, then the wide 15 char left bound format is used
        self.verbose = 0              # * verbose status from outside
        self.microtime = 0            # * microtime output for last run time (1 for enable, 0 for auto)
        self.change = 0               # = flag if output was given
        self.start = None             # = global start for the full script running time
        self.start_run = None         # = for the eta time, can be set after a query or long read in, to not create a wrong ETA time
        self.start_time = None        # loop start
        self.end = None               # global end
        self.end_time = None          # loop end
        self.run_time = None          # run time in seconds, set when end time method is called
        self.count_size = None        # = filesize current
        self.count = 0                # position current
        self.current_count = 0        # last count (position)
        self.file_pos = 0             # position used for the last percent calculation
        self.lines_processed = 0      # lines processed in the last run
        self.last_group = 0           # time in seconds for the last group run (until percent change)
        self.lines_in_last_group = 0  # float value, lines processed per second in the last group run
        self.lines_in_global = 0      # float value, lines processed per second over the complete run
        self.bytes_in_last_group = 0  # float value, bytes processed per second in the last group run
        self.bytes_in_global = 0      # float value, bytes processed per second over the complete run
        self.size_in_last_group = 0   # bytes processed in last run (in bytes)
        self.current_size = 0         # current file position (size)
        self.last_percent = 0         # last percent position
        self.precision_ten_step = 0   # 0 for normal %, else the step size (10 or 5)
        self.percent_print = 5        # the default size, this is precision + 4
        self.percent_precision = 1    # this is 1 if it is 1 or 0 for precision or precision size
        self.eta = None               # estimated time to finish
        self.full_time_needed = None  # run time since start
        self.lg_microtime = 0         # microtime flag of the last group, set by ShowPosition
        self.string = ''              # the actual output
        # initialize the class
        self.SetPrecision(precision)
        self.SetVerbose(verbose)
        self.SetMicrotime(microtime)
        self.SetWideTime(wide_time)
        self.SetStartTime()

    @staticmethod
    def _now():
        """Return the current timestamp.

        Kept separate because the public time setters have a parameter named
        "time", which hides the time module inside them.
        """
        return time.time()

    def Reset(self):
        """Reset the current progress to 0, but keep the overall start/end and the settings."""
        # reset what always gets reset
        self.count = 0
        self.count_size = None
        self.current_count = 0
        self.linecount = 0
        self.lines_processed = 0
        self.last_group = 0
        self.lines_in_last_group = 0
        self.lines_in_global = 0
        self.bytes_in_last_group = 0
        self.bytes_in_global = 0
        self.size_in_last_group = 0
        self.filesize = 0
        self.current_size = 0
        self.last_percent = 0
        self.eta = 0
        self.full_time_needed = 0
        self.start_run = None
        self.start_time = None
        self.end_time = None

    def SetWideTime(self, wide_time):
        """Set the wide time flag to 1 if the value is 1, else to 0; returns the flag."""
        self.wide_time = 1 if wide_time == 1 else 0
        return self.wide_time

    def SetMicrotime(self, microtime):
        """Set the show microtime flag to 1 if the value is 1, else to 0; returns the flag."""
        self.microtime = 1 if microtime == 1 else 0
        return self.microtime

    def SetVerbose(self, verbose):
        """Set the verbose flag to 1 for any value above 0, else to 0; returns the flag."""
        self.verbose = 1 if verbose > 0 else 0
        return self.verbose

    def SetPrecision(self, precision):
        """Set the percent precision and the matching print width; returns the precision.

        -1 selects 10 percent steps, anything below -1 selects 5 percent steps.
        0 to 10 is the number of decimals, bigger values are set to 10.
        Anything that is not a whole number is treated as 0.
        """
        # if not a valid number, we set it to 0
        if not re.match(r"^\-?\d{1,}$", str(precision)):
            precision = 0
        # digit strings pass the check above, so compare with a real int from here on
        precision = int(precision)
        if precision < 0:
            self.precision_ten_step = 5 if precision < -1 else 10
            self.precision = 0          # no comma
            self.percent_precision = 0  # no print precision
            self.percent_print = 3      # max 3 length
        else:
            capped = 10 if precision > 10 else precision
            # leave step mode again, or a former negative setting stays active
            self.precision_ten_step = 0
            # comma values visible
            self.precision = capped
            # for calculation of precision
            self.percent_precision = capped
            # for the format output the base is 4 (3 without decimals), plus the precision length
            self.percent_print = (3 if precision == 0 else 4) + capped
        # return the set precision
        return self.precision

    def SetLinecount(self, linecount):
        """Set the maximum lines; values of 0 or below are set to 1; returns the linecount."""
        self.linecount = linecount if linecount > 0 else 1
        return self.linecount

    def SetFilesize(self, filesize):
        """Set the maximum filesize in bytes; values of 0 or below are set to 1; returns the filesize."""
        self.filesize = filesize if filesize > 0 else 1
        return self.filesize

    def SetStartTime(self, time = None):
        """Set the start times; without a timestamp the current time is taken.

        The overall start is only set once (while it is still unset), the loop
        start and the ETA run start are set on every call.
        """
        # the default is taken at call time: a time.time() default in the
        # signature would be evaluated only once, when the class is defined
        if time is None:
            time = self._now()
        # avoid possible double set of the original start time
        if not self.start:
            self.start = time
        self.start_time = time
        self.start_run = time

    def SetETAStartTime(self, time = None):
        """Set the loop and run start time for a correct ETA calculation.

        Goes through SetStartTime, as the main start time is only set once there.
        """
        if time is None:
            time = self._now()
        self.SetStartTime(time)

    def SetEndTime(self, time = None):
        """Set the end time (default: now) and the overall run time (end - start)."""
        if time is None:
            time = self._now()
        self.end = time
        self.end_time = time
        # the overall run time, difference of the two timestamps
        self.run_time = self.end - self.start

    def ShowPosition(self, filepos = 0):
        """Count one processed line and update/print the progress on a percent change.

        filepos -- current byte position for file size based progress; with 0
                   (default) the internal line counter is used instead.

        Returns the last built output string (unchanged if nothing new was built).
        Sets self.change to 1 if the percent value changed, else to 0.
        Raises ZeroDivisionError if neither linecount nor filesize is set.
        """
        show_filesize = 1  # if we print from file size or line count
        # microtime flags
        eta_microtime = 0
        ftn_microtime = 0
        lg_microtime = 0
        # count up
        self.count += 1
        # if we have file pos from parameter
        if filepos != 0:
            self.file_pos = filepos
        else:
            # we did not, so we set internal value
            self.file_pos = self.count
            # without a filesize the linecount is the maximum to calculate against
            if self.filesize == 0:
                self.filesize = self.linecount
            # set ignore filesize output (no data)
            show_filesize = 0
        # set the count size based on the file pos, is only used if we have filesize
        self.count_size = self.file_pos
        # do normal or down to 10 (0, 10, ...) %
        ratio = self.file_pos / float(self.filesize)
        if self.precision_ten_step:
            _percent = int(ratio * 100)
            # only full steps are reported, in between the old value is kept
            mod = _percent % self.precision_ten_step
            percent = _percent if mod == 0 else self.last_percent
        else:
            # calc percent
            percent = round(ratio * 100, self.precision)
        if percent == self.last_percent:
            # nothing visible changed
            self.change = 0
            return self.string
        self.end_time = time.time()  # current time (for loop time)
        # run time is measured from the ETA start, so work done before the loop
        # does not distort the estimate; fall back to the global start if unset
        run_start = self.start_run if self.start_run is not None else self.start
        full_time_needed = self.end_time - run_start       # how long from the start
        self.last_group = self.end_time - self.start_time  # how long for last loop
        # how many lines processed, stored on the object as the rate below needs it
        self.lines_processed = self.count - self.current_count
        lines_processed = self.lines_processed
        # lines in last group
        self.lines_in_last_group = (lines_processed / self.last_group) if self.last_group else 0
        # lines in global
        self.lines_in_global = (self.count / full_time_needed) if full_time_needed else 0
        # a zero run time is counted as one second for the per unit time
        time_base = full_time_needed if full_time_needed else 1
        if self.linecount == 0:
            # no linecount: estimate on the byte position
            full_time_per_line = time_base / float(self.count_size)
            eta = full_time_per_line * (self.filesize - self.count_size)  # estimate for the rest
        else:
            full_time_per_line = time_base / float(self.count)
            eta = full_time_per_line * (self.linecount - self.count)  # estimate for the rest
        # just in case ...
        if eta < 0:
            eta = 0
        # check if to show microtime
        # ON
        if self.microtime == 1:
            eta_microtime = ftn_microtime = lg_microtime = 1
        # AUTO
        if self.microtime == 0:
            if eta > 0 and eta < 1:
                eta_microtime = 1
            if full_time_needed > 0 and full_time_needed < 1:
                ftn_microtime = 1
            if self.last_group > 0 and self.last_group < 1:
                lg_microtime = 1
            # NOTE(review): nesting reconstructed, the original indentation was
            # lost; a last group time without a fractional part also turns
            # microtime on -- confirm this is intended
            if self.last_group == floor(self.last_group):
                lg_microtime = 1
                self.last_group = floor(self.last_group)
        if show_filesize:
            # last group size
            self.size_in_last_group = self.count_size - self.current_size
            # calc bytes/s, last group
            self.bytes_in_last_group = (self.size_in_last_group / self.last_group) if self.last_group else 0
            # global
            self.bytes_in_global = (self.count_size / full_time_needed) if full_time_needed else 0
            # only used if we run with file size for the next check
            self.current_size = self.count_size
        if self.verbose >= 1:
            # output format for percent
            _pr_p_spf = "%" + str(self.percent_print) + "." + str(self.percent_precision) + "f"
            # set the linecount width based on the final linecount, if not set, leave it open
            _pr_lc = "%s"
            if self.linecount:
                _pr_lc = "%" + str(len(str(FormatNumber(self.linecount)))) + "s"
            # time format, if flag is set, the wide format is used
            _pr_tf = "%-15s" if self.wide_time else "%s"
            if show_filesize:
                _string = "Processed " + _pr_p_spf + "%% [%s / %s] | " + _pr_lc + " / " + _pr_lc + " Lines | ETA: " + _pr_tf + " / TR: " + _pr_tf + " / LR: %s lines (%s) in %s, %s (%s) lines/s, %s (%s) b/s"
                self.string = _string % (
                    float(percent),
                    FormatBytes(self.count_size),
                    FormatBytes(self.filesize),
                    FormatNumber(self.count),
                    FormatNumber(self.linecount),
                    ConvertTimestamp(eta, eta_microtime),
                    ConvertTimestamp(full_time_needed, ftn_microtime),
                    FormatNumber(lines_processed),
                    FormatBytes(self.size_in_last_group),
                    ConvertTimestamp(self.last_group, lg_microtime),
                    FormatNumber(self.lines_in_global, 2, 1),
                    FormatNumber(self.lines_in_last_group, 2, 1),
                    FormatBytes(self.bytes_in_global),
                    FormatBytes(self.bytes_in_last_group),
                )
            else:
                _string = "Processed " + _pr_p_spf + "%% | " + _pr_lc + " / " + _pr_lc + " Lines | ETA: " + _pr_tf + " / TR: " + _pr_tf + " / LR: %s lines in %s, %s (%s) lines/s"
                self.string = _string % (
                    float(percent),
                    FormatNumber(self.count),
                    FormatNumber(self.linecount),
                    ConvertTimestamp(eta, eta_microtime),
                    ConvertTimestamp(full_time_needed, ftn_microtime),
                    FormatNumber(lines_processed),
                    ConvertTimestamp(self.last_group, lg_microtime),
                    FormatNumber(self.lines_in_global, 2, 1),
                    FormatNumber(self.lines_in_last_group, 2, 1),
                )
            # print the string if verbose is turned on (call form works in Python 2 and 3)
            print(self.string)
        # write back vars
        self.last_percent = percent
        self.eta = eta
        self.full_time_needed = full_time_needed
        self.lg_microtime = lg_microtime
        # for the next run, check data
        self.start_time = time.time()
        self.current_count = self.count
        # trigger if this is a change
        self.change = 1
        # return string
        return self.string
# END |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment