nlitsme · August 29, 2015 14:08
diff --git a/lsq.pl b/lsq.pl
 #!perl -w
 # vim: sw=4 ts=4 et :
 # (C) 2003-2007 Willem Jan Hengeveld <[email protected]>
 # Web: http://www.xs4all.nl/~itsme/
 #      http://wiki.xda-developers.com/
 #
 # $Id: $
 #
 # http://en.wikipedia.org/wiki/Linear_regression
 # http://mathworld.wolfram.com/LeastSquaresFitting.html
 # http://mathworld.wolfram.com/LeastSquaresFittingPolynomial.html
 # http://mathworld.wolfram.com/LeastSquaresFittingExponential.html
 # http://mathworld.wolfram.com/LeastSquaresFittingPerpendicularOffsets.html
 # http://mathworld.wolfram.com/NonlinearLeastSquaresFitting.html
 #
 #  tests:
 # perl -e 'printf("%5.2f %5.2f\n", $_, 0.4*exp(0.2*$_)) for 1..100' | lsq | sort -n
 # perl -e 'printf("%5.2f %5.2f\n", $_, 0.4+0.2*log($_)) for 1..100' | lsq | sort -n
 # perl -e 'printf("%5.2f %5.2f\n", $_, 0.4*$_**0.2) for 1..100' | lsq | sort -n
 # perl -e 'printf("%5.2f %5.2f\n", $_, 0.4+0.2*($_**2)) for 1..100' | lsq | sort -n
 # perl -e 'printf("%5.2f %5.2f\n", $_, 0.4+0.2*($_**5)) for 1..100' | lsq | sort -n
 # perl -e 'printf("%5.2f %5.2f\n", $_, 0.4+0.2*($_**7)) for 1..100' | lsq | sort -n
 #
 # todo: add fft for periodic signals
 # done: add option to output data+functions with gnuplot

 # todo: read about non-linear least squares Marquardt-Levenberg algorithm

 # ........ how to get proper timestamps:
 #     .... also added date+time as first 2 colums
 # set xdata time
 # set timefmt "%Y-%m-%d %H:%M:%S"
 # set format x "%H:%M"
 # plot '/tmp/test.dat' using 1:4, '' using 1:(328677.832933346+$3*(-0.000841587261071837)) title '1st order poly' smooth unique

 use strict;
 use warnings;
 use Getopt::Long;
 use Math::MatrixReal;
 #use File::Temp;
 use IO::File;
 use POSIX;
 $|=1;
 # simple case: a*x+b=y
 #   given many pairs (x,y) find a and b that minimize the error.
 #
 #  -> a*x0+b=y0
 #  -> a*x1+b=y1
 #  -> a*x2+b=y2
 #
 #  -> equation
 #   ( x0 1 )  ( a )   ( y0 )
 #   ( x1 1 ) *( b ) = ( y1 ) 
 #   ( x2 1 )          ( y2 )
 #
 #      A     * x    =  y
 #
 #  minimize distance to line:
 #    d(line,[xy]) = |A*x-y|
 #
 #  error :   |A*x-y| = (A*x-y)' * (A*x-y) 
 #        = x'*A'*A*x - 2*x*A'*y + y'*y
 # differentiate by x
 #   ->  2*A'*A*x-2*A'*y = 0  -> A'*A*x=A'*y
 #  mini

 # 'm' measurements
 # 'p' variables
 #
 # y_0=a0*x_0^0+a1*x_0^1+a2*x_0^2+..an*x_0^n
 # y_1=a0*x_1^0+a1*x_1^1+a2*x_1^2+..an*x_1^n
 # ..
 # y_m=a0*x_m^0+am*x_m^1+a2*x_m^2+..an*x_m^n

 # ->
 #
 # (y_0 .. y_m) = Matrix_rows( [x_0^0, x_0^1, .. x_0^n], [x_1^0 .. x_1^n] ... ) * (a0..an)

 #    y=sum(a_i*x^i) find_lsq_poly
 #    y=a*exp(b*x)   find_lsq_exponential
 #    y=a+b*ln(x)    find_lsq_logarithmic
 #    y=a*x^b        find_lsq_powerlaw

 # done:   add support for timestamps
 #   .. use parsedate to identify timestamps.
 #   timestamps can be with, or without date.
 # done:   add support for calc yresult for xval
 # done:   add support for calc xval for targety
 #   .. if list of values starting at col0 follows list, assume that 'y' needs to be calculated
 #   .. if list of values starting with TAB follows list, assum that 'x' needs to be calculated.

 my ($xcol, $ycol)=(0,1);
 my $calclog=1;
 my $maxorder=10;
 my $minorder=0;
 my $xinside;
 my $gnuplot;
 GetOptions(
    "x=s" => \$xcol,
    "y=s" => \$ycol,
    "o=i" => sub { $maxorder= $_[1]+1; $minorder=$_[1]; $calclog=0; },
    "f"   => \$xinside,
    "g"   => \$gnuplot,
 ) or die usage();
 sub usage {
    return <<__EOF__
 Usage: lsq [-x COL] [-y COL] [-o N]
   -x COL  : specify 'x' column ( first col = 0 )
   -y COL  : specify 'y' column
   -o N    : specify order of polynomial
   -f      : try to move factor inside exponent (a*x)^n
   -g      : output gnuplot data
 input data can contain date's + time's + 0xhexdump + a+b*c expressions
 for lines starting with TAB, or ? the corresponding 'x' will be calculated
 for lines ending with ? or which have only one value the corresponding 'y'
 will be calculated
 __EOF__
 }
 my @values;
 my @xvalues;
 my @yvalues;
 my @valueformat;    # -3:date+time, -2:date, -1:time, >=0: decimals
 while (<>) {
    next if /^#/;
    my @line= split /[^0-9x.e:-]+/, $_;
    next unless @line;
    shift @line if $line[0] eq "";
    for (my $i=0 ; $i<@line ; $i++) {
 	    if ($i<$#line && isdate($line[$i]) && istime($line[$i+1])) {
 		    $line[$i]= maketimestamp($line[$i], $line[$i+1]);
 		    $valueformat[$i]= -3;
 		    splice @line, $i+1,1;
 	    }
 	    elsif (isdate($line[$i])) {
 		    $line[$i]= maketimestamp($line[$i], 0);
 		    $valueformat[$i]= -2;
 	    }
 	    elsif (istime($line[$i])) {
 		    $line[$i]= maketimestamp(0, $line[$i]);
 		    $valueformat[$i]= -1;
 	    }
        elsif (isexpr($line[$i])) {
            $line[$i]= eval($line[$i]);
        }
        elsif ($line[$i] =~ /\.(\d+)/ && ( !defined $valueformat[$i] || length($1)>$valueformat[$i])) {
            $valueformat[$i]= length($1);
        }
    }
    if (defined $line[$xcol] && defined $line[$ycol]) {
 	    if ($line[$xcol] ne "?" && $line[$ycol] ne "?") {
 		    push @values, [$line[$xcol], $line[$ycol] ];
 	    }
 	    elsif ($line[$xcol] ne "?") {
 		    push @xvalues, $line[$xcol];
 	    }
 	    elsif ($line[$ycol] ne "?") {
 		    push @yvalues, $line[$ycol];
 	    }
    }
    elsif (@line) {
 	    if (/^\t/) {
 		    push @yvalues, $line[0];
 	    }
 	    else {
 		    push @xvalues, $line[0];
 	    }
    }
 }
 sub isexpr {
    my $number= "(?:0x[0-9a-f]+|0[0-7]+|[1-9][0-9]*)";
    return $_[0] =~ /^$number(?:[+*\/&|^-]$number)*$/;
 }
 sub isdate {
 	return ($_[0] =~ /\d-\d+-\d/);
 }
 sub istime {
 	return ($_[0] =~ /\d:\d/);
 }
 sub maketimestamp {
 	my ($date, $time) = @_;
 	my @ymd= $date ? split(/-/, $date) : (0,0,0);
 	if ($ymd[0]<100) {
 		# y2k fix!
 		if ($ymd[0]<50) {
 			$ymd[0]+=2000;
 		}
 		else {
 			$ymd[0]+=1900;
 		}
 	}
 	my @hms= $time ? split(/:/, $time) : (0,0,0);
 	push @hms,0 if (@hms==2);
 	$ymd[0]-=1900;
 	$ymd[1]-=1;
 	return POSIX::mktime(reverse @ymd,@hms)-($gnuplot ? 946684800 : 0);
 }
 my @errs;    # error for expr
 my @exprs;   # printable expressions
 my @funcs;   # perl functions
 my @gplot;   # gnuplot expressions
 my @title;   # titles for gnuplot
 # first try polynomials
 for (my $order=$minorder ; $order < $maxorder ; $order++) {
    my ($poly, $error)= find_lsq_poly($order, \@values);
    next if (!defined $poly);
    if ($gnuplot) {
    }
    if ($xinside) {
        push @errs, $error;
        push @exprs, join("", map { $_==0 ? sprintf("%5g", $poly->[$_]) :
                $_==1 ? sprintf("%+5g*x", $poly->[$_]) : 
                 sprintf("%s(%5g*x)^%d", $poly->[$_]<0?"-":"+",
                     abs($poly->[$_])**(1/$_), $_) } (0..$#$poly));
    }
    else {
        push @errs, $error;
        push @exprs, join("", map { $_==0 ? sprintf("%5g", $poly->[$_]) :
                $_==1 ? sprintf("%+5g*x", $poly->[$_]) : 
                 sprintf("%+5g*x^%d", $poly->[$_], $_) } (0..$#$poly));
    }

 	push @funcs, sub { my $res=0; for (my $i=$#$poly ; $i>=0 ; $i--) { $res *= $_[0]; $res += $poly->[$i]; } return $res; };
    #p0+x*(p1+x*(p2+x*p3))
    push @gplot, join("+\$1*(", @$poly).(')'x(@$poly-1));
    push @title, countword($order)." order poly";
 }
 sub countword {
    return "1st" if ($_[0]==1);
    return "2nd" if ($_[0]==2);
    return "3rd" if ($_[0]==3);
    return "$_[0]th";
 }
 if ($calclog) {
    {
        my ($ab, $err)= find_lsq_exponential(\@values);
        if ($ab) {
            push @errs, $err;
            push @exprs, sprintf("%5g * exp( %5g * x )", $ab->[0], $ab->[1]);
            push @funcs, sub { return $ab->[0]*exp($_[0]*$ab->[1]); };
            push @gplot, sprintf("%.10g*exp(\$1*%.10g)", $ab->[0], $ab->[1]);
            push @title, "a+exp(b*x)";
        }
    }
    {
        my ($ab, $err)= find_lsq_logarithmic(\@values);
        if ($ab) {
            #  a + b * l(x)  == a + l(x^b) == l(x^b*e^a) 
            push @errs, $err;
            push @exprs, sprintf("%5g + %5g * log( x )", $ab->[0], $ab->[1]);
            push @funcs, sub { if ($_[0]<=0) { return undef; } return $ab->[0]+log($_[0])*$ab->[1]; };
            push @gplot, sprintf("%.10g + %.10g * log( \$1 )", $ab->[0], $ab->[1]);
            push @title, "a+b*log(x)";
        }
    }

    {
        my ($ab, $err)= find_lsq_powerlaw(\@values);
        if ($ab) {
            if ($xinside) {
                push @errs, $err;
                push @exprs, sprintf("%s( %5g * x ) ^ %5g",
                    $ab->[0]<0?"-":"+",
                    abs($ab->[0])**(1/$ab->[1]), $ab->[1]);
            }
            else {
                push @errs, $err;
                push @exprs, sprintf("%5g * x ^ %5g", $ab->[0], $ab->[1]);
            }
            push @funcs, sub { return $ab->[0]*$_[0]**$ab->[1]; };
            push @gplot, sprintf("%.10g * exp(log(\$1)*%.10g)", $ab->[0], $ab->[1]);
            push @title, "a*x^b";
        }
    }
 }
 sub makedatafile {
    my ($values)= @_;
    #my $fh= File::Temp->new();
    my $fh= IO::File->new("/tmp/test.dat", "w");

    for (@$values) {
        $fh->printf("%.10g %.10g\n", @$_);
    }

    #return $fh->filename;
    return "/tmp/test.dat";
 }
 if ($gnuplot) {
    my $tmpdat= makedatafile(\@values);
    printf("plot '%s' using 1:2, %s\n", $tmpdat, join(", ", map { sprintf("'' using 1:(%s) title '%s' smooth unique", $gplot[$_], $title[$_]) } 0..$#gplot));
 }
 else {
    print map { sprintf("%10f %s\n", $errs[$_], $exprs[$_]) } 0..$#exprs;
 }
 for my $xval (@xvalues) {
    printf(" %s", formatvalue($xval, $valueformat[$xcol]));
    for my $f (@funcs) {
        printf(" %s", formatvalue($f->($xval), $valueformat[$ycol]));
    }
    print "\n";
 }
 for my $yval (@yvalues) {
    printf(" %s", formatvalue($yval, $valueformat[$ycol]));;
    for my $f (@funcs) {
        my $xval= findinverse($f, $yval);
        printf(" %s", formatvalue($xval, $valueformat[$xcol]));;
    }
    print "\n";
 }
 sub formatvalue {
    my ($val, $ts)= @_;
    $ts ||= 0;
    if (!defined $val) {
        return "undef";
    }
    if ($ts>=0) {
        return sprintf("%.*f", $ts, $val);
    }
    else {
        my @tv= localtime $val;
        $tv[4]+=1;
        $tv[5]+=1900;
        if ($ts==-1) {
            return sprintf("%02d:%02d:%02d", reverse @tv[0..2]);
        }
        elsif ($ts==-2) {
            return sprintf("%04d-%02d-%02d", reverse @tv[3..5]);
        }
        elsif ($ts==-3) {
            return sprintf("%04d-%02d-%02d %02d:%02d:%02d", reverse @tv[0..5]);
        }
    }
 }
 sub findslope {
    my ($f, $yval, $x)= @_;
    my $x0= $x;
    my $y0= $f->($x0)-$yval;
    my $x0d= $x0+0.0001;
    my $y0d= eval { $f->($x0d)-$yval; };
    if (!defined $y0d || $@) {
        $x0d= $x0-0.0001;
        $y0d= eval { $f->($x0d)-$yval; };
        if (!defined $y0d || $@) {
            return undef;
        }
    }
    if ($x0==$x0d) {
        return 999999999999;
    }
    return ($y0d-$y0)/($x0d-$x0);
 }
 sub findinverse {
    my ($f, $yval)= @_;
    # todo: calc epsilon based on min/max x-value.
    my $x0= $values[0][0];

    my $iter=0;
    while (1) {
        my $s= findslope($f, $yval, $x0);
        if (!defined $s || $s==0) {
            return undef;
        }
        my $y0= $f->($x0)-$yval;
        # y0+(x-x0)*s = 0 -> y0+x*s-x0*s -> x=x0-y0/s
        my $x1= $x0-$y0/$s;
        last if abs($x1-$x0)<0.0001;
        last if ($iter++>100);
        $x0= $x1;
    }

    return $x0;
 }
 sub find_lsq_poly {
    my ($n, $values)= @_;
 # (y_0 .. y_m) = Matrix_rows( [x_0^0, x_0^1, .. x_0^n], [x_1^0 .. x_1^n] ... ) * (a0..an)

    if ( $#$values-$n-1 <= 0 ) {
 	    return undef;
    }
    my @a;
    my @b;
    for (my $row=0 ; $row<=$#$values ; $row++) {
        push @b, $values->[$row][1];
        for (my $col=0 ; $col<=$n ; $col++) {
            $a[$row][$col]= $values->[$row][0]**$col;
        }
    }
    return solve_lsq(\@a, \@b);
 }
 #    y=a*exp(b*x) -> solve ln(y) = ln(a) + b*x
 #    ( 1  x0 )   ( ln(a) )    ( ln(y0) )
 #    ( 1  x1 ) * (   b   )  = ( ln(y1) )
 #    ( 1  x2 )                ( ln(y2) )
 #
 #    todo: how to calculate  err = y-a*exp(b*x)
 #       given   |ln(y)-a-b*x|
 sub find_lsq_exponential {
    my ($values)= @_;
    my @a;
    my @b;
    for (my $row=0 ; $row<=$#$values ; $row++) {
        return undef unless $values->[$row][1] > 0;
        push @b, log($values->[$row][1]);
        $a[$row][0]= 1;
        $a[$row][1]= $values->[$row][0];
    }
    my ($ab, $err)= solve_lsq(\@a, \@b);
    return undef if (!defined $ab);
 #   my $err2=0;
 #   $err2 += ($_->[1]-exp($ab->[0])*exp($ab->[1]*$_->[0]))**2 for @$values;
 #   printf("err1 : %f   err2 : %f\n", $err, sqrt($err2/($#$values-3)));
    return [ exp($ab->[0]), $ab->[1] ], $err;
 }
 #    y=a+b*ln(x)
 sub find_lsq_logarithmic {
    my ($values)= @_;
    my @a;
    my @b;
    for (my $row=0 ; $row<=$#$values ; $row++) {
        return undef unless $values->[$row][0] > 0;
        push @b, $values->[$row][1];
        $a[$row][0]= 1;
        $a[$row][1]= log($values->[$row][0]);
    }
    return solve_lsq(\@a, \@b);
 }
 #    y=a*x^b  ->  ln(y) = ln(a) + b*ln(x)
 sub find_lsq_powerlaw {
    my ($values)= @_;
    my @a;
    my @b;
    for (my $row=0 ; $row<=$#$values ; $row++) {
        return undef unless $values->[$row][1] > 0;
        push @b, log($values->[$row][1]);
        $a[$row][0]= 1;
        $a[$row][1]= log($values->[$row][0]);
    }
    my ($ab, $err)= solve_lsq(\@a, \@b);
    return undef if (!defined $ab);
    return [ exp($ab->[0]), $ab->[1] ], $err;
 }

 sub solve_lsq {
    my ($m_a, $v_b)= @_;
    return undef if (@{$m_a}-@{$m_a->[0]}-1<=0);
    my $A= Math::MatrixReal->new_from_rows($m_a);
    my $b= Math::MatrixReal->new_from_columns([$v_b]);
    my $Ab= (~$A) * $b;
    my $AA = (~$A) * $A;

    my $LR= $AA->decompose_LR();
    my ($dim,$x,$B) = $LR->solve_LR($Ab);

    if (!defined $x) {
 	    printf("NO SOLUTION!\n");
 	    return undef;
    }
    my $Axb= $A*$x-$b;
    my $error= (~$Axb)*$Axb;

    #printf("a : %d x %d -  b : %d  err:%f\n", scalar @$m_a, scalar @{$m_a->[0]}, scalar @$v_b, sqrt($error->element(1,1)/(@{$m_a}-@{$m_a->[0]}-1)));
    return [ map { $x->element($_,1) } (1..@{$m_a->[0]}) ], sqrt($error->element(1,1)/(@{$m_a}-@{$m_a->[0]}-1));
 }
	#!perl -w
	# vim: sw=4 ts=4 et :
	# (C) 2003-2007 Willem Jan Hengeveld <[email protected]>
	# Web: http://www.xs4all.nl/~itsme/
	# http://wiki.xda-developers.com/
	#
	# $Id: $
	#
	# http://en.wikipedia.org/wiki/Linear_regression
	# http://mathworld.wolfram.com/LeastSquaresFitting.html
	# http://mathworld.wolfram.com/LeastSquaresFittingPolynomial.html
	# http://mathworld.wolfram.com/LeastSquaresFittingExponential.html
	# http://mathworld.wolfram.com/LeastSquaresFittingPerpendicularOffsets.html
	# http://mathworld.wolfram.com/NonlinearLeastSquaresFitting.html
	#
	# tests:
	# perl -e 'printf("%5.2f %5.2f\n", $_, 0.4exp(0.2$_)) for 1..100' \| lsq \| sort -n
	# perl -e 'printf("%5.2f %5.2f\n", $_, 0.4+0.2*log($_)) for 1..100' \| lsq \| sort -n
	# perl -e 'printf("%5.2f %5.2f\n", $_, 0.4$_*0.2) for 1..100' \| lsq \| sort -n
	# perl -e 'printf("%5.2f %5.2f\n", $_, 0.4+0.2($_*2)) for 1..100' \| lsq \| sort -n
	# perl -e 'printf("%5.2f %5.2f\n", $_, 0.4+0.2($_*5)) for 1..100' \| lsq \| sort -n
	# perl -e 'printf("%5.2f %5.2f\n", $_, 0.4+0.2($_*7)) for 1..100' \| lsq \| sort -n
	#
	# todo: add fft for periodic signals
	# done: add option to output data+functions with gnuplot

	# todo: read about non-linear least squares Marquardt-Levenberg algorithm

	# ........ how to get proper timestamps:
	# .... also added date+time as first 2 colums
	# set xdata time
	# set timefmt "%Y-%m-%d %H:%M:%S"
	# set format x "%H:%M"
	# plot '/tmp/test.dat' using 1:4, '' using 1:(328677.832933346+$3*(-0.000841587261071837)) title '1st order poly' smooth unique

	use strict;
	use warnings;
	use Getopt::Long;
	use Math::MatrixReal;
	#use File::Temp;
	use IO::File;
	use POSIX;
	$\|=1;
	# simple case: a*x+b=y
	# given many pairs (x,y) find a and b that minimize the error.
	#
	# -> a*x0+b=y0
	# -> a*x1+b=y1
	# -> a*x2+b=y2
	#
	# -> equation
	# ( x0 1 ) ( a ) ( y0 )
	# ( x1 1 ) *( b ) = ( y1 )
	# ( x2 1 ) ( y2 )
	#
	# A * x = y
	#
	# minimize distance to line:
	# d(line,[xy]) = \|A*x-y\|
	#
	# error : \|Ax-y\| = (Ax-y)' * (A*x-y)
	# = x'A'Ax - 2xA'y + y'*y
	# differentiate by x
	# -> 2A'Ax-2A'y = 0 -> A'Ax=A'y
	# mini

	# 'm' measurements
	# 'p' variables
	#
	# y_0=a0x_0^0+a1x_0^1+a2x_0^2+..anx_0^n
	# y_1=a0x_1^0+a1x_1^1+a2x_1^2+..anx_1^n
	# ..
	# y_m=a0x_m^0+amx_m^1+a2x_m^2+..anx_m^n

	# ->
	#
	# (y_0 .. y_m) = Matrix_rows( [x_0^0, x_0^1, .. x_0^n], [x_1^0 .. x_1^n] ... ) * (a0..an)

	# y=sum(a_i*x^i) find_lsq_poly
	# y=aexp(bx) find_lsq_exponential
	# y=a+b*ln(x) find_lsq_logarithmic
	# y=a*x^b find_lsq_powerlaw

	# done: add support for timestamps
	# .. use parsedate to identify timestamps.
	# timestamps can be with, or without date.
	# done: add support for calc yresult for xval
	# done: add support for calc xval for targety
	# .. if list of values starting at col0 follows list, assume that 'y' needs to be calculated
	# .. if list of values starting with TAB follows list, assum that 'x' needs to be calculated.

	my ($xcol, $ycol)=(0,1);
	my $calclog=1;
	my $maxorder=10;
	my $minorder=0;
	my $xinside;
	my $gnuplot;
	GetOptions(
	"x=s" => \$xcol,
	"y=s" => \$ycol,
	"o=i" => sub { $maxorder= $_[1]+1; $minorder=$_[1]; $calclog=0; },
	"f" => \$xinside,
	"g" => \$gnuplot,
	) or die usage();
	sub usage {
	return <<__EOF__
	Usage: lsq [-x COL] [-y COL] [-o N]
	-x COL : specify 'x' column ( first col = 0 )
	-y COL : specify 'y' column
	-o N : specify order of polynomial
	-f : try to move factor inside exponent (a*x)^n
	-g : output gnuplot data
	input data can contain date's + time's + 0xhexdump + a+b*c expressions
	for lines starting with TAB, or ? the corresponding 'x' will be calculated
	for lines ending with ? or which have only one value the corresponding 'y'
	will be calculated
	__EOF__
	}
	my @values;
	my @xvalues;
	my @yvalues;
	my @valueformat; # -3:date+time, -2:date, -1:time, >=0: decimals
	while (<>) {
	next if /^#/;
	my @line= split /[^0-9x.e:-]+/, $_;
	next unless @line;
	shift @line if $line[0] eq "";
	for (my $i=0 ; $i<@line ; $i++) {
	if ($i<$#line && isdate($line[$i]) && istime($line[$i+1])) {
	$line[$i]= maketimestamp($line[$i], $line[$i+1]);
	$valueformat[$i]= -3;
	splice @line, $i+1,1;
	}
	elsif (isdate($line[$i])) {
	$line[$i]= maketimestamp($line[$i], 0);
	$valueformat[$i]= -2;
	}
	elsif (istime($line[$i])) {
	$line[$i]= maketimestamp(0, $line[$i]);
	$valueformat[$i]= -1;
	}
	elsif (isexpr($line[$i])) {
	$line[$i]= eval($line[$i]);
	}
	elsif ($line[$i] =~ /\.(\d+)/ && ( !defined $valueformat[$i] \|\| length($1)>$valueformat[$i])) {
	$valueformat[$i]= length($1);
	}
	}
	if (defined $line[$xcol] && defined $line[$ycol]) {
	if ($line[$xcol] ne "?" && $line[$ycol] ne "?") {
	push @values, [$line[$xcol], $line[$ycol] ];
	}
	elsif ($line[$xcol] ne "?") {
	push @xvalues, $line[$xcol];
	}
	elsif ($line[$ycol] ne "?") {
	push @yvalues, $line[$ycol];
	}
	}
	elsif (@line) {
	if (/^\t/) {
	push @yvalues, $line[0];
	}
	else {
	push @xvalues, $line[0];
	}
	}
	}
	sub isexpr {
	my $number= "(?:0x[0-9a-f]+\|0[0-7]+\|[1-9][0-9]*)";
	return $_[0] =~ /^$number(?:[+\/&\|^-]$number)$/;
	}
	sub isdate {
	return ($_[0] =~ /\d-\d+-\d/);
	}
	sub istime {
	return ($_[0] =~ /\d:\d/);
	}
	sub maketimestamp {
	my ($date, $time) = @_;
	my @ymd= $date ? split(/-/, $date) : (0,0,0);
	if ($ymd[0]<100) {
	# y2k fix!
	if ($ymd[0]<50) {
	$ymd[0]+=2000;
	}
	else {
	$ymd[0]+=1900;
	}
	}
	my @hms= $time ? split(/:/, $time) : (0,0,0);
	push @hms,0 if (@hms==2);
	$ymd[0]-=1900;
	$ymd[1]-=1;
	return POSIX::mktime(reverse @ymd,@hms)-($gnuplot ? 946684800 : 0);
	}
	my @errs; # error for expr
	my @exprs; # printable expressions
	my @funcs; # perl functions
	my @gplot; # gnuplot expressions
	my @title; # titles for gnuplot
	# first try polynomials
	for (my $order=$minorder ; $order < $maxorder ; $order++) {
	my ($poly, $error)= find_lsq_poly($order, \@values);
	next if (!defined $poly);
	if ($gnuplot) {
	}
	if ($xinside) {
	push @errs, $error;
	push @exprs, join("", map { $_==0 ? sprintf("%5g", $poly->[$_]) :
	$_==1 ? sprintf("%+5g*x", $poly->[$_]) :
	sprintf("%s(%5g*x)^%d", $poly->[$_]<0?"-":"+",
	abs($poly->[$_])**(1/$_), $_) } (0..$#$poly));
	}
	else {
	push @errs, $error;
	push @exprs, join("", map { $_==0 ? sprintf("%5g", $poly->[$_]) :
	$_==1 ? sprintf("%+5g*x", $poly->[$_]) :
	sprintf("%+5g*x^%d", $poly->[$_], $_) } (0..$#$poly));
	}

	push @funcs, sub { my $res=0; for (my $i=$#$poly ; $i>=0 ; $i--) { $res *= $_[0]; $res += $poly->[$i]; } return $res; };
	#p0+x(p1+x(p2+x*p3))
	push @gplot, join("+\$1*(", @$poly).(')'x(@$poly-1));
	push @title, countword($order)." order poly";
	}
	sub countword {
	return "1st" if ($_[0]==1);
	return "2nd" if ($_[0]==2);
	return "3rd" if ($_[0]==3);
	return "$_[0]th";
	}
	if ($calclog) {
	{
	my ($ab, $err)= find_lsq_exponential(\@values);
	if ($ab) {
	push @errs, $err;
	push @exprs, sprintf("%5g * exp( %5g * x )", $ab->[0], $ab->[1]);
	push @funcs, sub { return $ab->[0]exp($_[0]$ab->[1]); };
	push @gplot, sprintf("%.10gexp(\$1%.10g)", $ab->[0], $ab->[1]);
	push @title, "a+exp(b*x)";
	}
	}
	{
	my ($ab, $err)= find_lsq_logarithmic(\@values);
	if ($ab) {
	# a + b * l(x) == a + l(x^b) == l(x^b*e^a)
	push @errs, $err;
	push @exprs, sprintf("%5g + %5g * log( x )", $ab->[0], $ab->[1]);
	push @funcs, sub { if ($_[0]<=0) { return undef; } return $ab->[0]+log($_[0])*$ab->[1]; };
	push @gplot, sprintf("%.10g + %.10g * log( \$1 )", $ab->[0], $ab->[1]);
	push @title, "a+b*log(x)";
	}
	}

	{
	my ($ab, $err)= find_lsq_powerlaw(\@values);
	if ($ab) {
	if ($xinside) {
	push @errs, $err;
	push @exprs, sprintf("%s( %5g * x ) ^ %5g",
	$ab->[0]<0?"-":"+",
	abs($ab->[0])**(1/$ab->[1]), $ab->[1]);
	}
	else {
	push @errs, $err;
	push @exprs, sprintf("%5g * x ^ %5g", $ab->[0], $ab->[1]);
	}
	push @funcs, sub { return $ab->[0]$_[0]*$ab->[1]; };
	push @gplot, sprintf("%.10g * exp(log(\$1)*%.10g)", $ab->[0], $ab->[1]);
	push @title, "a*x^b";
	}
	}
	}
	sub makedatafile {
	my ($values)= @_;
	#my $fh= File::Temp->new();
	my $fh= IO::File->new("/tmp/test.dat", "w");

	for (@$values) {
	$fh->printf("%.10g %.10g\n", @$_);
	}

	#return $fh->filename;
	return "/tmp/test.dat";
	}
	if ($gnuplot) {
	my $tmpdat= makedatafile(\@values);
	printf("plot '%s' using 1:2, %s\n", $tmpdat, join(", ", map { sprintf("'' using 1:(%s) title '%s' smooth unique", $gplot[$_], $title[$_]) } 0..$#gplot));
	}
	else {
	print map { sprintf("%10f %s\n", $errs[$_], $exprs[$_]) } 0..$#exprs;
	}
	for my $xval (@xvalues) {
	printf(" %s", formatvalue($xval, $valueformat[$xcol]));
	for my $f (@funcs) {
	printf(" %s", formatvalue($f->($xval), $valueformat[$ycol]));
	}
	print "\n";
	}
	for my $yval (@yvalues) {
	printf(" %s", formatvalue($yval, $valueformat[$ycol]));;
	for my $f (@funcs) {
	my $xval= findinverse($f, $yval);
	printf(" %s", formatvalue($xval, $valueformat[$xcol]));;
	}
	print "\n";
	}
	sub formatvalue {
	my ($val, $ts)= @_;
	$ts \|\|= 0;
	if (!defined $val) {
	return "undef";
	}
	if ($ts>=0) {
	return sprintf("%.*f", $ts, $val);
	}
	else {
	my @tv= localtime $val;
	$tv[4]+=1;
	$tv[5]+=1900;
	if ($ts==-1) {
	return sprintf("%02d:%02d:%02d", reverse @tv[0..2]);
	}
	elsif ($ts==-2) {
	return sprintf("%04d-%02d-%02d", reverse @tv[3..5]);
	}
	elsif ($ts==-3) {
	return sprintf("%04d-%02d-%02d %02d:%02d:%02d", reverse @tv[0..5]);
	}
	}
	}
	sub findslope {
	my ($f, $yval, $x)= @_;
	my $x0= $x;
	my $y0= $f->($x0)-$yval;
	my $x0d= $x0+0.0001;
	my $y0d= eval { $f->($x0d)-$yval; };
	if (!defined $y0d \|\| $@) {
	$x0d= $x0-0.0001;
	$y0d= eval { $f->($x0d)-$yval; };
	if (!defined $y0d \|\| $@) {
	return undef;
	}
	}
	if ($x0==$x0d) {
	return 999999999999;
	}
	return ($y0d-$y0)/($x0d-$x0);
	}
	sub findinverse {
	my ($f, $yval)= @_;
	# todo: calc epsilon based on min/max x-value.
	my $x0= $values[0][0];

	my $iter=0;
	while (1) {
	my $s= findslope($f, $yval, $x0);
	if (!defined $s \|\| $s==0) {
	return undef;
	}
	my $y0= $f->($x0)-$yval;
	# y0+(x-x0)s = 0 -> y0+xs-x0*s -> x=x0-y0/s
	my $x1= $x0-$y0/$s;
	last if abs($x1-$x0)<0.0001;
	last if ($iter++>100);
	$x0= $x1;
	}

	return $x0;
	}
	sub find_lsq_poly {
	my ($n, $values)= @_;
	# (y_0 .. y_m) = Matrix_rows( [x_0^0, x_0^1, .. x_0^n], [x_1^0 .. x_1^n] ... ) * (a0..an)

	if ( $#$values-$n-1 <= 0 ) {
	return undef;
	}
	my @a;
	my @b;
	for (my $row=0 ; $row<=$#$values ; $row++) {
	push @b, $values->[$row][1];
	for (my $col=0 ; $col<=$n ; $col++) {
	$a[$row][$col]= $values->[$row][0]**$col;
	}
	}
	return solve_lsq(\@a, \@b);
	}
	# y=aexp(bx) -> solve ln(y) = ln(a) + b*x
	# ( 1 x0 ) ( ln(a) ) ( ln(y0) )
	# ( 1 x1 ) * ( b ) = ( ln(y1) )
	# ( 1 x2 ) ( ln(y2) )
	#
	# todo: how to calculate err = y-aexp(bx)
	# given \|ln(y)-a-b*x\|
	sub find_lsq_exponential {
	my ($values)= @_;
	my @a;
	my @b;
	for (my $row=0 ; $row<=$#$values ; $row++) {
	return undef unless $values->[$row][1] > 0;
	push @b, log($values->[$row][1]);
	$a[$row][0]= 1;
	$a[$row][1]= $values->[$row][0];
	}
	my ($ab, $err)= solve_lsq(\@a, \@b);
	return undef if (!defined $ab);
	# my $err2=0;
	# $err2 += ($_->[1]-exp($ab->[0])exp($ab->[1]$_->[0]))**2 for @$values;
	# printf("err1 : %f err2 : %f\n", $err, sqrt($err2/($#$values-3)));
	return [ exp($ab->[0]), $ab->[1] ], $err;
	}
	# y=a+b*ln(x)
	sub find_lsq_logarithmic {
	my ($values)= @_;
	my @a;
	my @b;
	for (my $row=0 ; $row<=$#$values ; $row++) {
	return undef unless $values->[$row][0] > 0;
	push @b, $values->[$row][1];
	$a[$row][0]= 1;
	$a[$row][1]= log($values->[$row][0]);
	}
	return solve_lsq(\@a, \@b);
	}
	# y=ax^b -> ln(y) = ln(a) + bln(x)
	sub find_lsq_powerlaw {
	my ($values)= @_;
	my @a;
	my @b;
	for (my $row=0 ; $row<=$#$values ; $row++) {
	return undef unless $values->[$row][1] > 0;
	push @b, log($values->[$row][1]);
	$a[$row][0]= 1;
	$a[$row][1]= log($values->[$row][0]);
	}
	my ($ab, $err)= solve_lsq(\@a, \@b);
	return undef if (!defined $ab);
	return [ exp($ab->[0]), $ab->[1] ], $err;
	}

	sub solve_lsq {
	my ($m_a, $v_b)= @_;
	return undef if (@{$m_a}-@{$m_a->[0]}-1<=0);
	my $A= Math::MatrixReal->new_from_rows($m_a);
	my $b= Math::MatrixReal->new_from_columns([$v_b]);
	my $Ab= (~$A) * $b;
	my $AA = (~$A) * $A;

	my $LR= $AA->decompose_LR();
	my ($dim,$x,$B) = $LR->solve_LR($Ab);

	if (!defined $x) {
	printf("NO SOLUTION!\n");
	return undef;
	}
	my $Axb= $A*$x-$b;
	my $error= (~$Axb)*$Axb;

	#printf("a : %d x %d - b : %d err:%f\n", scalar @$m_a, scalar @{$m_a->[0]}, scalar @$v_b, sqrt($error->element(1,1)/(@{$m_a}-@{$m_a->[0]}-1)));
	return [ map { $x->element($_,1) } (1..@{$m_a->[0]}) ], sqrt($error->element(1,1)/(@{$m_a}-@{$m_a->[0]}-1));
	}