denis-bz · June 8, 2016 15:10
diff --git a/Fitline.md b/Fitline.md
diff --git a/fitline.py b/fitline.py
 #!/usr/bin/env python2
 """ Fitline: fit a line to data, print plot trim the biggest residuals

 Example:
 --------

    from fitline import Fitline
        # a class with functions .fit( x y ) ... and fields .a .b .residuals ...

    F = Fitline( level=.95, nbig=5, verbose=1, axis=None, xline=None )  # the defaults
    F.fit( x, y )  # -> F.a F.b F.residuals ...
    xtrim, ytrim = F.trimfit( x, y, niter=1 )  # trim nbig points, fit again

 See [https://github.com/denis-bz/plots/issues/1](https://github.com/denis-bz/plots/issues/1)
 for an example plot.

 Fitline args, with defaults:
 ---
 level 0.95:
        the level for confidence / prediction bands, aka 1 - alpha
 nbig 5:
        track the few biggest residuals `|y_i - line(x_i)|`
 verbose 1:  prints `fit()` info like
        fitline 164 points:  intercept 0.52 +- 0.12   slope 0.319 +- 0.1
        residuals: sd 0.627   var 0.394   biggest res^2 are [9 5 5 4 3] ... % of the total
 axis None:
        a matplotlib `fig` or `axis` from e.g. `fig, axes = pl.subplots()`, or `None`.

 Fitline functions
 -----------------
 F.fit( x, y ):
            fit a line `y ~= a + slope * x` to 1d numpy arrays or array-like.
 F.line( newx ):
            the line, `F.a + F.b * newx`. (`F.predict()` is a synonym for `F.line()` .)
 xtrim, ytrim = F.trim( x, y ):
            x, y without the `nbig` points with the biggest residuals, e.g. 100 -> 95 points
 xtrim, ytrim = F.trimfit( x, y, niter=1 ):
            trim `nbig` points, fit again
 F.plot( x, y ):
            is called from `fit()` if `Fitline( axis=fig )` or `axis=axis`.
            It plots the points, `line( F.xline )`,
            and confidence and prediction regions, using matplotlib .
 F.se_conf( xx ), F.se_predict( xx ):
            See wikipedia Confidence_and_prediction_bands

 Fitline fields
 --------------

    a b  slope == b
    se_a se_b  se_slope == se_b
    mean_x mean_y
    residuals res2 sd_res var_res
    bigres jbig xbig ybig
    summary

 Please see the code for details.


 See also: Fitline.md under https://gist.github.com/denis-bz .
 """

 # Credits: https://github.com/thomas-haslwanter/statsintro fitLine.py
 #     author : thomas haslwanter
 #     date : 13.Dec.2012
 #     ver : 1.2


 #...............................................................................
 from __future__ import division
 import numpy as np
 from scipy import stats

 __version__ = "2015-07-02 July  denis-bz-py t-online de"


 #-------------------------------------------------------------------------------
 class Fitline( object ):  #{
    __doc__ = globals()["__doc__"]

    def __init__( s, level=.95, nbig=5, verbose=1, axis=None, xline=None, plotconf=1 ):
        s.level     = level
        s.nbig      = nbig
        s.verbose   = verbose
        s.axis      = axis
        assert axis is None  or hasattr( axis, "plot" )  # figure or axis
        s.xline = xline  # None -> np.linspace( xlo, xhi, 101 )  to plot line and conf / pred
        s.plotconf = plotconf  # 1: plot dashed conf / pred bands first fit() only
        s.color = "orangered"
        if verbose:
            print "\nFitline( level=%.3g, nbig=%d )" % (level, nbig)

    #...........................................................................
    def fit( s, x, y ):
        """ Fit a line to the data, y ~ a + slope * x """
        x = np.squeeze( np.asarray_chkfinite( x ))  # squeeze: n,1 -> n
        y = np.squeeze( np.asarray_chkfinite( y ))
        n = len(x)
        assert x.shape == y.shape == (n,), (x.shape, y.shape)  # 1d
        mean_x = np.mean(x)
        mean_y = np.mean(y)
        Sxx = n * x.var()
        Sxy = np.sum(x*y) - n * mean_x * mean_y
        slope = Sxy / Sxx
        a = mean_y - slope * mean_x  # medians ?

        residuals = y - (a + slope * x)
        res2 = residuals**2
        var_res = np.sum(res2) / (n - 2)
        sd_res = np.sqrt( var_res )

        jbig = res2.argsort() [::-1]  # decreasing
        bigres = res2[ jbig[:s.nbig] ]  # the nbig biggest
        bigres *= 100 / res2.sum()  # or /= sd_res ?

            # Confidence intervals: a +- tval*se_a, slope +- tval*se_slope
        sumx2 = Sxx + n * mean_x**2  # = np.sum( x**2 )
        se_a = sd_res * np.sqrt( sumx2 / (n*Sxx) )
        se_slope = sd_res / np.sqrt( Sxx )

        alpha = 1 - s.level
        tval = stats.t.isf( alpha / 2, n - 2 )  # level .68 .95 -> t ~ 1 2  for large n

        a0 = a if abs(a) > 1e-10  else 0
        summary = "fitline %d points:  intercept %.3g +- %.2g  slope %.3g +- %.2g " % (
                n, a0, tval*se_a, slope, tval*se_slope )

        s.a = s.intercept = a
        s.b = s.slope = slope
        s.se_a      = se_a
        s.se_b = s.se_slope = se_slope
        s.mean_x    = mean_x
        s.mean_y    = mean_y
        s.n         = n
        s.residuals = residuals
        s.res2      = res2
        s.sd_res    = sd_res
        s.var_res   = var_res
        s.bigres    = bigres
        s.jbig      = jbig
        s.xbig      = x[jbig[:s.nbig]]  # to plot / trim the biggest residuals
        s.ybig      = y[jbig[:s.nbig]]
        s.Sxx       = Sxx
        s.tval      = tval
        s.summary   = summary
        # s.x = x.copy()
        # s.y = y.copy()

        if s.xline is None:
            s.xline = np.linspace( x.min(), x.max(), 51 )  # to line_conf_pred / plot
        if s.verbose:
            print "\n%s" % summary
            print "residuals: sd %.3g  variance %.3g  biggest res^2 are %s ... %% of the total " % (
                    sd_res, var_res, _ints( bigres ))
            s.line_conf_pred( s.xline )
            print ""

        if s.axis is not None:
            s.plot( x, y )

        return s

    #...........................................................................
    def line( s, x ):
        x = np.squeeze( np.asarray_chkfinite( x ))
        return s.a + s.slope * x

    predict = line  # fit( X y ), predict( new X ) is common

    def se_conf( s, xx ):  # using sd_res n mean_x Sxx from last fit
        """ confidence band: line +- tval * this """
        xx = np.squeeze( np.asarray_chkfinite( xx ))
        return s.sd_res * np.sqrt( 1/s.n + (xx - s.mean_x)**2 / s.Sxx )

    def se_predict( s, xx ):
        """ prediction band: line +- tval * this """
        xx = np.squeeze( np.asarray_chkfinite( xx ))
        return s.sd_res * np.sqrt( 1 + 1/s.n + (xx - s.mean_x)**2 / s.Sxx )

    def line_conf_pred( s, xline ):
        xline = np.sort(xline)
        line = s.line( xline )
        conf = s.tval * s.se_conf( xline )
        pred = s.tval * s.se_predict( xline )
        if s.verbose:
            print "line:                ", line  # with caller's np.set_printoptions
            print "confidence band:   +-", conf
            print "predict data band: +-", pred
        return line, pred, conf

    #...........................................................................
    def plot( s, x, y ):
        """ plot:
            dots( x y )
            nbig vertical lines to the points with the biggest residuals
            line( xline )
            confidence  yline +- tval * se_conf(x)
            prediction  iyline +- tval * se_predict(x)
        """
        if s.axis is None:
            return

        s.axis.plot( x, y, "b." )  # blue dots

            # vertical lines, x_i y_i -- line, of the few biggest residuals --
            # (vertical lines << squares; rug res^2 ?)
        if s.nbig > 0:
            s.axis.vlines( s.xbig, s.ybig, s.line( s.xbig ), colors=s.color, lw=.5 )
            txt = "%.0f %%" % s.bigres[0]
            s.axis.text( s.xbig[0], s.ybig[0], txt, color=s.color, ha="center", va="center" )

        xline = s.xline
        yline = s.line( xline )
        s.axis.plot( s.xline, yline, color=s.color, lw=2, label=s.summary )
        s.axis.plot( s.mean_x, s.mean_y, "o", color=s.color, markersize=5 )
                # markerfacecolor="none" )  # seaborn bug

            # confidence / prediction bands --
        if s.level > 0  and s.plotconf > 0:
            s.plotconf -= 1
            conf = s.tval * s.se_conf( xline )
            pred = s.tval * s.se_predict( xline )

            label = "%.3g %% confidence band for lines" % (s.level * 100)
            color = "sandybrown"
            s.axis.plot( xline, yline + conf, linestyle="--", color=color, lw=1, label=label )
            s.axis.plot( xline, yline - conf, linestyle="--", color=color, lw=1 )

            label = "%.3g %% prediction band for data" % (s.level * 100)
            s.axis.plot( xline, yline + pred, linestyle="--", color=color, lw=1, label=label )
            s.axis.plot( xline, yline - pred, linestyle="--", color=color, lw=1 )

    #...........................................................................
    def trim( s, x, y ):
        """ *same* x y as fit( x y )
            - nbig points with the biggest residuals
            sorted by res^2 decreasing
        """
        j = s.jbig[s.nbig:]
        return x[j], y[j]  # from last fit()

    #...........................................................................
    def trimfit( s, x, y, niter=1, colors="forestgreen purple dodgerblue" ):
        """ trim, fit again ... -> last x y """
            # keep jbig -> orig x y ? messy
            # cf. RFE, iter trim small features
        if isinstance( colors, basestring ):
            colors = colors.split()

        for jiter in range(niter):
            if len(x) <= s.nbig:  break
            if s.verbose:
                print "trim: %s %% of the total residuals^2 " % _ints( s.bigres )
                print "trim: x %s" % s.xbig  # print with caller's np.set_printoptions
                print "trim: y %s" % s.ybig
            x, y = s.trim( x, y )
            if s.axis:
                s.color = colors[jiter % 3]
            s.fit( x, y )  # -> new .jbig .xbig .ybig

        return x, y

 #}

 def _ints( x ):
    return np.round(x) .astype(int)
diff --git a/test-fitline.py b/test-fitline.py
 #!/usr/bin/env python2
 """ test Fitline: fit lines to data, print plot trim the biggest residuals """

 from __future__ import division
 import sys
 import numpy as np

 from fitline import Fitline  # or Medianline

 __version__ = "2015-07-07 Jul  denis-bz-py t-online de"

 q = np.r_[ 0,25,50,75,100 ]
 def fivenum( X, q=q, axis=None ):
    return np.percentile( X, q, axis=axis )  # 5 x ncol


 #...........................................................................
 csvin = ".webdata/etc/usconsumption.csv"  # consumption, income (y x) from R package fpp
 # csvin = ".webdata/etc/ozone.csv"
 out = ""  # "uscon-fitline.nptxt"
 # csvin = "4.csv"  # 4 points R stackoverflow.com/questions/12544090
 ycol = 0  # yx / xy
 transform = 0  # 1/3  # StatLearn ozone
 tilt = None
 mid = True  # fitline / midline

 level = .95
 nbig = 5
 niter = 1
 nxline = 51
 plot = 0
 seed = 0

    # to change these params in sh or ipython, run this.py  a=1  b=None  c=[3] ...
 for arg in sys.argv[1:]:
    exec( arg )

 np.set_printoptions( threshold=10, edgeitems=5, linewidth=120,
    formatter = dict( float = lambda x: "%.3g" % x ))  # float arrays %.3g
 np.random.seed( seed )

 if plot:
    from matplotlib import pyplot as pl
    import seaborn as sns  # http://stanford.edu/~mwaskom/software/seaborn

    fig, ax = pl.subplots( figsize=[10, 10] )
 else:
    ax = None

 if mid:
    from medianline import Medianline as Fitline

 #...........................................................................
 X = np.loadtxt( csvin )  # , delimiter=",", skiprows=1 )
 print "read", csvin, X.shape
 csvin = csvin.split("/")[-1]

 y = X[:,ycol]  # yx / xy
 x = X[:,1 - ycol]
 jsort = x.argsort()
 x, y = x[jsort], y[jsort]

 if transform > 0:
    print "y ** %.3g" % transform
    y **= transform
 five = fivenum( X, axis=0 )
 print "column percentiles %s: \n%s" % (q, five)

 if tilt is not None:
    print "tilt: y -= %.3g * x" % tilt  # cheap approx ortho / Deming regression
    y -= tilt * x

 xlo, xhi = x.min(), x.max()
 ylo, yhi = y.min(), y.max()
 xline = np.linspace( xlo, xhi, nxline )
 if plot:
    pl.suptitle( "fitline %s, trimming the %d biggest residuals  median %s " % (
            csvin, nbig, mid ))
    ax.set_xlim( xlo, xhi * 1.01 )
    ax.set_ylim( ylo, yhi )

 #...........................................................................
 F = Fitline( nbig=nbig, verbose=1, axis=ax, xline=xline, level=level )  # or Medianline
    # a class instance with .fit( x y ), .trim( x y ), .a .b .residuals ...
 F.fit( x, y )  # print, plot

 if out  and not mid:
    line, conf, pred = F.line_conf_pred( x )
    xyline = np.c_[ x, y, line, conf, pred ]  # compare R
    print "writing", out, xyline.shape
    header = "x  y  line  +- conf  +- pred  %s  level %.3g  %s" % (
            F.summary, level, csvin )
    np.savetxt( out, xyline, fmt="%.3g\t", header=header )

 #...........................................................................
    # trim the biggest points, iterate --
 F.trimfit( x, y, niter=niter )

 if plot:
    ax.legend()
    if plot >= 2:
        try:
            from bz.etc import numpyutil as nu
            nu.signfig( __file__ )
        except ImportError:
            pass
        pl.savefig( "tmp.png" )
    pl.show()

diff --git a/test-midline.log b/test-midline.log
 # from: test-fitline.py
 # run: 7 Jul 2015 14:57  in ~bz/py/etc/stats/fitline  Denis-iMac 10.8.3
 # versions: numpy 1.9.2  scipy 0.15.1   python 2.7.6   mac 10.8.3

 read .webdata/etc/usconsumption.csv (164, 2)
 column percentiles [  0  25  50  75 100]: 
 [[-2.3 -2.31]
 [0.405 0.31]
 [0.8 0.748]
 [1.11 1.26]
 [2.32 4.56]]

 Midline( nbig=5 )

 midline 164 points:  intercept 0.365  slope 0.583 
 residuals: biggest res^2 are [5 5 5 5 4] ... % of the total 

 trim: [5 5 5 5 4] % of the total residuals^2 
 trim: x [-0.621 -1.17 -1.41 -1.36 -0.0577]
 trim: y [1.88 1.55 -2.3 1.41 -1.31]

 midline 159 points:  intercept 0.365  slope 0.573 
 residuals: biggest res^2 are [4 4 4 4 4] ... % of the total
	#!/usr/bin/env python2
	""" Fitline: fit a line to data, print plot trim the biggest residuals

	Example:
	--------

	from fitline import Fitline
	# a class with functions .fit( x y ) ... and fields .a .b .residuals ...

	F = Fitline( level=.95, nbig=5, verbose=1, axis=None, xline=None ) # the defaults
	F.fit( x, y ) # -> F.a F.b F.residuals ...
	xtrim, ytrim = F.trimfit( x, y, niter=1 ) # trim nbig points, fit again

	See [https://github.com/denis-bz/plots/issues/1](https://github.com/denis-bz/plots/issues/1)
	for an example plot.

	Fitline args, with defaults:
	---
	level 0.95:
	the level for confidence / prediction bands, aka 1 - alpha
	nbig 5:
	track the few biggest residuals `\|y_i - line(x_i)\|`
	verbose 1: prints `fit()` info like
	fitline 164 points: intercept 0.52 +- 0.12 slope 0.319 +- 0.1
	residuals: sd 0.627 var 0.394 biggest res^2 are [9 5 5 4 3] ... % of the total
	axis None:
	a matplotlib `fig` or `axis` from e.g. `fig, axes = pl.subplots()`, or `None`.

	Fitline functions
	-----------------
	F.fit( x, y ):
	fit a line `y ~= a + slope * x` to 1d numpy arrays or array-like.
	F.line( newx ):
	the line, `F.a + F.b * newx`. (`F.predict()` is a synonym for `F.line()` .)
	xtrim, ytrim = F.trim( x, y ):
	x, y without the `nbig` points with the biggest residuals, e.g. 100 -> 95 points
	xtrim, ytrim = F.trimfit( x, y, niter=1 ):
	trim `nbig` points, fit again
	F.plot( x, y ):
	is called from `fit()` if `Fitline( axis=fig )` or `axis=axis`.
	It plots the points, `line( F.xline )`,
	and confidence and prediction regions, using matplotlib .
	F.se_conf( xx ), F.se_predict( xx ):
	See wikipedia Confidence_and_prediction_bands

	Fitline fields
	--------------

	a b slope == b
	se_a se_b se_slope == se_b
	mean_x mean_y
	residuals res2 sd_res var_res
	bigres jbig xbig ybig
	summary

	Please see the code for details.


	See also: Fitline.md under https://gist.github.com/denis-bz .
	"""

	# Credits: https://github.com/thomas-haslwanter/statsintro fitLine.py
	# author : thomas haslwanter
	# date : 13.Dec.2012
	# ver : 1.2


	#...............................................................................
	from __future__ import division
	import numpy as np
	from scipy import stats

	__version__ = "2015-07-02 July denis-bz-py t-online de"


	#-------------------------------------------------------------------------------
	class Fitline( object ): #{
	__doc__ = globals()["__doc__"]

	def __init__( s, level=.95, nbig=5, verbose=1, axis=None, xline=None, plotconf=1 ):
	s.level = level
	s.nbig = nbig
	s.verbose = verbose
	s.axis = axis
	assert axis is None or hasattr( axis, "plot" ) # figure or axis
	s.xline = xline # None -> np.linspace( xlo, xhi, 101 ) to plot line and conf / pred
	s.plotconf = plotconf # 1: plot dashed conf / pred bands first fit() only
	s.color = "orangered"
	if verbose:
	print "\nFitline( level=%.3g, nbig=%d )" % (level, nbig)

	#...........................................................................
	def fit( s, x, y ):
	""" Fit a line to the data, y ~ a + slope * x """
	x = np.squeeze( np.asarray_chkfinite( x )) # squeeze: n,1 -> n
	y = np.squeeze( np.asarray_chkfinite( y ))
	n = len(x)
	assert x.shape == y.shape == (n,), (x.shape, y.shape) # 1d
	mean_x = np.mean(x)
	mean_y = np.mean(y)
	Sxx = n * x.var()
	Sxy = np.sum(xy) - n mean_x * mean_y
	slope = Sxy / Sxx
	a = mean_y - slope * mean_x # medians ?

	residuals = y - (a + slope * x)
	res2 = residuals**2
	var_res = np.sum(res2) / (n - 2)
	sd_res = np.sqrt( var_res )

	jbig = res2.argsort() [::-1] # decreasing
	bigres = res2[ jbig[:s.nbig] ] # the nbig biggest
	bigres *= 100 / res2.sum() # or /= sd_res ?

	# Confidence intervals: a +- tvalse_a, slope +- tvalse_slope
	sumx2 = Sxx + n * mean_x2 # = np.sum( x2 )
	se_a = sd_res * np.sqrt( sumx2 / (n*Sxx) )
	se_slope = sd_res / np.sqrt( Sxx )

	alpha = 1 - s.level
	tval = stats.t.isf( alpha / 2, n - 2 ) # level .68 .95 -> t ~ 1 2 for large n

	a0 = a if abs(a) > 1e-10 else 0
	summary = "fitline %d points: intercept %.3g +- %.2g slope %.3g +- %.2g " % (
	n, a0, tvalse_a, slope, tvalse_slope )

	s.a = s.intercept = a
	s.b = s.slope = slope
	s.se_a = se_a
	s.se_b = s.se_slope = se_slope
	s.mean_x = mean_x
	s.mean_y = mean_y
	s.n = n
	s.residuals = residuals
	s.res2 = res2
	s.sd_res = sd_res
	s.var_res = var_res
	s.bigres = bigres
	s.jbig = jbig
	s.xbig = x[jbig[:s.nbig]] # to plot / trim the biggest residuals
	s.ybig = y[jbig[:s.nbig]]
	s.Sxx = Sxx
	s.tval = tval
	s.summary = summary
	# s.x = x.copy()
	# s.y = y.copy()

	if s.xline is None:
	s.xline = np.linspace( x.min(), x.max(), 51 ) # to line_conf_pred / plot
	if s.verbose:
	print "\n%s" % summary
	print "residuals: sd %.3g variance %.3g biggest res^2 are %s ... %% of the total " % (
	sd_res, var_res, _ints( bigres ))
	s.line_conf_pred( s.xline )
	print ""

	if s.axis is not None:
	s.plot( x, y )

	return s

	#...........................................................................
	def line( s, x ):
	x = np.squeeze( np.asarray_chkfinite( x ))
	return s.a + s.slope * x

	predict = line # fit( X y ), predict( new X ) is common

	def se_conf( s, xx ): # using sd_res n mean_x Sxx from last fit
	""" confidence band: line +- tval * this """
	xx = np.squeeze( np.asarray_chkfinite( xx ))
	return s.sd_res * np.sqrt( 1/s.n + (xx - s.mean_x)**2 / s.Sxx )

	def se_predict( s, xx ):
	""" prediction band: line +- tval * this """
	xx = np.squeeze( np.asarray_chkfinite( xx ))
	return s.sd_res * np.sqrt( 1 + 1/s.n + (xx - s.mean_x)**2 / s.Sxx )

	def line_conf_pred( s, xline ):
	xline = np.sort(xline)
	line = s.line( xline )
	conf = s.tval * s.se_conf( xline )
	pred = s.tval * s.se_predict( xline )
	if s.verbose:
	print "line: ", line # with caller's np.set_printoptions
	print "confidence band: +-", conf
	print "predict data band: +-", pred
	return line, pred, conf

	#...........................................................................
	def plot( s, x, y ):
	""" plot:
	dots( x y )
	nbig vertical lines to the points with the biggest residuals
	line( xline )
	confidence yline +- tval * se_conf(x)
	prediction iyline +- tval * se_predict(x)
	"""
	if s.axis is None:
	return

	s.axis.plot( x, y, "b." ) # blue dots

	# vertical lines, x_i y_i -- line, of the few biggest residuals --
	# (vertical lines << squares; rug res^2 ?)
	if s.nbig > 0:
	s.axis.vlines( s.xbig, s.ybig, s.line( s.xbig ), colors=s.color, lw=.5 )
	txt = "%.0f %%" % s.bigres[0]
	s.axis.text( s.xbig[0], s.ybig[0], txt, color=s.color, ha="center", va="center" )

	xline = s.xline
	yline = s.line( xline )
	s.axis.plot( s.xline, yline, color=s.color, lw=2, label=s.summary )
	s.axis.plot( s.mean_x, s.mean_y, "o", color=s.color, markersize=5 )
	# markerfacecolor="none" ) # seaborn bug

	# confidence / prediction bands --
	if s.level > 0 and s.plotconf > 0:
	s.plotconf -= 1
	conf = s.tval * s.se_conf( xline )
	pred = s.tval * s.se_predict( xline )

	label = "%.3g %% confidence band for lines" % (s.level * 100)
	color = "sandybrown"
	s.axis.plot( xline, yline + conf, linestyle="--", color=color, lw=1, label=label )
	s.axis.plot( xline, yline - conf, linestyle="--", color=color, lw=1 )

	label = "%.3g %% prediction band for data" % (s.level * 100)
	s.axis.plot( xline, yline + pred, linestyle="--", color=color, lw=1, label=label )
	s.axis.plot( xline, yline - pred, linestyle="--", color=color, lw=1 )

	#...........................................................................
	def trim( s, x, y ):
	""" same x y as fit( x y )
	- nbig points with the biggest residuals
	sorted by res^2 decreasing
	"""
	j = s.jbig[s.nbig:]
	return x[j], y[j] # from last fit()

	#...........................................................................
	def trimfit( s, x, y, niter=1, colors="forestgreen purple dodgerblue" ):
	""" trim, fit again ... -> last x y """
	# keep jbig -> orig x y ? messy
	# cf. RFE, iter trim small features
	if isinstance( colors, basestring ):
	colors = colors.split()

	for jiter in range(niter):
	if len(x) <= s.nbig: break
	if s.verbose:
	print "trim: %s %% of the total residuals^2 " % _ints( s.bigres )
	print "trim: x %s" % s.xbig # print with caller's np.set_printoptions
	print "trim: y %s" % s.ybig
	x, y = s.trim( x, y )
	if s.axis:
	s.color = colors[jiter % 3]
	s.fit( x, y ) # -> new .jbig .xbig .ybig

	return x, y

	#}

	def _ints( x ):
	return np.round(x) .astype(int)
	#!/usr/bin/env python2
	""" test Fitline: fit lines to data, print plot trim the biggest residuals """

	from __future__ import division
	import sys
	import numpy as np

	from fitline import Fitline # or Medianline

	__version__ = "2015-07-07 Jul denis-bz-py t-online de"

	q = np.r_[ 0,25,50,75,100 ]
	def fivenum( X, q=q, axis=None ):
	return np.percentile( X, q, axis=axis ) # 5 x ncol


	#...........................................................................
	csvin = ".webdata/etc/usconsumption.csv" # consumption, income (y x) from R package fpp
	# csvin = ".webdata/etc/ozone.csv"
	out = "" # "uscon-fitline.nptxt"
	# csvin = "4.csv" # 4 points R stackoverflow.com/questions/12544090
	ycol = 0 # yx / xy
	transform = 0 # 1/3 # StatLearn ozone
	tilt = None
	mid = True # fitline / midline

	level = .95
	nbig = 5
	niter = 1
	nxline = 51
	plot = 0
	seed = 0

	# to change these params in sh or ipython, run this.py a=1 b=None c=[3] ...
	for arg in sys.argv[1:]:
	exec( arg )

	np.set_printoptions( threshold=10, edgeitems=5, linewidth=120,
	formatter = dict( float = lambda x: "%.3g" % x )) # float arrays %.3g
	np.random.seed( seed )

	if plot:
	from matplotlib import pyplot as pl
	import seaborn as sns # http://stanford.edu/~mwaskom/software/seaborn

	fig, ax = pl.subplots( figsize=[10, 10] )
	else:
	ax = None

	if mid:
	from medianline import Medianline as Fitline

	#...........................................................................
	X = np.loadtxt( csvin ) # , delimiter=",", skiprows=1 )
	print "read", csvin, X.shape
	csvin = csvin.split("/")[-1]

	y = X[:,ycol] # yx / xy
	x = X[:,1 - ycol]
	jsort = x.argsort()
	x, y = x[jsort], y[jsort]

	if transform > 0:
	print "y ** %.3g" % transform
	y **= transform
	five = fivenum( X, axis=0 )
	print "column percentiles %s: \n%s" % (q, five)

	if tilt is not None:
	print "tilt: y -= %.3g * x" % tilt # cheap approx ortho / Deming regression
	y -= tilt * x

	xlo, xhi = x.min(), x.max()
	ylo, yhi = y.min(), y.max()
	xline = np.linspace( xlo, xhi, nxline )
	if plot:
	pl.suptitle( "fitline %s, trimming the %d biggest residuals median %s " % (
	csvin, nbig, mid ))
	ax.set_xlim( xlo, xhi * 1.01 )
	ax.set_ylim( ylo, yhi )

	#...........................................................................
	F = Fitline( nbig=nbig, verbose=1, axis=ax, xline=xline, level=level ) # or Medianline
	# a class instance with .fit( x y ), .trim( x y ), .a .b .residuals ...
	F.fit( x, y ) # print, plot

	if out and not mid:
	line, conf, pred = F.line_conf_pred( x )
	xyline = np.c_[ x, y, line, conf, pred ] # compare R
	print "writing", out, xyline.shape
	header = "x y line +- conf +- pred %s level %.3g %s" % (
	F.summary, level, csvin )
	np.savetxt( out, xyline, fmt="%.3g\t", header=header )

	#...........................................................................
	# trim the biggest points, iterate --
	F.trimfit( x, y, niter=niter )

	if plot:
	ax.legend()
	if plot >= 2:
	try:
	from bz.etc import numpyutil as nu
	nu.signfig( __file__ )
	except ImportError:
	pass
	pl.savefig( "tmp.png" )
	pl.show()
	# from: test-fitline.py
	# run: 7 Jul 2015 14:57 in ~bz/py/etc/stats/fitline Denis-iMac 10.8.3
	# versions: numpy 1.9.2 scipy 0.15.1 python 2.7.6 mac 10.8.3

	read .webdata/etc/usconsumption.csv (164, 2)
	column percentiles [ 0 25 50 75 100]:
	[[-2.3 -2.31]
	[0.405 0.31]
	[0.8 0.748]
	[1.11 1.26]
	[2.32 4.56]]

	Midline( nbig=5 )

	midline 164 points: intercept 0.365 slope 0.583
	residuals: biggest res^2 are [5 5 5 5 4] ... % of the total

	trim: [5 5 5 5 4] % of the total residuals^2
	trim: x [-0.621 -1.17 -1.41 -1.36 -0.0577]
	trim: y [1.88 1.55 -2.3 1.41 -1.31]

	midline 159 points: intercept 0.365 slope 0.573
	residuals: biggest res^2 are [4 4 4 4 4] ... % of the total