mikelove · August 29, 2015 14:09
diff --git a/recommended_chunk.R b/recommended_chunk.R
 # The idea: a function that helps decide, for reduce by range,
 # how to optimally chunk GRanges into a GRangesList.

 # Practically, this involves sampling the size of the imported data
 # for a subset of cells in the (ranges, files) matrix, and
 # asking/estimating the amount of memory available for each worker.



 # user code

 library(GenomicFiles)
 register(SerialParam())
 # use 4 BigWigs from
 # ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeCshlLongRnaSeq/
 files <- list.files("~/data/bigwig/",full=TRUE)
 library(TxDb.Hsapiens.UCSC.hg19.knownGene)
 si <- seqinfo(TxDb.Hsapiens.UCSC.hg19.knownGene)
 ranges <- tileGenome(si,tilewidth=1e7,cut.last.tile.in.chrom=TRUE)
 ranges <- keepStandardChromosomes(ranges)


 ### BEGIN: my proposed functions

 #' most likely an unexported function
 #' 
 #' @param ranges the ranges over which to sample
 #' @param files the files over which to sample
 #' @param IMPORT the function which will import data one range and one file
 #' at a time. The point is to observe the maximal memory size in R of the object
 #' which will be operated on in the MAP and REDUCE calls.
 #' @param minWidth given that tileGenome() produces tiles of varying width,
 #' this allows the user to insist that the sampling should be only over tiles
 #' of a given minWidth.
 #'
 #' @return the function returns the object size in bytes of one imported object
 #' from the (ranges, files) matrix
 #' 
 sampleRandomRangeAndFile <- function(ranges, files, IMPORT, minWidth=NULL) {
  if (is.null(minWidth)) {
    idx <- sample(seq_along(ranges),1)
  } else {
    idx <- sample(which(width(ranges) >= minWidth),1)
  }
  range <- ranges[idx]
  file <- files[sample(seq_along(files),1)]
  object.size(IMPORT(range, file))
 }

 #' the user-facing function
 #'
 #' @param ceiling the maximal memory in megabytes to be used over all files for a single range
 #' @param upperBoundFunction a function which takes an upper bound as an esimate from
 #' the vector of sizes of the sample objects
 #' @param ranges the ranges over which to sample
 #' @param files to files over which to sample
 #' @param IMPORT the function which will import data one range and one file
 #' at a time. The point is to observe the maximal memory size in R of the object
 #' which will be operated on in the MAP and REDUCE calls.
 #' @param minWidth given that tileGenome() produces tiles of varying width,
 #' this allows the user to insist that the sampling should be only over tiles
 #' of a given minWidth.
 #' @param n the number of cells to sample
 #'
 #' @return the recommended number of ranges to chunk at one time
 #' 
 recommendedChunk <- function(ceiling, upperBoundFunction, ranges, files, IMPORT, minWidth=NULL, n=20) {
  taste <- replicate(n, sampleRandomRangeAndFile(ranges=ranges, files=files,
                                                 IMPORT=IMPORT, minWidth=minWidth))
  upper.bound <- upperBoundFunction(taste) / 2^20
  ceiling / (upper.bound * length(files))
 }

 ### END: my proposed functions


 # user code:

 IMPORT <- function(range, file) {
  import(file, which = range, as = "Rle", format = "bw")[range]
 }

 upperBoundFunction <- function(x) mean(x) + 2 * sd(x)

 (rc <- recommendedChunk(50, upperBoundFunction, ranges, files, IMPORT, minWidth=1e7))

 (nchunks <- round(length(ranges)/rc))
 f <- factor(sort(rep(seq_len(nchunks), length=length(ranges))))
 mean(table(f))
 rangeslist <- split(ranges, f)

 ##

 MAP <- function(range, file) {
  rle <- import(file, which = range, as = "Rle", format = "bw")[range]
  mean(rle)
 }

 REDUCE <- function(mapped) {
  Reduce(cbind, mapped)
 }

 rangeslist.sub <- rangeslist[1:2]
 (res <- reduceByRange(rangeslist.sub, files, MAP, REDUCE))
 (resOut <- do.call(rbind, res))
 sessionInfo()

diff --git a/recommended_chunk.Rout b/recommended_chunk.Rout

 R Under development (unstable) (2014-10-15 r66774) -- "Unsuffered Consequences"
 Copyright (C) 2014 The R Foundation for Statistical Computing
 Platform: x86_64-apple-darwin12.5.0 (64-bit)

 R is free software and comes with ABSOLUTELY NO WARRANTY.
 You are welcome to redistribute it under certain conditions.
 Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

 R is a collaborative project with many contributors.
 Type 'contributors()' for more information and
 'citation()' on how to cite R or R packages in publications.

 Type 'demo()' for some demos, 'help()' for on-line help, or
 'help.start()' for an HTML browser interface to help.
 Type 'q()' to quit R.

 [Previously saved workspace restored]

 Bioconductor version 3.1 (BiocInstaller 1.17.1), ?biocLite for help
 > # The idea: a function that helps decide, for reduce by range,
 > # how to optimally chunk GRanges into a GRangesList.
 > 
 > # Practically, this involves sampling the size of the imported data
 > # for a subset of cells in the (ranges, files) matrix, and
 > # asking/estimating the amount of memory available for each worker.
 > 
 > 
 > 
 > # user code
 > 
 > library(GenomicFiles)
 Loading required package: BiocGenerics
 Loading required package: parallel

 Attaching package: ‘BiocGenerics’

 The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

 The following object is masked from ‘package:stats’:

    xtabs

 The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, as.vector, cbind, colnames,
    do.call, duplicated, eval, evalq, Filter, Find, get, intersect,
    is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax,
    pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rep.int,
    rownames, sapply, setdiff, sort, table, tapply, union, unique,
    unlist, unsplit

 Loading required package: S4Vectors
 Loading required package: stats4
 Loading required package: IRanges
 Loading required package: Rsamtools
 Loading required package: GenomeInfoDb
 Loading required package: GenomicRanges
 Loading required package: XVector
 Loading required package: Biostrings
 Loading required package: rtracklayer
 Loading required package: BiocParallel
 > register(SerialParam())
 > # use 4 BigWigs from
 > # ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeCshlLongRnaSeq/
 > files <- list.files("~/data/bigwig/",full=TRUE)
 > library(TxDb.Hsapiens.UCSC.hg19.knownGene)
 Loading required package: GenomicFeatures
 Loading required package: AnnotationDbi
 Loading required package: Biobase
 Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.


 Attaching package: ‘AnnotationDbi’

 The following object is masked from ‘package:GenomeInfoDb’:

    species

 > si <- seqinfo(TxDb.Hsapiens.UCSC.hg19.knownGene)
 > ranges <- tileGenome(si,tilewidth=1e7,cut.last.tile.in.chrom=TRUE)
 > ranges <- keepStandardChromosomes(ranges)
 > 
 > 
 > ### BEGIN: my proposed functions
 > 
 > #' most likely an unexported function
 > #' 
 > #' @param ranges the ranges over which to sample
 > #' @param files the files over which to sample
 > #' @param IMPORT the function which will import data one range and one file
 > #' at a time. The point is to observe the maximal memory size in R of the object
 > #' which will be operated on in the MAP and REDUCE calls.
 > #' @param minWidth given that tileGenome() produces tiles of varying width,
 > #' this allows the user to insist that the sampling should be only over tiles
 > #' of a given minWidth.
 > #'
 > #' @return the function returns the object size in bytes of one imported object
 > #' from the (ranges, files) matrix
 > #' 
 > sampleRandomRangeAndFile <- function(ranges, files, IMPORT, minWidth=NULL) {
 +   if (is.null(minWidth)) {
 +     idx <- sample(seq_along(ranges),1)
 +   } else {
 +     idx <- sample(which(width(ranges) >= minWidth),1)
 +   }
 +   range <- ranges[idx]
 +   file <- files[sample(seq_along(files),1)]
 +   object.size(IMPORT(range, file))
 + }
 > 
 > #' the user-facing function
 > #'
 > #' @param ceiling the maximal memory in megabytes to be used over all files for a single range
 > #' @param upperBoundFunction a function which takes an upper bound as an esimate from
 > #' the vector of sizes of the sample objects
 > #' @param ranges the ranges over which to sample
 > #' @param files to files over which to sample
 > #' @param IMPORT the function which will import data one range and one file
 > #' at a time. The point is to observe the maximal memory size in R of the object
 > #' which will be operated on in the MAP and REDUCE calls.
 > #' @param minWidth given that tileGenome() produces tiles of varying width,
 > #' this allows the user to insist that the sampling should be only over tiles
 > #' of a given minWidth.
 > #' @param n the number of cells to sample
 > #'
 > #' @return the recommended number of ranges to chunk at one time
 > #' 
 > recommendedChunk <- function(ceiling, upperBoundFunction, ranges, files, IMPORT, minWidth=NULL, n=20) {
 +   taste <- replicate(n, sampleRandomRangeAndFile(ranges=ranges, files=files,
 +                                                  IMPORT=IMPORT, minWidth=minWidth))
 +   upper.bound <- upperBoundFunction(taste) / 2^20
 +   ceiling / (upper.bound * length(files))
 + }
 > 
 > ### END: my proposed functions
 > 
 > 
 > # user code:
 > 
 > IMPORT <- function(range, file) {
 +   import(file, which = range, as = "Rle", format = "bw")[range]
 + }
 > 
 > upperBoundFunction <- function(x) mean(x) + 2 * sd(x)
 > 
 > (rc <- recommendedChunk(50, upperBoundFunction, ranges, files, IMPORT, minWidth=1e7))
 [1] 5.774204
 > 
 > (nchunks <- round(length(ranges)/rc))
 [1] 56
 > f <- factor(sort(rep(seq_len(nchunks), length=length(ranges))))
 > mean(table(f))
 [1] 5.767857
 > rangeslist <- split(ranges, f)
 > 
 > ##
 > 
 > MAP <- function(range, file) {
 +   rle <- import(file, which = range, as = "Rle", format = "bw")[range]
 +   mean(rle)
 + }
 > 
 > REDUCE <- function(mapped) {
 +   Reduce(cbind, mapped)
 + }
 > 
 > rangeslist.sub <- rangeslist[1:2]
 > (res <- reduceByRange(rangeslist.sub, files, MAP, REDUCE))
 $`1`
         init                              
 chr1 2.772341 2.2201188 3.0041389 2.3258021
 chr1 3.025716 3.1525184 2.4992048 2.6526405
 chr1 1.320788 1.0617699 1.7907983 1.5505196
 chr1 1.151013 0.9161564 1.4714571 1.1631958
 chr1 1.235252 0.9907793 1.6580848 1.4077788
 chr1 1.084928 0.8862725 0.4555268 0.3595216

 $`2`
           init                               
 chr1  0.6801988  0.5708432 2.3426269 1.8456729
 chr1  0.3163817  0.2278151 0.4011951 0.2862280
 chr1  0.2942333  0.2352593 0.3319902 0.2584270
 chr1 22.2271557 21.0384108 0.9220905 0.7089621
 chr1  1.0939601  1.4992705 0.5730236 0.4314522
 chr1  0.8636410  0.6543298 1.3248102 0.9992793

 > (resOut <- do.call(rbind, res))
           init                               
 chr1  2.7723412  2.2201188 3.0041389 2.3258021
 chr1  3.0257160  3.1525184 2.4992048 2.6526405
 chr1  1.3207877  1.0617699 1.7907983 1.5505196
 chr1  1.1510127  0.9161564 1.4714571 1.1631958
 chr1  1.2352518  0.9907793 1.6580848 1.4077788
 chr1  1.0849282  0.8862725 0.4555268 0.3595216
 chr1  0.6801988  0.5708432 2.3426269 1.8456729
 chr1  0.3163817  0.2278151 0.4011951 0.2862280
 chr1  0.2942333  0.2352593 0.3319902 0.2584270
 chr1 22.2271557 21.0384108 0.9220905 0.7089621
 chr1  1.0939601  1.4992705 0.5730236 0.4314522
 chr1  0.8636410  0.6543298 1.3248102 0.9992793
 > sessionInfo()
 R Under development (unstable) (2014-10-15 r66774)
 Platform: x86_64-apple-darwin12.5.0 (64-bit)

 locale:
 [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

 attached base packages:
 [1] stats4    parallel  stats     graphics  grDevices datasets  utils    
 [8] methods   base     

 other attached packages:
 [1] TxDb.Hsapiens.UCSC.hg19.knownGene_3.0.0
 [2] GenomicFeatures_1.19.6                 
 [3] AnnotationDbi_1.29.1                   
 [4] Biobase_2.27.0                         
 [5] GenomicFiles_1.3.8                     
 [6] BiocParallel_1.1.5                     
 [7] rtracklayer_1.27.5                     
 [8] Rsamtools_1.19.9                       
 [9] Biostrings_2.35.4                      
 [10] XVector_0.7.2                          
 [11] GenomicRanges_1.19.10                  
 [12] GenomeInfoDb_1.3.7                     
 [13] IRanges_2.1.14                         
 [14] S4Vectors_0.5.7                        
 [15] BiocGenerics_0.13.2                    
 [16] RUnit_0.4.27                           
 [17] devtools_1.6.1                         
 [18] slidify_0.4.5                          
 [19] knitr_1.7                              
 [20] BiocInstaller_1.17.1                   

 loaded via a namespace (and not attached):
 [1] base64enc_0.1-2         BatchJobs_1.5           BBmisc_1.8             
 [4] biomaRt_2.23.1          bitops_1.0-6            brew_1.0-6             
 [7] checkmate_1.5.0         codetools_0.2-9         DBI_0.3.1              
 [10] digest_0.6.4            evaluate_0.5.5          fail_1.2               
 [13] foreach_1.4.2           formatR_1.0             GenomicAlignments_1.3.8
 [16] iterators_1.0.7         markdown_0.7.4          RCurl_1.95-4.3         
 [19] RSQLite_1.0.0           sendmailR_1.2-1         stringr_0.6.2          
 [22] tools_3.2.0             whisker_0.3-2           XML_3.98-1.1           
 [25] yaml_2.1.13             zlibbioc_1.13.0        
 > 
 > 
 > proc.time()
   user  system elapsed 
 23.510   0.729  24.338
	# The idea: a function that helps decide, for reduce by range,
	# how to optimally chunk GRanges into a GRangesList.

	# Practically, this involves sampling the size of the imported data
	# for a subset of cells in the (ranges, files) matrix, and
	# asking/estimating the amount of memory available for each worker.



	# user code

	library(GenomicFiles)
	register(SerialParam())
	# use 4 BigWigs from
	# ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeCshlLongRnaSeq/
	files <- list.files("~/data/bigwig/",full=TRUE)
	library(TxDb.Hsapiens.UCSC.hg19.knownGene)
	si <- seqinfo(TxDb.Hsapiens.UCSC.hg19.knownGene)
	ranges <- tileGenome(si,tilewidth=1e7,cut.last.tile.in.chrom=TRUE)
	ranges <- keepStandardChromosomes(ranges)


	### BEGIN: my proposed functions

	#' most likely an unexported function
	#'
	#' @param ranges the ranges over which to sample
	#' @param files the files over which to sample
	#' @param IMPORT the function which will import data one range and one file
	#' at a time. The point is to observe the maximal memory size in R of the object
	#' which will be operated on in the MAP and REDUCE calls.
	#' @param minWidth given that tileGenome() produces tiles of varying width,
	#' this allows the user to insist that the sampling should be only over tiles
	#' of a given minWidth.
	#'
	#' @return the function returns the object size in bytes of one imported object
	#' from the (ranges, files) matrix
	#'
	sampleRandomRangeAndFile <- function(ranges, files, IMPORT, minWidth=NULL) {
	if (is.null(minWidth)) {
	idx <- sample(seq_along(ranges),1)
	} else {
	idx <- sample(which(width(ranges) >= minWidth),1)
	}
	range <- ranges[idx]
	file <- files[sample(seq_along(files),1)]
	object.size(IMPORT(range, file))
	}

	#' the user-facing function
	#'
	#' @param ceiling the maximal memory in megabytes to be used over all files for a single range
	#' @param upperBoundFunction a function which takes an upper bound as an esimate from
	#' the vector of sizes of the sample objects
	#' @param ranges the ranges over which to sample
	#' @param files to files over which to sample
	#' @param IMPORT the function which will import data one range and one file
	#' at a time. The point is to observe the maximal memory size in R of the object
	#' which will be operated on in the MAP and REDUCE calls.
	#' @param minWidth given that tileGenome() produces tiles of varying width,
	#' this allows the user to insist that the sampling should be only over tiles
	#' of a given minWidth.
	#' @param n the number of cells to sample
	#'
	#' @return the recommended number of ranges to chunk at one time
	#'
	recommendedChunk <- function(ceiling, upperBoundFunction, ranges, files, IMPORT, minWidth=NULL, n=20) {
	taste <- replicate(n, sampleRandomRangeAndFile(ranges=ranges, files=files,
	IMPORT=IMPORT, minWidth=minWidth))
	upper.bound <- upperBoundFunction(taste) / 2^20
	ceiling / (upper.bound * length(files))
	}

	### END: my proposed functions


	# user code:

	IMPORT <- function(range, file) {
	import(file, which = range, as = "Rle", format = "bw")[range]
	}

	upperBoundFunction <- function(x) mean(x) + 2 * sd(x)

	(rc <- recommendedChunk(50, upperBoundFunction, ranges, files, IMPORT, minWidth=1e7))

	(nchunks <- round(length(ranges)/rc))
	f <- factor(sort(rep(seq_len(nchunks), length=length(ranges))))
	mean(table(f))
	rangeslist <- split(ranges, f)

	##

	MAP <- function(range, file) {
	rle <- import(file, which = range, as = "Rle", format = "bw")[range]
	mean(rle)
	}

	REDUCE <- function(mapped) {
	Reduce(cbind, mapped)
	}

	rangeslist.sub <- rangeslist[1:2]
	(res <- reduceByRange(rangeslist.sub, files, MAP, REDUCE))
	(resOut <- do.call(rbind, res))
	sessionInfo()

	R Under development (unstable) (2014-10-15 r66774) -- "Unsuffered Consequences"
	Copyright (C) 2014 The R Foundation for Statistical Computing
	Platform: x86_64-apple-darwin12.5.0 (64-bit)

	R is free software and comes with ABSOLUTELY NO WARRANTY.
	You are welcome to redistribute it under certain conditions.
	Type 'license()' or 'licence()' for distribution details.

	Natural language support but running in an English locale

	R is a collaborative project with many contributors.
	Type 'contributors()' for more information and
	'citation()' on how to cite R or R packages in publications.

	Type 'demo()' for some demos, 'help()' for on-line help, or
	'help.start()' for an HTML browser interface to help.
	Type 'q()' to quit R.

	[Previously saved workspace restored]

	Bioconductor version 3.1 (BiocInstaller 1.17.1), ?biocLite for help
	> # The idea: a function that helps decide, for reduce by range,
	> # how to optimally chunk GRanges into a GRangesList.
	>
	> # Practically, this involves sampling the size of the imported data
	> # for a subset of cells in the (ranges, files) matrix, and
	> # asking/estimating the amount of memory available for each worker.
	>
	>
	>
	> # user code
	>
	> library(GenomicFiles)
	Loading required package: BiocGenerics
	Loading required package: parallel

	Attaching package: ‘BiocGenerics’

	The following objects are masked from ‘package:parallel’:

	clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
	clusterExport, clusterMap, parApply, parCapply, parLapply,
	parLapplyLB, parRapply, parSapply, parSapplyLB

	The following object is masked from ‘package:stats’:

	xtabs

	The following objects are masked from ‘package:base’:

	anyDuplicated, append, as.data.frame, as.vector, cbind, colnames,
	do.call, duplicated, eval, evalq, Filter, Find, get, intersect,
	is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax,
	pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rep.int,
	rownames, sapply, setdiff, sort, table, tapply, union, unique,
	unlist, unsplit

	Loading required package: S4Vectors
	Loading required package: stats4
	Loading required package: IRanges
	Loading required package: Rsamtools
	Loading required package: GenomeInfoDb
	Loading required package: GenomicRanges
	Loading required package: XVector
	Loading required package: Biostrings
	Loading required package: rtracklayer
	Loading required package: BiocParallel
	> register(SerialParam())
	> # use 4 BigWigs from
	> # ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeCshlLongRnaSeq/
	> files <- list.files("~/data/bigwig/",full=TRUE)
	> library(TxDb.Hsapiens.UCSC.hg19.knownGene)
	Loading required package: GenomicFeatures
	Loading required package: AnnotationDbi
	Loading required package: Biobase
	Welcome to Bioconductor

	Vignettes contain introductory material; view with
	'browseVignettes()'. To cite Bioconductor, see
	'citation("Biobase")', and for packages 'citation("pkgname")'.


	Attaching package: ‘AnnotationDbi’

	The following object is masked from ‘package:GenomeInfoDb’:

	species

	> si <- seqinfo(TxDb.Hsapiens.UCSC.hg19.knownGene)
	> ranges <- tileGenome(si,tilewidth=1e7,cut.last.tile.in.chrom=TRUE)
	> ranges <- keepStandardChromosomes(ranges)
	>
	>
	> ### BEGIN: my proposed functions
	>
	> #' most likely an unexported function
	> #'
	> #' @param ranges the ranges over which to sample
	> #' @param files the files over which to sample
	> #' @param IMPORT the function which will import data one range and one file
	> #' at a time. The point is to observe the maximal memory size in R of the object
	> #' which will be operated on in the MAP and REDUCE calls.
	> #' @param minWidth given that tileGenome() produces tiles of varying width,
	> #' this allows the user to insist that the sampling should be only over tiles
	> #' of a given minWidth.
	> #'
	> #' @return the function returns the object size in bytes of one imported object
	> #' from the (ranges, files) matrix
	> #'
	> sampleRandomRangeAndFile <- function(ranges, files, IMPORT, minWidth=NULL) {
	+ if (is.null(minWidth)) {
	+ idx <- sample(seq_along(ranges),1)
	+ } else {
	+ idx <- sample(which(width(ranges) >= minWidth),1)
	+ }
	+ range <- ranges[idx]
	+ file <- files[sample(seq_along(files),1)]
	+ object.size(IMPORT(range, file))
	+ }
	>
	> #' the user-facing function
	> #'
	> #' @param ceiling the maximal memory in megabytes to be used over all files for a single range
	> #' @param upperBoundFunction a function which takes an upper bound as an esimate from
	> #' the vector of sizes of the sample objects
	> #' @param ranges the ranges over which to sample
	> #' @param files to files over which to sample
	> #' @param IMPORT the function which will import data one range and one file
	> #' at a time. The point is to observe the maximal memory size in R of the object
	> #' which will be operated on in the MAP and REDUCE calls.
	> #' @param minWidth given that tileGenome() produces tiles of varying width,
	> #' this allows the user to insist that the sampling should be only over tiles
	> #' of a given minWidth.
	> #' @param n the number of cells to sample
	> #'
	> #' @return the recommended number of ranges to chunk at one time
	> #'
	> recommendedChunk <- function(ceiling, upperBoundFunction, ranges, files, IMPORT, minWidth=NULL, n=20) {
	+ taste <- replicate(n, sampleRandomRangeAndFile(ranges=ranges, files=files,
	+ IMPORT=IMPORT, minWidth=minWidth))
	+ upper.bound <- upperBoundFunction(taste) / 2^20
	+ ceiling / (upper.bound * length(files))
	+ }
	>
	> ### END: my proposed functions
	>
	>
	> # user code:
	>
	> IMPORT <- function(range, file) {
	+ import(file, which = range, as = "Rle", format = "bw")[range]
	+ }
	>
	> upperBoundFunction <- function(x) mean(x) + 2 * sd(x)
	>
	> (rc <- recommendedChunk(50, upperBoundFunction, ranges, files, IMPORT, minWidth=1e7))
	[1] 5.774204
	>
	> (nchunks <- round(length(ranges)/rc))
	[1] 56
	> f <- factor(sort(rep(seq_len(nchunks), length=length(ranges))))
	> mean(table(f))
	[1] 5.767857
	> rangeslist <- split(ranges, f)
	>
	> ##
	>
	> MAP <- function(range, file) {
	+ rle <- import(file, which = range, as = "Rle", format = "bw")[range]
	+ mean(rle)
	+ }
	>
	> REDUCE <- function(mapped) {
	+ Reduce(cbind, mapped)
	+ }
	>
	> rangeslist.sub <- rangeslist[1:2]
	> (res <- reduceByRange(rangeslist.sub, files, MAP, REDUCE))
	$`1`
	init
	chr1 2.772341 2.2201188 3.0041389 2.3258021
	chr1 3.025716 3.1525184 2.4992048 2.6526405
	chr1 1.320788 1.0617699 1.7907983 1.5505196
	chr1 1.151013 0.9161564 1.4714571 1.1631958
	chr1 1.235252 0.9907793 1.6580848 1.4077788
	chr1 1.084928 0.8862725 0.4555268 0.3595216

	$`2`
	init
	chr1 0.6801988 0.5708432 2.3426269 1.8456729
	chr1 0.3163817 0.2278151 0.4011951 0.2862280
	chr1 0.2942333 0.2352593 0.3319902 0.2584270
	chr1 22.2271557 21.0384108 0.9220905 0.7089621
	chr1 1.0939601 1.4992705 0.5730236 0.4314522
	chr1 0.8636410 0.6543298 1.3248102 0.9992793

	> (resOut <- do.call(rbind, res))
	init
	chr1 2.7723412 2.2201188 3.0041389 2.3258021
	chr1 3.0257160 3.1525184 2.4992048 2.6526405
	chr1 1.3207877 1.0617699 1.7907983 1.5505196
	chr1 1.1510127 0.9161564 1.4714571 1.1631958
	chr1 1.2352518 0.9907793 1.6580848 1.4077788
	chr1 1.0849282 0.8862725 0.4555268 0.3595216
	chr1 0.6801988 0.5708432 2.3426269 1.8456729
	chr1 0.3163817 0.2278151 0.4011951 0.2862280
	chr1 0.2942333 0.2352593 0.3319902 0.2584270
	chr1 22.2271557 21.0384108 0.9220905 0.7089621
	chr1 1.0939601 1.4992705 0.5730236 0.4314522
	chr1 0.8636410 0.6543298 1.3248102 0.9992793
	> sessionInfo()
	R Under development (unstable) (2014-10-15 r66774)
	Platform: x86_64-apple-darwin12.5.0 (64-bit)

	locale:
	[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

	attached base packages:
	[1] stats4 parallel stats graphics grDevices datasets utils
	[8] methods base

	other attached packages:
	[1] TxDb.Hsapiens.UCSC.hg19.knownGene_3.0.0
	[2] GenomicFeatures_1.19.6
	[3] AnnotationDbi_1.29.1
	[4] Biobase_2.27.0
	[5] GenomicFiles_1.3.8
	[6] BiocParallel_1.1.5
	[7] rtracklayer_1.27.5
	[8] Rsamtools_1.19.9
	[9] Biostrings_2.35.4
	[10] XVector_0.7.2
	[11] GenomicRanges_1.19.10
	[12] GenomeInfoDb_1.3.7
	[13] IRanges_2.1.14
	[14] S4Vectors_0.5.7
	[15] BiocGenerics_0.13.2
	[16] RUnit_0.4.27
	[17] devtools_1.6.1
	[18] slidify_0.4.5
	[19] knitr_1.7
	[20] BiocInstaller_1.17.1

	loaded via a namespace (and not attached):
	[1] base64enc_0.1-2 BatchJobs_1.5 BBmisc_1.8
	[4] biomaRt_2.23.1 bitops_1.0-6 brew_1.0-6
	[7] checkmate_1.5.0 codetools_0.2-9 DBI_0.3.1
	[10] digest_0.6.4 evaluate_0.5.5 fail_1.2
	[13] foreach_1.4.2 formatR_1.0 GenomicAlignments_1.3.8
	[16] iterators_1.0.7 markdown_0.7.4 RCurl_1.95-4.3
	[19] RSQLite_1.0.0 sendmailR_1.2-1 stringr_0.6.2
	[22] tools_3.2.0 whisker_0.3-2 XML_3.98-1.1
	[25] yaml_2.1.13 zlibbioc_1.13.0
	>
	>
	> proc.time()
	user system elapsed
	23.510 0.729 24.338