alexpreynolds · May 28, 2020 21:33
diff --git a/chromCor2.Rscript b/chromCor2.Rscript
 #! /usr/bin/env Rscript
 # -*- mode: R -*-
 # Above tells emacs to go into R mode for editing (must be second line).

 ##
 ## chromCor:
 ##
 ##   Compute correlation of two jarched bed files along a chromosome (default chr19).
 ##
 ## To run this script, turn the execute bit on and run just as any other script file. The Rscript
 ## command must be in your path.  Rscript comes with R distributions 2.8 (?) and higher, and is in the
 ## same directory as the R script itself (eg, /usr/local/R/bin).
 ##
 ## Author: RET
 ## Date:  20 Aug 2009

 usage <- paste('Usage:  chromCor [-c <chromosome>] [-n <column number>] [-z] [-u] <bed1.jarch> <bed2.jarch>',
               '',
               'Compute correlation of two bed files along a chromosome (default chr19).',
               '',
               'By default, <bed1.jarch> and <bed2.jarch> are jarched 5-column bed files, with score to be correlated in the',
               'fifth column.  There must be the same number of entries for the desired chromosome in each bed file.',
               '',
               '<chromosome> is a chromosome name; by default chr19, or "all", for all entries in the bed file.',
               '',
               '<column number> is the column in each file containing the data to correlate; by default, 5.',
               '',
               '-s specifies that bed files are starched instead of jarched.',
               '',
               '-z specifies that bed files are gzipped instead of jarched.',
               '',
               '-u specifies that bed files are uncompressed (not jarched or gzipped).',
               '',
               'Expects gchr and Rscript to be in the user\'s path.',
               '',
               'Writes results to stdout.',
               '',
               '\n',
               sep = '\n')

 args <- commandArgs(T)

 if(length(args) <= 1){
  cat(usage)
  quit('no')
 }

 chr <- 'chr19'
 col <- 5
 uncompr <- c('', '')

 i <- 1
 while(substr(args[i], 1, 1) == '-'){
  opt <- substr(args[i], 2, 2)
  if(opt == 'c'){
    chr <- args[i+1]
    if(chr == 'all') chr = ''
    i <- i + 2
  }else if(opt == 'n'){
    col <- as.integer(args[i+1])
    i <- i + 2
  }else if(opt == 'z'){
    uncompr <- c('zcat', 'zcat')
    i <- i + 1
  }else if(opt == 's'){
    uncompr <- c('unstarch', 'unstarch')
    i <- i + 1
  }else if(opt == 'u'){
    uncompr <- c('cat', 'cat')
    i <- i + 1
  }else{
    cat('Unrecognized option:', args[i], '\n', file = stderr())
    cat(usage, file = stderr())
    quit('no')
  }
 }

 bed <- args[c(i, i+1)]
 rds <- list()

 for(i in 1:2){
  if(uncompr[i] == ''){
    ext <- gsub('.*\\.', '', bed[i])
    if( ext == "bed"){
      uncompr[i] <- 'cat'
    } else if( ext == "jarch"){
      uncompr[i] <- 'gchr'
    } else if( ext == "starch"){
      uncompr[i] <- 'unstarch'
    } else if( ext == "gz" || ext == "zip"){
      uncompr[i] <- 'zcat'
    } else{
      cat('Unrecognized file extension:', bed[i], '\n', file = stderr())
      quit('no')
    }
  }

  if(chr == '')
    cmd <- paste(uncompr[i], '%BED')
  else{
    if(uncompr[i] == "cat" || uncompr[i] == "zcat"){
      cmd <- paste(uncompr[i], ' %BED | grep "^', chr, '" -', sep = '')
    }else if(uncompr[i] == "unstarch" || uncompr[i] == "gchr"){
      cmd <- paste(uncompr[i], chr, '%BED')
    }
  }

  cmd <- paste(cmd, ' | cut -f', col, sep = '')

  cat('Reading', bed[i], '...\n', file = stderr())

  con <- pipe(gsub('%BED', bed[i], cmd))
  rds[[i]] <- scan(con, what = numeric(0))
  close(con)
 }

 cv <- cor(rds[[1]], rds[[2]])
 cat(cv, '\n')
	#! /usr/bin/env Rscript
	# -- mode: R --
	# Above tells emacs to go into R mode for editing (must be second line).

	##
	## chromCor:
	##
	## Compute correlation of two jarched bed files along a chromosome (default chr19).
	##
	## To run this script, turn the execute bit on and run just as any other script file. The Rscript
	## command must be in your path. Rscript comes with R distributions 2.8 (?) and higher, and is in the
	## same directory as the R script itself (eg, /usr/local/R/bin).
	##
	## Author: RET
	## Date: 20 Aug 2009

	usage <- paste('Usage: chromCor [-c <chromosome>] [-n <column number>] [-z] [-u] <bed1.jarch> <bed2.jarch>',
	'',
	'Compute correlation of two bed files along a chromosome (default chr19).',
	'',
	'By default, <bed1.jarch> and <bed2.jarch> are jarched 5-column bed files, with score to be correlated in the',
	'fifth column. There must be the same number of entries for the desired chromosome in each bed file.',
	'',
	'<chromosome> is a chromosome name; by default chr19, or "all", for all entries in the bed file.',
	'',
	'<column number> is the column in each file containing the data to correlate; by default, 5.',
	'',
	'-s specifies that bed files are starched instead of jarched.',
	'',
	'-z specifies that bed files are gzipped instead of jarched.',
	'',
	'-u specifies that bed files are uncompressed (not jarched or gzipped).',
	'',
	'Expects gchr and Rscript to be in the user\'s path.',
	'',
	'Writes results to stdout.',
	'',
	'\n',
	sep = '\n')

	args <- commandArgs(T)

	if(length(args) <= 1){
	cat(usage)
	quit('no')
	}

	chr <- 'chr19'
	col <- 5
	uncompr <- c('', '')

	i <- 1
	while(substr(args[i], 1, 1) == '-'){
	opt <- substr(args[i], 2, 2)
	if(opt == 'c'){
	chr <- args[i+1]
	if(chr == 'all') chr = ''
	i <- i + 2
	}else if(opt == 'n'){
	col <- as.integer(args[i+1])
	i <- i + 2
	}else if(opt == 'z'){
	uncompr <- c('zcat', 'zcat')
	i <- i + 1
	}else if(opt == 's'){
	uncompr <- c('unstarch', 'unstarch')
	i <- i + 1
	}else if(opt == 'u'){
	uncompr <- c('cat', 'cat')
	i <- i + 1
	}else{
	cat('Unrecognized option:', args[i], '\n', file = stderr())
	cat(usage, file = stderr())
	quit('no')
	}
	}

	bed <- args[c(i, i+1)]
	rds <- list()

	for(i in 1:2){
	if(uncompr[i] == ''){
	ext <- gsub('.*\\.', '', bed[i])
	if( ext == "bed"){
	uncompr[i] <- 'cat'
	} else if( ext == "jarch"){
	uncompr[i] <- 'gchr'
	} else if( ext == "starch"){
	uncompr[i] <- 'unstarch'
	} else if( ext == "gz" \|\| ext == "zip"){
	uncompr[i] <- 'zcat'
	} else{
	cat('Unrecognized file extension:', bed[i], '\n', file = stderr())
	quit('no')
	}
	}

	if(chr == '')
	cmd <- paste(uncompr[i], '%BED')
	else{
	if(uncompr[i] == "cat" \|\| uncompr[i] == "zcat"){
	cmd <- paste(uncompr[i], ' %BED \| grep "^', chr, '" -', sep = '')
	}else if(uncompr[i] == "unstarch" \|\| uncompr[i] == "gchr"){
	cmd <- paste(uncompr[i], chr, '%BED')
	}
	}

	cmd <- paste(cmd, ' \| cut -f', col, sep = '')

	cat('Reading', bed[i], '...\n', file = stderr())

	con <- pipe(gsub('%BED', bed[i], cmd))
	rds[[i]] <- scan(con, what = numeric(0))
	close(con)
	}

	cv <- cor(rds[[1]], rds[[2]])
	cat(cv, '\n')