rob-p · January 19, 2016 21:10
diff --git a/EffLengths.py b/EffLengths.py
 def effLen(L, pmf, meanLen):
    if L < meanLen:
        return L
    minVal = 0
    maxVal = len(pmf)-1
    mval = maxVal
    clen = minVal
    maxLen = min(L, mval)
    effectiveLength = 0
    while (clen <= maxLen):
        i = clen - minVal
        effectiveLength += pmf[i] * (L - clen + 1)
        clen += 1
    return effectiveLength
    
 def readLib(libFile):
  countDict = {}
  with open(libFile) as ifile:
    for l in ifile:
      toks = l.rstrip().split()
      p1, p2 = int(toks[0]), int(toks[1])
      length = abs(p1 - p2)
      count = int(toks[-1])
      if length in countDict:
        countDict[length] += count
      else:
        countDict[length] = count
  return countDict

  def computeEffectiveLengths(d, libFile):
    cd = readLib(libFile)
    l, c = [], []
    for length, count in cd.iteritems():
      l.append(length)
      c.append(count)
    tot = float(sum(c))
    maxFrag = max(l)
    pdf = np.zeros(maxFrag + 1)
    cdf = np.zeros(maxFrag + 1)
    for a, b in zip(l, c):
      pdf[a] += b / tot
    cdf = np.cumsum(pdf)
    vfunc = np.vectorize(effLen, excluded=['pmf'])
    meanLen = pdf.mean()
    # Assuming the current lengths are in d["Length_true"]
    lengths = d["Length_true"].values
    effLengths = vfunc(L=lengths, pmf=pdf, meanLen=meanLen)
    d["EffLength_true".format(suffix)] = effLengths
	def effLen(L, pmf, meanLen):
	if L < meanLen:
	return L
	minVal = 0
	maxVal = len(pmf)-1
	mval = maxVal
	clen = minVal
	maxLen = min(L, mval)
	effectiveLength = 0
	while (clen <= maxLen):
	i = clen - minVal
	effectiveLength += pmf[i] * (L - clen + 1)
	clen += 1
	return effectiveLength

	def readLib(libFile):
	countDict = {}
	with open(libFile) as ifile:
	for l in ifile:
	toks = l.rstrip().split()
	p1, p2 = int(toks[0]), int(toks[1])
	length = abs(p1 - p2)
	count = int(toks[-1])
	if length in countDict:
	countDict[length] += count
	else:
	countDict[length] = count
	return countDict

	def computeEffectiveLengths(d, libFile):
	cd = readLib(libFile)
	l, c = [], []
	for length, count in cd.iteritems():
	l.append(length)
	c.append(count)
	tot = float(sum(c))
	maxFrag = max(l)
	pdf = np.zeros(maxFrag + 1)
	cdf = np.zeros(maxFrag + 1)
	for a, b in zip(l, c):
	pdf[a] += b / tot
	cdf = np.cumsum(pdf)
	vfunc = np.vectorize(effLen, excluded=['pmf'])
	meanLen = pdf.mean()
	# Assuming the current lengths are in d["Length_true"]
	lengths = d["Length_true"].values
	effLengths = vfunc(L=lengths, pmf=pdf, meanLen=meanLen)
	d["EffLength_true".format(suffix)] = effLengths