drewda · October 19, 2011 18:23 · ratnanil · Jun 5, 2015 · 121onto · Sep 24, 2016
diff --git a/gistfile1.py b/gistfile1.py
 # code from http://danieljlewis.org/files/2010/06/Jenks.pdf
 # described at http://danieljlewis.org/2010/06/07/jenks-natural-breaks-algorithm-in-python/

 def getJenksBreaks( dataList, numClass ):
  dataList.sort()
  mat1 = []
  for i in range(0,len(dataList)+1):
    temp = []
    for j in range(0,numClass+1):
      temp.append(0)
    mat1.append(temp)
  mat2 = []
  for i in range(0,len(dataList)+1):
    temp = []
    for j in range(0,numClass+1):
      temp.append(0)
    mat2.append(temp)
  for i in range(1,numClass+1):
    mat1[1][i] = 1
    mat2[1][i] = 0
    for j in range(2,len(dataList)+1):
      mat2[j][i] = float('inf')
  v = 0.0
  for l in range(2,len(dataList)+1):
    s1 = 0.0
    s2 = 0.0
    w = 0.0
    for m in range(1,l+1):
      i3 = l - m + 1
      val = float(dataList[i3-1])
      s2 += val * val
      s1 += val
      w += 1
      v = s2 - (s1 * s1) / w
      i4 = i3 - 1
      if i4 != 0:
        for j in range(2,numClass+1):
          if mat2[l][j] >= (v + mat2[i4][j - 1]):
            mat1[l][j] = i3
            mat2[l][j] = v + mat2[i4][j - 1]
    mat1[l][1] = 1
    mat2[l][1] = v
  k = len(dataList)
  kclass = []
  for i in range(0,numClass+1):
    kclass.append(0)
  kclass[numClass] = float(dataList[len(dataList) - 1])
  countNum = numClass
  while countNum >= 2:#print "rank = " + str(mat1[k][countNum])
    id = int((mat1[k][countNum]) - 2)
    #print "val = " + str(dataList[id])
    kclass[countNum - 1] = dataList[id]
    k = int((mat1[k][countNum] - 1))
    countNum -= 1
  return kclass
  
 def getGVF( dataList, numClass ):
  """
  The Goodness of Variance Fit (GVF) is found by taking the 
  difference between the squared deviations
  from the array mean (SDAM) and the squared deviations from the 
  class means (SDCM), and dividing by the SDAM
  """
  breaks = getJenksBreaks(dataList, numClass)
  dataList.sort()
  listMean = sum(dataList)/len(dataList)
  print listMean
  SDAM = 0.0
  for i in range(0,len(dataList)):
    sqDev = (dataList[i] - listMean)**2
    SDAM += sqDev
  SDCM = 0.0
  for i in range(0,numClass):
    if breaks[i] == 0:
      classStart = 0
    else:
      classStart = dataList.index(breaks[i])
      classStart += 1
    classEnd = dataList.index(breaks[i+1])
    classList = dataList[classStart:classEnd+1]
    classMean = sum(classList)/len(classList)
    print classMean
    preSDCM = 0.0
    for j in range(0,len(classList)):
      sqDev2 = (classList[j] - classMean)**2
      preSDCM += sqDev2
    SDCM += preSDCM
  return (SDAM - SDCM)/SDAM
  
 # written by Drew
 # used after running getJenksBreaks()
 def classify(value, breaks):
  for i in range(1, len(breaks)):
    if value < breaks[i]:
      return i
  return len(breaks) - 1
	# code from http://danieljlewis.org/files/2010/06/Jenks.pdf
	# described at http://danieljlewis.org/2010/06/07/jenks-natural-breaks-algorithm-in-python/

	def getJenksBreaks( dataList, numClass ):
	dataList.sort()
	mat1 = []
	for i in range(0,len(dataList)+1):
	temp = []
	for j in range(0,numClass+1):
	temp.append(0)
	mat1.append(temp)
	mat2 = []
	for i in range(0,len(dataList)+1):
	temp = []
	for j in range(0,numClass+1):
	temp.append(0)
	mat2.append(temp)
	for i in range(1,numClass+1):
	mat1[1][i] = 1
	mat2[1][i] = 0
	for j in range(2,len(dataList)+1):
	mat2[j][i] = float('inf')
	v = 0.0
	for l in range(2,len(dataList)+1):
	s1 = 0.0
	s2 = 0.0
	w = 0.0
	for m in range(1,l+1):
	i3 = l - m + 1
	val = float(dataList[i3-1])
	s2 += val * val
	s1 += val
	w += 1
	v = s2 - (s1 * s1) / w
	i4 = i3 - 1
	if i4 != 0:
	for j in range(2,numClass+1):
	if mat2[l][j] >= (v + mat2[i4][j - 1]):
	mat1[l][j] = i3
	mat2[l][j] = v + mat2[i4][j - 1]
	mat1[l][1] = 1
	mat2[l][1] = v
	k = len(dataList)
	kclass = []
	for i in range(0,numClass+1):
	kclass.append(0)
	kclass[numClass] = float(dataList[len(dataList) - 1])
	countNum = numClass
	while countNum >= 2:#print "rank = " + str(mat1[k][countNum])
	id = int((mat1[k][countNum]) - 2)
	#print "val = " + str(dataList[id])
	kclass[countNum - 1] = dataList[id]
	k = int((mat1[k][countNum] - 1))
	countNum -= 1
	return kclass

	def getGVF( dataList, numClass ):
	"""
	The Goodness of Variance Fit (GVF) is found by taking the
	difference between the squared deviations
	from the array mean (SDAM) and the squared deviations from the
	class means (SDCM), and dividing by the SDAM
	"""
	breaks = getJenksBreaks(dataList, numClass)
	dataList.sort()
	listMean = sum(dataList)/len(dataList)
	print listMean
	SDAM = 0.0
	for i in range(0,len(dataList)):
	sqDev = (dataList[i] - listMean)**2
	SDAM += sqDev
	SDCM = 0.0
	for i in range(0,numClass):
	if breaks[i] == 0:
	classStart = 0
	else:
	classStart = dataList.index(breaks[i])
	classStart += 1
	classEnd = dataList.index(breaks[i+1])
	classList = dataList[classStart:classEnd+1]
	classMean = sum(classList)/len(classList)
	print classMean
	preSDCM = 0.0
	for j in range(0,len(classList)):
	sqDev2 = (classList[j] - classMean)**2
	preSDCM += sqDev2
	SDCM += preSDCM
	return (SDAM - SDCM)/SDAM

	# written by Drew
	# used after running getJenksBreaks()
	def classify(value, breaks):
	for i in range(1, len(breaks)):
	if value < breaks[i]:
	return i
	return len(breaks) - 1