jbaiter · June 12, 2017 12:57
diff --git a/ocr_align.py b/ocr_align.py
 def align(truth_lines, ocr_lines):
     """Align ground-truth lines to OCR lines, preserving line order.

     For every line in ``truth_lines`` the OCR lines that come after the
     previously matched OCR line are scored with the ``levenshtein`` helper
     (defined elsewhere; assumed to return an edit distance -- TODO confirm),
     normalised by the length of the truth line.  The candidate with the
     lowest score is accepted if that score is below 0.5, and the search for
     the next truth line resumes after it, so matched OCR indices are
     strictly increasing.

     :param truth_lines: iterable of ground-truth text lines
     :param ocr_lines:   sequence of OCR text lines (must support ``len`` and
                         integer indexing)
     :returns:           tuple ``(aligned, nonaligned)``: ``aligned`` is a
                         list of ``(ocr_index, truth_line)`` pairs,
                         ``nonaligned`` the list of truth lines for which no
                         acceptable candidate was found
     """
     nonaligned = []
     aligned = []
     # Index of the most recently matched OCR line.  Starts at -1 so that the
     # first OCR line (index 0) is a valid candidate for the first truth line.
     align_idx = -1
     for truth_line in truth_lines:
         if not truth_line:
             # The error is normalised by the truth line's length; an empty
             # line cannot be scored, so it is reported as not aligned.
             nonaligned.append(truth_line)
             continue
         best_error = 1.0
         # None means "no candidate scored below the initial error of 1.0".
         best_align = None
         # Iterate by index instead of slicing to avoid copying the tail of
         # ``ocr_lines`` for every truth line.
         for idx in range(align_idx + 1, len(ocr_lines)):
             total_error = levenshtein(truth_line, ocr_lines[idx])
             # float() keeps this a true division under Python 2 as well.
             error = float(total_error) / len(truth_line)
             if error < best_error:
                 best_error = error
                 best_align = idx
         if best_align is not None and best_error < 0.5:
             align_idx = best_align
             aligned.append((align_idx, truth_line))
             logger.debug(
                 u"Matched `{}` <-> `{}` ({})".format(
                     truth_line, ocr_lines[best_align], best_error))
         else:
             nonaligned.append(truth_line)
             # Only report a closest candidate when one was actually found.
             if best_align is not None:
                 logger.debug(
                     u"Could not find match for '{}', closest candidate "
                     u"was '{}' with an error of {}"
                     .format(truth_line, ocr_lines[best_align], best_error))
     return aligned, nonaligned
	def align(truth_lines, ocr_lines):
	    """Align ground-truth lines to OCR lines, preserving line order.

	    For every line in ``truth_lines`` the OCR lines that come after the
	    previously matched OCR line are scored with the ``levenshtein`` helper
	    (defined elsewhere; assumed to return an edit distance -- TODO confirm),
	    normalised by the length of the truth line.  The candidate with the
	    lowest score is accepted if that score is below 0.5, and the search for
	    the next truth line resumes after it, so matched OCR indices are
	    strictly increasing.

	    :param truth_lines: iterable of ground-truth text lines
	    :param ocr_lines:   sequence of OCR text lines (must support ``len`` and
	                        integer indexing)
	    :returns:           tuple ``(aligned, nonaligned)``: ``aligned`` is a
	                        list of ``(ocr_index, truth_line)`` pairs,
	                        ``nonaligned`` the list of truth lines for which no
	                        acceptable candidate was found
	    """
	    nonaligned = []
	    aligned = []
	    # Index of the most recently matched OCR line.  Starts at -1 so that the
	    # first OCR line (index 0) is a valid candidate for the first truth line.
	    align_idx = -1
	    for truth_line in truth_lines:
	        if not truth_line:
	            # The error is normalised by the truth line's length; an empty
	            # line cannot be scored, so it is reported as not aligned.
	            nonaligned.append(truth_line)
	            continue
	        best_error = 1.0
	        # None means "no candidate scored below the initial error of 1.0".
	        best_align = None
	        # Iterate by index instead of slicing to avoid copying the tail of
	        # ``ocr_lines`` for every truth line.
	        for idx in range(align_idx + 1, len(ocr_lines)):
	            total_error = levenshtein(truth_line, ocr_lines[idx])
	            # float() keeps this a true division under Python 2 as well.
	            error = float(total_error) / len(truth_line)
	            if error < best_error:
	                best_error = error
	                best_align = idx
	        if best_align is not None and best_error < 0.5:
	            align_idx = best_align
	            aligned.append((align_idx, truth_line))
	            logger.debug(
	                u"Matched `{}` <-> `{}` ({})".format(
	                    truth_line, ocr_lines[best_align], best_error))
	        else:
	            nonaligned.append(truth_line)
	            # Only report a closest candidate when one was actually found.
	            if best_align is not None:
	                logger.debug(
	                    u"Could not find match for '{}', closest candidate "
	                    u"was '{}' with an error of {}"
	                    .format(truth_line, ocr_lines[best_align], best_error))
	    return aligned, nonaligned