kowey · August 29, 2015 14:04
diff --git a/gistfile1.py b/gistfile1.py
 def _surrounding_paragraph(edu):
    """
    Determine which paragraph (if any) surrounds this EDU.
    Try to accomodate the occasional off-by-a-smidgen error by
    folks marking these EDU boundaries, eg. original text:

    Para1: "Magazines are not providing us in-depth information on
    circulation," said Edgar Bronfman Jr., .. "How do readers feel
    about the magazine?...
    Research doesn't tell us whether people actually do read the
    magazines they subscribe to."

    Para2: Reuben Mark, chief executive of Colgate-Palmolive, said...

    Marked up EDU is wide to the left by three characters:
    "

    Reuben Mark, chief executive of Colgate-Palmolive, said...
    """

    espan = edu.text_span()
    paras = [x for x in edu.context.paragraphs
             if x.text_span().encloses(espan)]
    # sloppy EDUs happen; try shaving off some characters
    # if we can't find a paragraph
    if not paras:
        espan = copy.copy(espan)
        espan.char_start += 1
        espan.char_end -= 1
        etext = edu.context.text(espan)
        # kill left whitespace
        espan.char_start += len(etext) - len(etext.lstrip())
        etext = etext.lstrip()
        # kill right whitespace
        espan.char_end -= len(etext) - len(etext.rstrip())
        etext = etext.rstrip()
        # try again
        paras = [x for x in edu.context.paragraphs
                 if x.text_span().encloses(espan)]
    return paras[0] if paras else None
	def _surrounding_paragraph(edu):
	"""
	Determine which paragraph (if any) surrounds this EDU.
	Try to accomodate the occasional off-by-a-smidgen error by
	folks marking these EDU boundaries, eg. original text:

	Para1: "Magazines are not providing us in-depth information on
	circulation," said Edgar Bronfman Jr., .. "How do readers feel
	about the magazine?...
	Research doesn't tell us whether people actually do read the
	magazines they subscribe to."

	Para2: Reuben Mark, chief executive of Colgate-Palmolive, said...

	Marked up EDU is wide to the left by three characters:
	"

	Reuben Mark, chief executive of Colgate-Palmolive, said...
	"""

	espan = edu.text_span()
	paras = [x for x in edu.context.paragraphs
	if x.text_span().encloses(espan)]
	# sloppy EDUs happen; try shaving off some characters
	# if we can't find a paragraph
	if not paras:
	espan = copy.copy(espan)
	espan.char_start += 1
	espan.char_end -= 1
	etext = edu.context.text(espan)
	# kill left whitespace
	espan.char_start += len(etext) - len(etext.lstrip())
	etext = etext.lstrip()
	# kill right whitespace
	espan.char_end -= len(etext) - len(etext.rstrip())
	etext = etext.rstrip()
	# try again
	paras = [x for x in edu.context.paragraphs
	if x.text_span().encloses(espan)]
	return paras[0] if paras else None