alexandre · April 28, 2014 02:06
diff --git a/limpa_html.py b/limpa_html.py
  def _limpa_html(self, html_bruto):
        '''
        Parse the fetched HTML and hide every tag outside the allow-list.

        The markup is parsed with BeautifulSoup; every tag whose name is
        not one of table, tr, td, span or div gets ``hidden = True``.
        Per the StackOverflow tip this is based on, a hidden tag is left
        out of the rendered output while its contents are still rendered
        (NOTE(review): confirm against the BeautifulSoup version in use).

        Source of the idea (the simplest one in the thread):
        http://stackoverflow.com/questions/699468/python-html-sanitizer-scrubber-filter

        html_bruto -- the raw HTML to parse.

        Returns the parsed BeautifulSoup tree itself, not a rendered
        string; callers that need text have to render it themselves.
        '''
        allowed = ('table', 'tr', 'td', 'span', 'div')
        tree = BeautifulSoup(html_bruto)

        # First gather the tags that are not allowed, then flag them.
        rejected = [node for node in tree.findAll(True)
                    if node.name not in allowed]
        for node in rejected:
            node.hidden = True

        return tree
	def _limpa_html(self, html_bruto):
		'''
		Parse the fetched HTML and hide every tag outside the allow-list.

		Every tag whose name is not listed in TAG_VALIDAS gets
		``hidden = True``. Per the StackOverflow tip this is based on, a
		hidden tag is left out of the rendered output while its contents
		are still rendered (NOTE(review): confirm against the
		BeautifulSoup version in use).

		Source of the idea (the simplest one in the thread):
		http://stackoverflow.com/questions/699468/python-html-sanitizer-scrubber-filter

		html_bruto -- the raw HTML to parse.

		Returns the parsed BeautifulSoup tree itself, not a rendered
		string.
		'''
		TAG_VALIDAS = ['table', 'tr', 'td', 'span', 'div']
		soup = BeautifulSoup(html_bruto)

		# findAll(True) matches every tag in the document.
		for tag in soup.findAll(True):
			if tag.name not in TAG_VALIDAS:
				tag.hidden = True

		# The tree is returned as-is; rendering it to a string
		# (e.g. str(soup.renderContents())) is left to the caller.
		return soup
No results found