Skip to content

Instantly share code, notes, and snippets.

@myndzi
Created February 12, 2016 23:02
Show Gist options
  • Save myndzi/4a615b3cd820658627a0 to your computer and use it in GitHub Desktop.
Save myndzi/4a615b3cd820658627a0 to your computer and use it in GitHub Desktop.
/**
 * Flattens an HTML string into plain text.
 *
 * <p>The markup is parsed with jsoup and walked depth-first. The text of every
 * non-empty text node is appended to the result in document order. When a block
 * element or a {@code <br>} closes, a single {@code "\n"} is appended, but only
 * if something has been written since the previous line break. As a result the
 * output never starts with a newline, and runs of adjacent block ends or
 * {@code <br>} tags produce one newline rather than several.
 *
 * @param str the HTML to convert; passed directly to {@code Jsoup.parse}
 * @return the collected text with line breaks at block and {@code <br>} boundaries
 */
private String cleanHTML(String str) {
    final StringBuilder buffer = new StringBuilder();
    // Node.traverse performs the depth-first head/tail walk itself, so this code
    // does not rely on the NodeTraversor constructor, which newer jsoup releases
    // no longer offer.
    Jsoup.parse(str).traverse(new NodeVisitor() {
        /** True until text is written, and again right after each emitted line break. */
        private boolean isNewline = true;

        @Override
        public void head(Node node, int depth) {
            // Nothing to do on entry; all output is produced as nodes close.
        }

        @Override
        public void tail(Node node, int depth) {
            if (node instanceof TextNode) {
                // Appended as returned by text(): no non-breaking-space replacement
                // or trimming is applied here.
                String text = ((TextNode) node).text();
                if (!text.isEmpty()) {
                    buffer.append(text);
                    isNewline = false;
                }
            } else if (node instanceof Element) {
                Element closed = (Element) node;
                // Skip the break when already at a line start so breaks never stack up.
                if (!isNewline && (closed.isBlock() || "br".equals(closed.tagName()))) {
                    buffer.append('\n');
                    isNewline = true;
                }
            }
        }
    });
    return buffer.toString();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment