Created
August 2, 2013 16:53
-
-
Save jcheype/6141464 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.vidal.bo.monoviewer.util; | |
import java.util.ArrayList; | |
import java.util.List; | |
import java.util.regex.Pattern; | |
/**
 * Building blocks for a token-based HTML diff: splits an HTML string into
 * tag, word and whitespace tokens.
 *
 * <p>Only the tokenizer is functional in this class; {@code findMatch} is an
 * unfinished stub.
 *
 * @author jcheype
 * @since 02/08/13
 */
public class HtmlDiff {

    /** Tokenizer state: which kind of token is currently being accumulated. */
    enum Mode {
        /** Inside regular text (a word or a single non-letter character). */
        CHAR,
        /** Inside a tag, i.e. between {@code '<'} and the closing {@code '>'}. */
        TAG,
        /** Inside a run of consecutive whitespace characters. */
        WHITESPACE
    }

    /**
     * A run of {@code length} tokens starting at {@code startInBefore} in the
     * "before" sequence and at {@code startInAfter} in the "after" sequence.
     * The {@code end*} fields are the inclusive index of the last token of the
     * run ({@code start + length - 1}).
     */
    class Match {
        int startInBefore;
        int startInAfter;
        int length;
        int endInBefore;
        int endInAfter;

        Match(int startInBefore, int startInAfter, int length) {
            this.startInBefore = startInBefore;
            this.startInAfter = startInAfter;
            this.length = length;
            // Inclusive end indexes, derived once from start and length.
            endInBefore = startInBefore + length - 1;
            endInAfter = startInAfter + length - 1;
        }
    }

    /**
     * Matches a token consisting of a single {@code <...>} tag, optionally
     * surrounded by whitespace. Compiled once and shared by all instances.
     */
    private static final Pattern TAG_PATTERN = Pattern.compile("^\\s*<[^>]+>\\s*$");

    /** Initial capacity of the buffer that accumulates the current token. */
    private static final int WORD_BUFFER_CAPACITY = 32;

    /** Returns {@code true} if {@code c} closes a tag ({@code '>'}). */
    private boolean isEndOfTag(char c) {
        return c == '>';
    }

    /** Returns {@code true} if {@code c} opens a tag ({@code '<'}). */
    private boolean isStartOfTag(char c) {
        return c == '<';
    }

    /** Returns {@code true} if {@code c} is whitespace per {@link Character#isWhitespace(char)}. */
    private boolean isWhitespace(char c) {
        return Character.isWhitespace(c);
    }

    /** Returns {@code true} if the whole {@code token} is a single tag (see {@link #TAG_PATTERN}). */
    private boolean isTag(String token) {
        return TAG_PATTERN.matcher(token).matches();
    }

    /**
     * Flushes the token accumulated in {@code currentWord} into {@code words}.
     *
     * @param currentWord buffer holding the token built so far
     * @param words       list receiving completed tokens
     * @return a fresh empty buffer if a token was flushed, otherwise the same
     *         (empty) {@code currentWord}
     */
    private StringBuilder pushWord(StringBuilder currentWord, List<String> words) {
        if (currentWord.length() > 0) {
            words.add(currentWord.toString());
            return new StringBuilder(WORD_BUFFER_CAPACITY);
        }
        return currentWord;
    }

    /**
     * Splits {@code html} into tokens, in input order. Concatenating the
     * returned tokens yields the original string.
     *
     * <ul>
     *   <li>everything from {@code '<'} up to and including the next
     *       {@code '>'} is one token;</li>
     *   <li>a run of consecutive whitespace characters is one token;</li>
     *   <li>outside tags, a non-letter character starts a new token and
     *       letters are appended to the current token.</li>
     * </ul>
     *
     * @param html the markup to tokenize; {@code null} is treated as empty
     * @return the list of tokens, empty when there is nothing to tokenize
     */
    private List<String> htmlToTokens(String html) {
        List<String> words = new ArrayList<String>(2048);
        if (html == null) {
            return words;
        }

        Mode mode = Mode.CHAR;
        StringBuilder currentWord = new StringBuilder(WORD_BUFFER_CAPACITY);

        for (char c : html.toCharArray()) {
            switch (mode) {
                case TAG:
                    currentWord.append(c);
                    if (isEndOfTag(c)) {
                        // The tag is complete: emit it and go back to plain text.
                        words.add(currentWord.toString());
                        currentWord = new StringBuilder(WORD_BUFFER_CAPACITY);
                        mode = Mode.CHAR;
                    }
                    break;

                case CHAR:
                    if (isStartOfTag(c)) {
                        currentWord = pushWord(currentWord, words);
                        mode = Mode.TAG;
                    } else if (isWhitespace(c)) {
                        currentWord = pushWord(currentWord, words);
                        // Must be WHITESPACE, not TAG: in TAG mode everything
                        // up to the next '>' would be glued into one token.
                        mode = Mode.WHITESPACE;
                    } else if (!Character.isLetter(c)) {
                        // A non-letter character ends the word in progress and
                        // begins a new token.
                        currentWord = pushWord(currentWord, words);
                    }
                    currentWord.append(c);
                    break;

                case WHITESPACE:
                    if (isStartOfTag(c)) {
                        currentWord = pushWord(currentWord, words);
                        mode = Mode.TAG;
                    } else if (!isWhitespace(c)) {
                        currentWord = pushWord(currentWord, words);
                        mode = Mode.CHAR;
                    }
                    // Further whitespace simply extends the current run.
                    currentWord.append(c);
                    break;
            }
        }

        // Flush whatever is left, including an unterminated tag.
        pushWord(currentWord, words);
        return words;
    }

    /**
     * Unfinished stub: only initialises its local bookkeeping and returns
     * without producing a result.
     *
     * <p>NOTE(review): the parameter names suggest a search for the longest
     * common run of tokens inside the given before/after ranges, but no such
     * logic exists here, and the first three parameters are plain
     * {@code int}s rather than token collections. Confirm the intended
     * design before implementing.
     */
    private void findMatch(int beforeTokens,
                           int afterTokens,
                           int indexOfBeforeLocationsInAfterTokens,
                           int startInBefore,
                           int endInBefore,
                           int startInAfter,
                           int endInAfter) {
        int bestMatchInBefore = startInBefore;
        int bestMatchInAfter = startInAfter;
        int bestMatchLength = 0;
        // TODO: not implemented; the values above are never used or returned.
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment