lukaszkalnik · October 27, 2016 09:40
diff --git a/MarkdownLinearizeNestedAsterisks.java b/MarkdownLinearizeNestedAsterisks.java
    /**
     * Flattens italic spans nested inside bold spans so that no emphasis is nested.
     *
     * <p>Nested emphasis such as {@code **bold *bold italic* bold**} is not interpreted
     * correctly by the Bypass lib, so every bold span containing single-asterisk spans
     * is cut into adjacent, non-nested pieces:
     *
     * <pre>
     * in:  **Begin the external string *here the nested string* and here it continues**
     * out: **Begin the external string** ***here the nested string*** **and here it continues**
     * </pre>
     *
     * <p>Triple asterisks at the start and/or the end of a bold span are kept as they are.
     * Text that follows a double asterisk which is never closed is left untouched.
     *
     * @param markdownText Markdown source to preprocess; must not be {@code null}
     * @return the Markdown text with nested emphasis linearized
     */
    private static String preprocessNestedAsterisks(String markdownText) {

        // Split on the bold delimiter. The negative limit keeps trailing empty tokens, so the
        // number of tokens is always the number of "**" delimiters plus one.
        String[] doubleAsteriskSplitMarkdown = markdownText.split("\\*\\*", -1);
        int numberOfDoubleAsteriskedTokens = doubleAsteriskSplitMarkdown.length;

        // Strings surrounded by double asterisks are always on the odd array positions.
        // The very last token is never followed by "**": if it sits on an odd position it comes
        // after an unclosed "**" and is not a bold span, so it must not be rewritten.
        // Stopping before the last token also guarantees that token i + 1 exists.
        for (int i = 1; i < numberOfDoubleAsteriskedTokens - 1; i += 2) {

            // Because of the simplified approach using split("\\*\\*") above, in case of trailing
            // triple asterisks (...bold italic text***) the first two asterisks get matched by
            // split() and the last asterisk ends up at the start of the next token, although it
            // still belongs to the current one (it closes the inner ...italic text*).
            // Only odd tokens can hold *italic text* pieces of a bold span, so an asterisk at the
            // start of the following even token is moved back here.
            if (doubleAsteriskSplitMarkdown[i + 1].startsWith("*")) {
                doubleAsteriskSplitMarkdown[i] += "*";
                doubleAsteriskSplitMarkdown[i + 1] = doubleAsteriskSplitMarkdown[i + 1].substring(1);
            }

            // Now look inside the **external string** for *nested strings*.
            String[] singleAsteriskSplitContents = doubleAsteriskSplitMarkdown[i].split("\\*", -1);
            int numberOfSingleAsteriskedTokens = singleAsteriskSplitContents.length;

            if (numberOfSingleAsteriskedTokens <= 2) {
                // Fewer than two single asterisks => no *nested string* in this bold span.
                continue;
            }

            // At least one *nested string* found. Even positions hold bold-only text, odd
            // positions hold the text of a *nested string*.
            StringBuilder linearized = new StringBuilder();
            int lastIndex = numberOfSingleAsteriskedTokens - 1;
            for (int j = 0; j < lastIndex; j++) {
                String currentToken = singleAsteriskSplitContents[j];

                if (j % 2 == 0) {
                    if (j == 0 && currentToken.isEmpty()) {
                        // The bold span began with a third asterisk (***bold italic at the
                        // beginning*...): just restore the delimiter that split() consumed.
                        linearized.append("*");
                    } else {
                        // The bold-only part usually ends with a space, and every part but the
                        // first one usually begins with one as well. Both are dropped through the
                        // helpers (defined outside this block) because the separating space is
                        // emitted between the added asterisks instead: "** ***" and "*** **".
                        currentToken = removeTrailingSpace(currentToken);
                        if (j != 0) {
                            currentToken = removeLeadingSpace(currentToken);
                        }

                        // Close this bold part, add the space, open the triple-emphasized part.
                        linearized.append(currentToken).append("** ***");
                    }
                } else if (j == lastIndex - 1 && singleAsteriskSplitContents[lastIndex].isEmpty()) {
                    // Last *nested string* followed by an empty last token: the bold span ended
                    // with a third asterisk (...*bold italic at the end***), so only the consumed
                    // delimiter is restored.
                    linearized.append(currentToken).append("*");
                } else {
                    // Close the triple-emphasized part, add the space, reopen the bold part.
                    linearized.append(currentToken).append("*** **");
                }
            }

            // The last token gets no asterisks after it; its leading space is dropped as well.
            linearized.append(removeLeadingSpace(singleAsteriskSplitContents[lastIndex]));

            // Replace the original bold span with the linearized one.
            doubleAsteriskSplitMarkdown[i] = linearized.toString();
        }

        // Put the tokens together again, restoring the "**" between neighbours. The last token
        // simply ends with the end of the string, so nothing is added after it.
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < numberOfDoubleAsteriskedTokens - 1; i++) {
            result.append(doubleAsteriskSplitMarkdown[i]).append("**");
        }
        result.append(doubleAsteriskSplitMarkdown[numberOfDoubleAsteriskedTokens - 1]);
        return result.toString();

    }
	// NOTE(review): a method with this exact signature is also defined earlier in this file;
	// only one of the two definitions can stay for the enclosing class to compile.
	/**
	 * Flattens italic spans nested inside bold spans so that no emphasis is nested.
	 *
	 * <p>Nested emphasis such as {@code **bold *bold italic* bold**} is not interpreted
	 * correctly by the Bypass lib, so every bold span containing single-asterisk spans
	 * is cut into adjacent, non-nested pieces:
	 *
	 * <pre>
	 * in:  **Begin the external string *here the nested string* and here it continues**
	 * out: **Begin the external string** ***here the nested string*** **and here it continues**
	 * </pre>
	 *
	 * <p>Triple asterisks at the start and/or the end of a bold span are kept as they are.
	 * Text that follows a double asterisk which is never closed is left untouched.
	 *
	 * @param markdownText Markdown source to preprocess; must not be {@code null}
	 * @return the Markdown text with nested emphasis linearized
	 */
	private static String preprocessNestedAsterisks(String markdownText) {

		// Split on the bold delimiter "**" (both asterisks escaped for the regex). The negative
		// limit keeps trailing empty tokens, so the number of tokens is always the number of
		// "**" delimiters plus one.
		String[] doubleAsteriskSplitMarkdown = markdownText.split("\\*\\*", -1);
		int numberOfDoubleAsteriskedTokens = doubleAsteriskSplitMarkdown.length;

		// Strings surrounded by double asterisks are always on the odd array positions.
		// The very last token is never followed by "**": if it sits on an odd position it comes
		// after an unclosed "**" and is not a bold span, so it must not be rewritten.
		// Stopping before the last token also guarantees that token i + 1 exists.
		for (int i = 1; i < numberOfDoubleAsteriskedTokens - 1; i += 2) {

			// Because of the simplified approach using split("\\*\\*") above, in case of trailing
			// triple asterisks (...bold italic text***) the first two asterisks get matched by
			// split() and the last asterisk ends up at the start of the next token, although it
			// still belongs to the current one (it closes the inner ...italic text*).
			// Only odd tokens can hold *italic text* pieces of a bold span, so an asterisk at the
			// start of the following even token is moved back here.
			if (doubleAsteriskSplitMarkdown[i + 1].startsWith("*")) {
				doubleAsteriskSplitMarkdown[i] += "*";
				doubleAsteriskSplitMarkdown[i + 1] = doubleAsteriskSplitMarkdown[i + 1].substring(1);
			}

			// Now look inside the **external string** for *nested strings*.
			String[] singleAsteriskSplitContents = doubleAsteriskSplitMarkdown[i].split("\\*", -1);
			int numberOfSingleAsteriskedTokens = singleAsteriskSplitContents.length;

			if (numberOfSingleAsteriskedTokens <= 2) {
				// Fewer than two single asterisks => no *nested string* in this bold span.
				continue;
			}

			// At least one *nested string* found. Even positions hold bold-only text, odd
			// positions hold the text of a *nested string*.
			StringBuilder linearized = new StringBuilder();
			int lastIndex = numberOfSingleAsteriskedTokens - 1;
			for (int j = 0; j < lastIndex; j++) {
				String currentToken = singleAsteriskSplitContents[j];

				if (j % 2 == 0) {
					if (j == 0 && currentToken.isEmpty()) {
						// The bold span began with a third asterisk (***bold italic at the
						// beginning*...): just restore the delimiter that split() consumed.
						linearized.append("*");
					} else {
						// The bold-only part usually ends with a space, and every part but the
						// first one usually begins with one as well. Both are dropped through the
						// helpers (defined outside this block) because the separating space is
						// emitted between the added asterisks instead: "** ***" and "*** **".
						currentToken = removeTrailingSpace(currentToken);
						if (j != 0) {
							currentToken = removeLeadingSpace(currentToken);
						}

						// Close this bold part, add the space, open the triple-emphasized part.
						linearized.append(currentToken).append("** ***");
					}
				} else if (j == lastIndex - 1 && singleAsteriskSplitContents[lastIndex].isEmpty()) {
					// Last *nested string* followed by an empty last token: the bold span ended
					// with a third asterisk (...*bold italic at the end***), so only the consumed
					// delimiter is restored.
					linearized.append(currentToken).append("*");
				} else {
					// Close the triple-emphasized part, add the space, reopen the bold part.
					linearized.append(currentToken).append("*** **");
				}
			}

			// The last token gets no asterisks after it; its leading space is dropped as well.
			linearized.append(removeLeadingSpace(singleAsteriskSplitContents[lastIndex]));

			// Replace the original bold span with the linearized one.
			doubleAsteriskSplitMarkdown[i] = linearized.toString();
		}

		// Put the tokens together again, restoring the "**" between neighbours. The last token
		// simply ends with the end of the string, so nothing is added after it.
		StringBuilder result = new StringBuilder();
		for (int i = 0; i < numberOfDoubleAsteriskedTokens - 1; i++) {
			result.append(doubleAsteriskSplitMarkdown[i]).append("**");
		}
		result.append(doubleAsteriskSplitMarkdown[numberOfDoubleAsteriskedTokens - 1]);
		return result.toString();

	}