Created
March 13, 2019 11:09
-
-
Save geofflangdale/92d0e67a15151eabe7e9cdf1cd254ac0 to your computer and use it in GitHub Desktop.
Ponderous older version of our "are we inside quotes" code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//////////////////////////////////////////////////////////////////////////////////////////// | |
// Step 2: detect insides of quote pairs | |
//////////////////////////////////////////////////////////////////////////////////////////// | |
u64 quote_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"')); | |
quote_bits = quote_bits & ~odd_ends; | |
dumpbits(quote_bits, "quote_bits"); | |
// pdep pattern is alternating 0 and 1 bits, starting with 0 or 1 depending on whether | |
// we're in a quote-pair from the previous iteration | |
u64 pdep_pattern = even_bits ^ prev_iter_inside_quote; | |
u64 starting_quotes = _pdep_u64(pdep_pattern, quote_bits); | |
// now starting quotes is the opening quote in each pair | |
dumpbits(pdep_pattern, "pdep_pattern"); | |
dumpbits(starting_quotes, "starting quotes"); | |
// record whether we borrow from out of bit 63 (whether we're in a quote at iteration end) | |
// we also need to subtract 1 if we need to; this reflects | |
// previous iteration status. This needs to be effectively subtracted from our starting | |
// quotes as we need to get a carry even if there aren't any quote bits in order to preserve | |
// our 'in a quote' status inside a long string | |
// aside from that it is simple: just clear starting quotes and subtract them, leaving us | |
// with one bits where we are inside quotes (a half-open range including the start quote | |
// but not the end quote). | |
u64 subtracted_starts; | |
bool iter_ends_quote = __builtin_usubll_overflow( | |
(quote_bits ^ starting_quotes), | |
starting_quotes - prev_iter_inside_quote, | |
&subtracted_starts); | |
prev_iter_inside_quote = iter_ends_quote ? 0xffffffffffffffffULL : 0x0ULL; | |
dumpbits(subtracted_starts, "subtracted_starts"); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment