Und3rf10w · September 26, 2025 05:30
diff --git a/myAttempt.ts b/myAttempt.ts
 /**
 * This is my attempt at a solution. Send input text to ``parseInput``
 *  - Und3rf10w
 */

 /**
 * One segment produced by ``parseInput``: either a run of plain text or the
 * body of a fenced code block.
 */
 export interface ParsedItem {
  /** Segment kind: `'text'` for prose, `'code'` for a fenced block. */
  type: 'text' | 'code';

  /** Lines of the segment joined with `\n`; for code, the fence lines are excluded. */
  content: string;

  /** Details read from the opening fence's info string (populated for code segments). */
  metadata?: {
    /** Line-highlight spec taken from the first `{...}` group of the info string. */
    highlightedLines?: string;
    /** Value of a `filename=` / `name=` pair in the info string. */
    fileName?: string;
  };

  /** First token of the info string; `'plaintext'` when the fence names no language. */
  language?: string;
 }

 /**
 * Splits Markdown-like text into an ordered list of plain-text and fenced-code
 * segments using a small line-by-line state machine.
 *
 * Fence rules (leading/trailing whitespace on a line is ignored when detecting fences):
 *  - Opening fence: three or more backticks followed by an info string that
 *    contains no backtick. A line such as "```inline``` text" is therefore an
 *    inline code span and stays ordinary text instead of opening a block.
 *  - Closing fence: a line made only of backticks that is longer than the
 *    opener, or exactly as long as the opener unless a longer bare fence
 *    follows it (after at most three blank lines). In that case the
 *    equal-length fence is kept as content so an outer, longer fence can wrap
 *    an inner block.
 *  - Bare fences shorter than the opener are content.
 *
 * Both LF and CRLF line endings are accepted; emitted content always uses `\n`.
 * A fence that is still open at end of input is emitted as a code segment,
 * even when it has no body lines yet, so its info string is not lost.
 *
 * @param inputText - Raw text to split; an empty string yields an empty list.
 * @returns Segments in document order. Code segments carry the `language` and
 *   `metadata` that `parseInfoString` derives from the opening fence.
 */
 export const parseInput = (inputText: string): ParsedItem[] => {
  if (!inputText) return [];

  // A line consisting solely of 3+ backticks: the only thing that can close a block.
  const bareFence = /^`{3,}$/;
  // 3+ backticks plus an info string that must not contain another backtick.
  const openingFence = /^(`{3,})([^`]*)$/;
  // Number of blank lines the look-ahead may skip while searching for a longer fence.
  const maxBlankLines = 3;

  const result: ParsedItem[] = [];
  // Split on LF or CRLF so no stray '\r' ends up inside emitted content.
  const lines = inputText.split(/\r?\n/);

  let inCodeBlock = false;
  let pendingLines: string[] = [];
  let infoString = '';
  let openFenceLength = 0;

  // Appends a code segment built from the given info string and body lines.
  const pushCode = (info: string, body: string[]): void => {
    const { language, metadata } = parseInfoString(info);
    result.push({ type: 'code', content: body.join('\n'), language, metadata });
  };

  // True when the next non-blank line at or after `from` (skipping at most
  // `maxBlankLines` blank lines) is a bare fence longer than `fenceLength`.
  const longerFenceFollows = (from: number, fenceLength: number): boolean => {
    let blanksSeen = 0;
    for (let k = from; k < lines.length && blanksSeen <= maxBlankLines; k++) {
      const candidate = lines[k].trim();
      if (candidate === '') {
        blanksSeen++;
        continue;
      }
      // The first non-blank line decides the outcome either way.
      return bareFence.test(candidate) && candidate.length > fenceLength;
    }
    return false;
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const trimmed = line.trim();

    if (!inCodeBlock) {
      const opener = trimmed.match(openingFence);
      if (!opener) {
        pendingLines.push(line);
        continue;
      }

      // Flush the text collected so far, then enter the new block.
      if (pendingLines.length > 0) {
        result.push({ type: 'text', content: pendingLines.join('\n') });
      }
      pendingLines = [];
      inCodeBlock = true;
      openFenceLength = opener[1].length;
      infoString = opener[2].trim();
      continue;
    }

    // 0 means "not a bare fence", which can never satisfy either closing rule
    // because an opener is always at least three backticks long.
    const fenceLength = bareFence.test(trimmed) ? trimmed.length : 0;
    const closesBlock =
      fenceLength > openFenceLength ||
      (fenceLength === openFenceLength && !longerFenceFollows(i + 1, fenceLength));

    if (!closesBlock) {
      pendingLines.push(line);
      continue;
    }

    pushCode(infoString, pendingLines);
    pendingLines = [];
    infoString = '';
    openFenceLength = 0;
    inCodeBlock = false;
  }

  // Flush whatever is left: an unterminated block is always emitted, trailing
  // text only when at least one line was collected.
  if (inCodeBlock) {
    pushCode(infoString, pendingLines);
  } else if (pendingLines.length > 0) {
    result.push({ type: 'text', content: pendingLines.join('\n') });
  }

  return result;
 };

 /**
 * Parses the info string that follows an opening code fence.
 *
 * Recognised pieces, in the order they are processed:
 *  1. `{...}` groups: the trimmed contents of the first one become
 *     `highlightedLines`; every brace group is then removed.
 *  2. Legacy highlight groups such as `(1-3,5)` (digits, commas and dashes
 *     only) are removed and ignored. Other parentheses are left untouched.
 *  3. The first remaining token is the language, unless it is immediately
 *     followed by `=` (then it is a key and the language stays `'plaintext'`).
 *  4. `key=value` pairs. Values may be single- or double-quoted (backslash
 *     escapes the next character) or unquoted, in which case the value runs
 *     until the next `key=` or the end of the string, so it may contain
 *     spaces. Only the keys `filename` and `name` (case-insensitive) are
 *     used; when both appear, the later one wins. Bare tokens between pairs
 *     (for example a flag like `showLineNumbers`) are skipped rather than
 *     ending the scan.
 *
 * The function has no side effects.
 *
 * @param infoString - Text after the opening backticks; may be empty.
 * @returns The language (default `'plaintext'`) and a metadata object whose
 *   fields are `undefined` when the corresponding piece is absent.
 */
 function parseInfoString(infoString: string): {
  language: string;
  metadata: { fileName: string | undefined; highlightedLines: string | undefined };
 } {
  const raw = infoString || '';

  // Prefix of a `key=` pair, and the boundary that ends an unquoted value.
  const keyPrefix = /^([A-Za-z][A-Za-z0-9_-]*)\s*=\s*/i;
  const nextKeyBoundary = /\s+[A-Za-z][A-Za-z0-9_-]*\s*=/i;

  // Highlight spec comes from the first brace group; an empty group counts as absent.
  const braceMatch = raw.match(/\{([^}]*)\}/);
  const highlightedLines = braceMatch
    ? braceMatch[1].trim() || undefined
    : undefined;

  // Drop all brace groups, then any legacy "(1-3,5)"-style highlight groups.
  let remainder = raw
    .replace(/\{[^}]*\}/g, ' ')
    .trim()
    .replace(/\(\s*\d+\s*(?:-\s*\d+)?(?:\s*,\s*\d+\s*(?:-\s*\d+)?)*\s*\)/g, ' ')
    .trim();

  // The first token is the language unless it turns out to be a key.
  let language = 'plaintext';
  const firstToken = remainder.match(/^([^\s=]+)/);
  if (firstToken) {
    const afterToken = remainder.slice(firstToken[0].length);
    if (!/^\s*=/.test(afterToken)) {
      language = firstToken[1];
      remainder = afterToken.trim();
    }
  }

  let fileName: string | undefined;

  while (remainder) {
    const keyMatch = remainder.match(keyPrefix);
    if (!keyMatch) {
      // Not a key=value pair: skip this bare token and keep scanning.
      // `remainder` is trimmed and non-empty here, so this always consumes input.
      remainder = remainder.replace(/^\S+\s*/, '');
      continue;
    }

    const key = keyMatch[1].toLowerCase();
    remainder = remainder.slice(keyMatch[0].length);

    let value = '';
    if (remainder.startsWith('"') || remainder.startsWith("'")) {
      // Quoted value: read up to the matching quote, honouring backslash escapes.
      const quote = remainder[0];
      let j = 1;
      while (j < remainder.length) {
        const ch = remainder[j];
        if (ch === '\\' && j + 1 < remainder.length) {
          value += remainder[j + 1];
          j += 2;
          continue;
        }
        if (ch === quote) break;
        value += ch;
        j++;
      }
      // Skip the closing quote (or run off the end if it is missing).
      remainder = remainder.slice(j + 1);
    } else {
      // Unquoted value: everything up to the whitespace that precedes the next `key=`.
      const boundary = remainder.search(nextKeyBoundary);
      if (boundary === -1) {
        value = remainder.trim();
        remainder = '';
      } else {
        value = remainder.slice(0, boundary).trim();
        remainder = remainder.slice(boundary);
      }
    }

    if (key === 'filename' || key === 'name') {
      fileName = value;
    }

    remainder = remainder.trim();
  }

  return { language, metadata: { fileName, highlightedLines } };
 }
diff --git a/stress test.md b/stress test.md
	/**
	* This is my attempt at a solution. Send input text to ``parseInput``
	* - Und3rf10w
	*/

	export interface ParsedItem {
	type: 'code' | 'text';
	content: string;
	language?: string;
	metadata?: {
	fileName?: string;
	highlightedLines?: string;
	};
	}

	/**
	* A state-machine-based parser for extracting text and code blocks.
	* Handles nested code blocks and edge cases with fence lengths
	*/
	export const parseInput = (inputText: string): ParsedItem[] => {
	if (!inputText) return [];

	const result: ParsedItem[] = [];
	const lines = inputText.split('\n');

	let inCodeBlock = false;
	let currentBlockLines: string[] = [];
	let currentInfoString = '';
	let currentOpenFenceLength = 0;

	for (let i = 0; i < lines.length; i++) {
	const line = lines[i];

	if (inCodeBlock) {
	// We are INSIDE a code block, looking for a closing fence
	const trimmedLine = line.trim();
	const fenceMatch = trimmedLine.match(/^(`{3,})$/);

	if (fenceMatch) {
	const fenceLength = fenceMatch[0].length;

	// Special case: if this fence equals the opening length,
	// look ahead for a longer fence (skipping blank lines)
	if (fenceLength === currentOpenFenceLength) {
	let foundLongerFence = false;
	let lookAheadIndex = i + 1;
	let blankLinesCount = 0;
	const maxBlankLines = 3; // a reasonable limit for new lines between fences

	while (
	lookAheadIndex < lines.length &&
	blankLinesCount <= maxBlankLines
	) {
	const lookAheadLine = lines[lookAheadIndex].trim();

	if (lookAheadLine === '') {
	blankLinesCount++;
	lookAheadIndex++;
	continue;
	}

	const lookAheadFenceMatch = lookAheadLine.match(/^(`{3,})$/);
	if (
	lookAheadFenceMatch &&
	lookAheadFenceMatch[0].length > fenceLength
	) {
	foundLongerFence = true;
	break;
	}

	// If we hit non-blank, non-fence content, stop looking
	break;
	}

	if (foundLongerFence) {
	// The current fence is content, not a closer
	currentBlockLines.push(line);
	continue;
	} else {
	// No longer fence found, this IS the closing fence
	const codeContent = currentBlockLines.join('\n');
	const { language, metadata } = parseInfoString(currentInfoString);
	result.push({
	type: 'code',
	content: codeContent,
	language,
	metadata,
	});

	// Reset state
	inCodeBlock = false;
	currentBlockLines = [];
	currentInfoString = '';
	currentOpenFenceLength = 0;
	continue;
	}
	}

	// Handle fences that are longer than the opener
	if (fenceLength > currentOpenFenceLength) {
	// Exit the code block
	const codeContent = currentBlockLines.join('\n');
	const { language, metadata } = parseInfoString(currentInfoString);
	result.push({
	type: 'code',
	content: codeContent,
	language,
	metadata,
	});

	// Reset state
	inCodeBlock = false;
	currentBlockLines = [];
	currentInfoString = '';
	currentOpenFenceLength = 0;
	} else {
	// Fence is too short to close this block
	currentBlockLines.push(line);
	}
	} else {
	currentBlockLines.push(line);
	}
	} else {
	// We are OUTSIDE a code block, looking for an opening fence
	const trimmedLine = line.trim();
	const fenceMatch = trimmedLine.match(/^(`{3,})/);

	if (fenceMatch) {
	// Enter a new code block
	if (currentBlockLines.length > 0) {
	result.push({ type: 'text', content: currentBlockLines.join('\n') });
	}

	inCodeBlock = true;
	currentBlockLines = [];
	currentOpenFenceLength = fenceMatch[0].length;
	currentInfoString = trimmedLine
	.substring(currentOpenFenceLength)
	.trim();
	} else {
	currentBlockLines.push(line);
	}
	}
	}

	// Handle any remaining content
	if (currentBlockLines.length > 0) {
	const remainingContent = currentBlockLines.join('\n');
	if (inCodeBlock) {
	const { language, metadata } = parseInfoString(currentInfoString);
	result.push({
	type: 'code',
	content: remainingContent,
	language,
	metadata,
	});
	} else {
	result.push({ type: 'text', content: remainingContent });
	}
	}

	return result;
	};

	/**
	* Helper function to parse the info string of a code block.
	* Supports both quoted and unquoted filenames with spaces.
	* Only removes parentheses that look like legacy highlight syntax.
	*/
	function parseInfoString(infoString: string) {
	const raw = infoString || '';

	// Extract highlighted lines from braces (preferred syntax)
	const braceHighlightMatch = raw.match(/\{([^}]*)\}/);
	const highlightedLines = braceHighlightMatch
	? braceHighlightMatch[1].trim() || undefined
	: undefined;

	// Remove all brace blocks from further parsing
	let stripped = raw.replace(/\{[^}]*\}/g, ' ').trim();

	// Only remove parentheses that look like legacy highlight syntax
	// (contain numbers, commas, and dashes only)
	// Really this was just for my specific usecase
	stripped = stripped
	.replace(/\(\s*\d+\s*(?:-\s*\d+)?(?:\s*,\s*\d+\s*(?:-\s*\d+)?)*\s*\)/g, ' ')
	.trim();

	// Determine language as the first token (if not a key)
	let language = 'plaintext';
	let remainder = stripped;

	const firstTokenMatch = remainder.match(/^([^\s=]+)/);
	if (firstTokenMatch) {
	const token = firstTokenMatch[1];
	const after = remainder.slice(firstTokenMatch[0].length);
	const isKeyLike = /^\s*=/.test(after);

	if (!isKeyLike) {
	language = token.trim() || 'plaintext';
	remainder = after.trim();
	}
	}

	let fileName: string | undefined;

	// Parse key=value pairs with unquoted value handling
	while (remainder) {
	remainder = remainder.trim();

	// Match key= pattern (case-insensitive keys)
	const keyEqMatch = remainder.match(/^([A-Za-z][A-Za-z0-9_-]*)\s*=\s*/i);
	if (!keyEqMatch) break;

	const keyNorm = keyEqMatch[1].toLowerCase();
	remainder = remainder.slice(keyEqMatch[0].length);

	let val = '';

	// Handle quoted values
	if (remainder.startsWith('"') || remainder.startsWith("'")) {
	const quote = remainder[0];
	let j = 1;
	while (j < remainder.length) {
	const ch = remainder[j];
	if (ch === '\\' && j + 1 < remainder.length) {
	val += remainder[j + 1];
	j += 2;
	continue;
	}
	if (ch === quote) break;
	val += ch;
	j++;
	}
	remainder = remainder.slice(j + 1);
	} else {
	// Handle unquoted values - capture everything until we see another key= pattern
	let j = 0;
	let lastNonWhitespace = -1;

	while (j < remainder.length) {
	const ch = remainder[j];

	// Track last non-whitespace position
	if (!/\s/.test(ch)) {
	lastNonWhitespace = j;
	}

	// Look ahead for another key=value pair
	if (j > 0 && /\s/.test(ch)) {
	const ahead = remainder.slice(j).trim();
	if (/^[A-Za-z][A-Za-z0-9_-]*\s*=/i.test(ahead)) {
	// Found another key, stop here
	j = lastNonWhitespace + 1;
	break;
	}
	}

	j++;
	}

	val = remainder.slice(0, j).trim();
	remainder = remainder.slice(j);
	}

	// Support both 'filename' and 'name' (case-insensitive)
	if (keyNorm === 'filename' || keyNorm === 'name') {
	fileName = val;
	}
	}

	console.debug('parseInfoString result:', {
	input: infoString,
	language,
	metadata: { fileName, highlightedLines },
	});

	return { language, metadata: { fileName, highlightedLines } };
	}
No results found