q25
/**
 * @param {string[]} robotLines The lines of the robots.txt file (ASCII characters
 *     only; trailing newline characters have already been removed).
 * @return {string[]} All the disallowed url patterns that apply to your search engine.
 */
function parseRobotFile(robotLines) {
  const doeBotRules = [];
  const wildcardRules = [];
  // This variable tracks the agent type for the current section being processed.
  // It can be 'DoeBot', '*', or 'other' (if the section is for a different bot).
  let currentSectionAgentType = null;
  const USER_AGENT_PREFIX = "User-agent:";
  const DISALLOW_PREFIX = "Disallow:";
  for (const rawLine of robotLines) {
    // "There can be spaces at the beginning and end of a line... They must be ignored."
    const line = rawLine.trim();
    if (line.startsWith(USER_AGENT_PREFIX)) {
      // "All keywords, search engine names ... are case-sensitive."
      const agentName = line.substring(USER_AGENT_PREFIX.length).trim();
      if (agentName === "DoeBot") {
        currentSectionAgentType = "DoeBot";
      } else if (agentName === "*") {
        currentSectionAgentType = "*";
      } else {
        currentSectionAgentType = "other"; // Section for another bot
      }
    } else if (line.startsWith(DISALLOW_PREFIX)) {
      // "There is always at least one non-space character after the text Disallow:"
      // "A uri pattern can have any type of characters (letters, digits, etc.), except spaces."
      // Trimming here handles spaces between "Disallow:" and the actual pattern.
      const path = line.substring(DISALLOW_PREFIX.length).trim();
      // Add the rule only if it belongs to an applicable agent section and the
      // path is non-empty (the problem statement guarantees it will not be).
      if (path) {
        if (currentSectionAgentType === "DoeBot") {
          doeBotRules.push(path);
        } else if (currentSectionAgentType === "*") {
          wildcardRules.push(path);
        }
      }
    }
    // "All lines not starting with Disallow: or User-agent: must be ignored."
    // This is implicitly handled: other lines match neither condition above.
  }
  let finalRules;
  // DoeBot-specific rules take precedence.
  // If there are any rules specifically for "DoeBot", use them.
  // Otherwise, use the rules for "*".
  if (doeBotRules.length > 0) {
    finalRules = doeBotRules;
  } else {
    finalRules = wildcardRules;
  }
  // "Before returning the disallowed url patterns, remove the duplicates and sort them."
  // A Set removes the duplicates.
  const uniqueRules = [...new Set(finalRules)];
  // Sorting alphabetically (the default string sort is case-sensitive, as required).
  uniqueRules.sort();
  return uniqueRules;
}

// Example test from the problem statement:
// const testLines = ["User-agent: *", "Crawl-delay: 10", "Disallow: /administrator/", "User-agent: DoeBot", "Disallow: /includes/", "Disallow: /"];
// console.log(parseRobotFile(testLines)); // Expected: ["/", "/includes/"]
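
// A couple of extra checks (hypothetical inputs, not part of the original
// problem statement) exercising the fallback and the dedup/sort behaviour:
//
// No DoeBot section present, so the wildcard rules apply, with the duplicate
// "/tmp/" removed:
// console.log(parseRobotFile(["User-agent: *", "Disallow: /tmp/", "Disallow: /tmp/"]));
// => ["/tmp/"]
//
// A section for an unrelated bot contributes nothing:
// console.log(parseRobotFile(["User-agent: OtherBot", "Disallow: /secret/"]));
// => []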