rajvermacas · October 20, 2024 11:34
diff --git a/stock_export.js b/stock_export.js
 /**
  * Scrapes the first table directly under `#screener-table` into
  * column-oriented data.
  *
  * Each `<th>` of the table yields one entry. The header's `id` attribute
  * selects the class that marks the column's body cells: `"data-col"` when the
  * id is `"name"`, otherwise `<id>-col`.
  *
  * @returns {{column: string, rows: string[]}[]} One object per header cell.
  *   `column` is the text of the header's `.data-cell .desktop--only` element,
  *   or `"#"` when the header has none. `rows` holds the text of each matching
  *   body cell with every comma removed.
  * @throws {Error} If no table is found under `#screener-table`.
  */
 const scrapeData = () => {
   const table = document.querySelector("#screener-table > table");
   if (!table) {
     // Fail with a readable message rather than a TypeError on `undefined`.
     throw new Error("No table found with ID 'screener-table'");
   }

   // Only the first <tbody> is scraped. Without one there are simply no rows.
   const body = table.querySelector("tbody");

   return Array.from(table.querySelectorAll("th")).map(elm => {
     const id = elm.getAttribute("id");
     const columnClass = id === "name" ? "data-col" : `${id}-col`;

     // The class is matched through an attribute selector instead of `.class`:
     // ids such as "26wpct" start with a digit, and `.26wpct-col` is not a
     // valid selector. Quotes and backslashes are escaped so that an unusual
     // id cannot break out of the quoted attribute value.
     const quotedClass = columnClass.replace(/["\\]/g, "\\$&");
     const cells = body
       ? body.querySelectorAll(`tbody [class~="${quotedClass}"] .ellipsis .desktop--only`)
       : [];

     // Look the header label up once and reuse the result.
     const label = elm.querySelector(".data-cell .desktop--only");

     return {
       column: label ? label.textContent : "#",
       // Commas are dropped from the cell text (e.g. "1,234" becomes "1234").
       rows: Array.from(cells).map(el => el.textContent.replaceAll(",", "")),
     };
   });
 };

 /**
  * Builds CSV text from column-oriented scraped data.
  *
  * The first and last entries of `scrapedData` are dropped before export. The
  * number of data rows is taken from the first remaining column; a column with
  * fewer rows contributes empty cells.
  *
  * @param {{column: string, rows: string[]}[]} scrapedData Columns in the shape
  *   returned by scrapeData.
  * @returns {string} A header line followed by one line per row, joined with
  *   "\n". Every data cell is wrapped in double quotes. Returns "" when no
  *   columns remain after dropping the first and last.
  */
 const generateCSVData = scrapedData => {
   const columns = scrapedData.slice(1, scrapedData.length - 1);
   if (columns.length === 0) {
     return "";
   }

   // Always quote data cells and double any embedded quote so the cell stays intact.
   const quote = value => `"${String(value ?? "").replaceAll('"', '""')}"`;

   // Header names stay bare unless they contain a character that would break the row.
   const headerCell = name => {
     const text = String(name);
     return /[",\r\n]/.test(text) ? quote(text) : text;
   };

   // Rows are built as arrays rather than objects keyed by column name: object
   // keys would merge duplicate names and move integer-like names (e.g. "2023")
   // ahead of the others.
   const rowCount = columns[0].rows.length;
   const headerLine = columns.map(({ column }) => headerCell(column)).join(",");
   const dataLines = Array.from({ length: rowCount }, (_, rowIndex) =>
     columns.map(({ rows }) => quote(rows[rowIndex])).join(",")
   );

   return [headerLine, ...dataLines].join("\n");
 };

 /**
  * Triggers a browser download of the given CSV text through a temporary
  * anchor element carrying a `data:` URL.
  *
  * @param {string} csvData CSV text to download.
  * @returns {void}
  */
 const downloadFile = csvData => {
   const anchor = document.createElement('a');
   // encodeURIComponent (unlike encodeURI) also escapes "#", "&" and "?".
   // A raw "#" would start the URL fragment and cut the file off at that point.
   anchor.href = `data:text/csv;charset=utf-8,${encodeURIComponent(csvData)}`;
   anchor.target = '_blank';
   // The page title is used as the suggested file name.
   anchor.download = `${document.title}`;
   anchor.click();
 };

 // Entry point: scrape the table, turn it into CSV text, then start the download.
 // The block scope keeps the intermediate values out of the enclosing scope.
 {
   const scrapedColumns = scrapeData();
   const csvText = generateCSVData(scrapedColumns);
   downloadFile(csvText);
 }


 // ----------------------------------------------------------------------------- Debugger ---------------------------------------------------------------------------------

 // const scrapeData = () => {
 //   try {
 //     // Log initial execution
 //     console.log("Starting scrapeData function");

 //     const sections = document.querySelectorAll("#screener-table > table");
 //     console.log("Found tables:", sections.length);

 //     if (sections.length === 0) {
 //       throw new Error("No table found with ID 'screener-table'");
 //     }

 //     const headingElements = sections[0].querySelectorAll("th");
 //     console.log("Found heading elements:", headingElements.length);

 //     if (headingElements.length === 0) {
 //       throw new Error("No header elements (th) found in table");
 //     }

 //     const scrapedData = Array.from(headingElements).map((elm, index) => {
 //       try {
 //         // Log header element details
 //         console.log(`\nProcessing header ${index}:`, elm);
 //         console.log("Header ID:", elm.getAttribute("id"));

 //         const selector = elm.getAttribute("id") === "name" ? 
 //           "data-col" : elm.getAttribute("id") + "-col";
 //         console.log("Generated selector:", selector);

 //         // Validate selector before using it
 //         if (selector.includes(".") || selector.includes("#") || /^\d/.test(selector)) {
 //           console.warn(`Invalid selector detected: ${selector}`);
 //           // Clean the selector - remove problematic characters or handle numerics
 //           const cleanSelector = selector.replace(/^\d+/, 'n$&').replace(/[.#]/g, '_');
 //           console.log("Cleaned selector:", cleanSelector);
 //         }

 //         // Build selector parts separately for debugging
 //         const fullSelector = `tbody [class~="${selector}"] .ellipsis .desktop--only`;
 //         console.log("Full selector being used:", fullSelector);

 //         const rowElements = sections[0].querySelectorAll(fullSelector);
 //         console.log("Found row elements:", rowElements.length);

 //         return {
 //           column: (() => {
 //             try {
 //               const cellElement = elm.querySelector(".data-cell .desktop--only");
 //               return cellElement ? cellElement.textContent.trim() : "#";
 //             } catch (cellError) {
 //               console.error("Error getting column name:", cellError);
 //               return "#";
 //             }
 //           })(),
 //           rows: Array.from(rowElements).map((el, rowIndex) => {
 //             try {
 //               return el.textContent.replaceAll(",", "").trim();
 //             } catch (rowError) {
 //               console.error(`Error processing row ${rowIndex}:`, rowError);
 //               return "";
 //             }
 //           })
 //         };
 //       } catch (elementError) {
 //         console.error(`Error processing header element ${index}:`, elementError);
 //         return {
 //           column: `Error_Column_${index}`,
 //           rows: []
 //         };
 //       }
 //     });

 //     console.log("Final scraped data:", scrapedData);
 //     return scrapedData;

 //   } catch (error) {
 //     console.error("Error in scrapeData function:", error);
 //     console.error("Error stack:", error.stack);
 //     // Return empty array or error indicator
 //     return [{
 //       column: "Error",
 //       rows: [`Error scraping data: ${error.message}`]
 //     }];
 //   }
 // };

 // // Test the function with error catching
 // try {
 //   const result = scrapeData();
 //   console.log("Function completed. Result:", result);
 // } catch (error) {
 //   console.error("Error executing scrapeData:", error);
 // }
	/**
	 * Scrapes the first table directly under `#screener-table` into
	 * column-oriented data.
	 *
	 * Each `<th>` of the table yields one entry. The header's `id` attribute
	 * selects the class that marks the column's body cells: `"data-col"` when the
	 * id is `"name"`, otherwise `<id>-col`.
	 *
	 * @returns {{column: string, rows: string[]}[]} One object per header cell.
	 *   `column` is the text of the header's `.data-cell .desktop--only` element,
	 *   or `"#"` when the header has none. `rows` holds the text of each matching
	 *   body cell with every comma removed.
	 * @throws {Error} If no table is found under `#screener-table`.
	 */
	const scrapeData = () => {
	  const table = document.querySelector("#screener-table > table");
	  if (!table) {
	    // Fail with a readable message rather than a TypeError on `undefined`.
	    throw new Error("No table found with ID 'screener-table'");
	  }

	  // Only the first <tbody> is scraped. Without one there are simply no rows.
	  const body = table.querySelector("tbody");

	  return Array.from(table.querySelectorAll("th")).map(elm => {
	    const id = elm.getAttribute("id");
	    const columnClass = id === "name" ? "data-col" : `${id}-col`;

	    // The class is matched through an attribute selector instead of `.class`:
	    // ids such as "26wpct" start with a digit, and `.26wpct-col` is not a
	    // valid selector. Quotes and backslashes are escaped so that an unusual
	    // id cannot break out of the quoted attribute value.
	    const quotedClass = columnClass.replace(/["\\]/g, "\\$&");
	    const cells = body
	      ? body.querySelectorAll(`tbody [class~="${quotedClass}"] .ellipsis .desktop--only`)
	      : [];

	    // Look the header label up once and reuse the result.
	    const label = elm.querySelector(".data-cell .desktop--only");

	    return {
	      column: label ? label.textContent : "#",
	      // Commas are dropped from the cell text (e.g. "1,234" becomes "1234").
	      rows: Array.from(cells).map(el => el.textContent.replaceAll(",", "")),
	    };
	  });
	};

	/**
	 * Builds CSV text from column-oriented scraped data.
	 *
	 * The first and last entries of `scrapedData` are dropped before export. The
	 * number of data rows is taken from the first remaining column; a column with
	 * fewer rows contributes empty cells.
	 *
	 * @param {{column: string, rows: string[]}[]} scrapedData Columns in the shape
	 *   returned by scrapeData.
	 * @returns {string} A header line followed by one line per row, joined with
	 *   "\n". Every data cell is wrapped in double quotes. Returns "" when no
	 *   columns remain after dropping the first and last.
	 */
	const generateCSVData = scrapedData => {
	  const columns = scrapedData.slice(1, scrapedData.length - 1);
	  if (columns.length === 0) {
	    return "";
	  }

	  // Always quote data cells and double any embedded quote so the cell stays intact.
	  const quote = value => `"${String(value ?? "").replaceAll('"', '""')}"`;

	  // Header names stay bare unless they contain a character that would break the row.
	  const headerCell = name => {
	    const text = String(name);
	    return /[",\r\n]/.test(text) ? quote(text) : text;
	  };

	  // Rows are built as arrays rather than objects keyed by column name: object
	  // keys would merge duplicate names and move integer-like names (e.g. "2023")
	  // ahead of the others.
	  const rowCount = columns[0].rows.length;
	  const headerLine = columns.map(({ column }) => headerCell(column)).join(",");
	  const dataLines = Array.from({ length: rowCount }, (_, rowIndex) =>
	    columns.map(({ rows }) => quote(rows[rowIndex])).join(",")
	  );

	  return [headerLine, ...dataLines].join("\n");
	};

	/**
	 * Triggers a browser download of the given CSV text through a temporary
	 * anchor element carrying a `data:` URL.
	 *
	 * @param {string} csvData CSV text to download.
	 * @returns {void}
	 */
	const downloadFile = csvData => {
	  const anchor = document.createElement('a');
	  // encodeURIComponent (unlike encodeURI) also escapes "#", "&" and "?".
	  // A raw "#" would start the URL fragment and cut the file off at that point.
	  anchor.href = `data:text/csv;charset=utf-8,${encodeURIComponent(csvData)}`;
	  anchor.target = '_blank';
	  // The page title is used as the suggested file name.
	  anchor.download = `${document.title}`;
	  anchor.click();
	};

	// Entry point: scrape the table, turn it into CSV text, then start the download.
	// The block scope keeps the intermediate values out of the enclosing scope.
	{
	  const scrapedColumns = scrapeData();
	  const csvText = generateCSVData(scrapedColumns);
	  downloadFile(csvText);
	}


	// ----------------------------------------------------------------------------- Debugger ---------------------------------------------------------------------------------

	// const scrapeData = () => {
	// try {
	// // Log initial execution
	// console.log("Starting scrapeData function");

	// const sections = document.querySelectorAll("#screener-table > table");
	// console.log("Found tables:", sections.length);

	// if (sections.length === 0) {
	// throw new Error("No table found with ID 'screener-table'");
	// }

	// const headingElements = sections[0].querySelectorAll("th");
	// console.log("Found heading elements:", headingElements.length);

	// if (headingElements.length === 0) {
	// throw new Error("No header elements (th) found in table");
	// }

	// const scrapedData = Array.from(headingElements).map((elm, index) => {
	// try {
	// // Log header element details
	// console.log(`\nProcessing header ${index}:`, elm);
	// console.log("Header ID:", elm.getAttribute("id"));

	// const selector = elm.getAttribute("id") === "name" ?
	// "data-col" : elm.getAttribute("id") + "-col";
	// console.log("Generated selector:", selector);

	// // Validate selector before using it
	// if (selector.includes(".") || selector.includes("#") || /^\d/.test(selector)) {
	// console.warn(`Invalid selector detected: ${selector}`);
	// // Clean the selector - remove problematic characters or handle numerics
	// const cleanSelector = selector.replace(/^\d+/, 'n$&').replace(/[.#]/g, '_');
	// console.log("Cleaned selector:", cleanSelector);
	// }

	// // Build selector parts separately for debugging
	// const fullSelector = `tbody [class~="${selector}"] .ellipsis .desktop--only`;
	// console.log("Full selector being used:", fullSelector);

	// const rowElements = sections[0].querySelectorAll(fullSelector);
	// console.log("Found row elements:", rowElements.length);

	// return {
	// column: (() => {
	// try {
	// const cellElement = elm.querySelector(".data-cell .desktop--only");
	// return cellElement ? cellElement.textContent.trim() : "#";
	// } catch (cellError) {
	// console.error("Error getting column name:", cellError);
	// return "#";
	// }
	// })(),
	// rows: Array.from(rowElements).map((el, rowIndex) => {
	// try {
	// return el.textContent.replaceAll(",", "").trim();
	// } catch (rowError) {
	// console.error(`Error processing row ${rowIndex}:`, rowError);
	// return "";
	// }
	// })
	// };
	// } catch (elementError) {
	// console.error(`Error processing header element ${index}:`, elementError);
	// return {
	// column: `Error_Column_${index}`,
	// rows: []
	// };
	// }
	// });

	// console.log("Final scraped data:", scrapedData);
	// return scrapedData;

	// } catch (error) {
	// console.error("Error in scrapeData function:", error);
	// console.error("Error stack:", error.stack);
	// // Return empty array or error indicator
	// return [{
	// column: "Error",
	// rows: [`Error scraping data: ${error.message}`]
	// }];
	// }
	// };

	// // Test the function with error catching
	// try {
	// const result = scrapeData();
	// console.log("Function completed. Result:", result);
	// } catch (error) {
	// console.error("Error executing scrapeData:", error);
	// }