Last active
October 20, 2024 11:34
-
-
Save rajvermacas/36a6cfa5406eb3b218170f0dd1119e40 to your computer and use it in GitHub Desktop.
stock export
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Scrapes the screener table on the current page into column-oriented data.
 *
 * Reads the first table matching `#screener-table > table`. Every `<th>` in it
 * yields one entry: the header label plus the text of every body cell that
 * belongs to that header's column.
 *
 * Column cells are located through a class derived from the header's `id`:
 * the header with id "name" maps to class "data-col", any other id maps to
 * "<id>-col".
 *
 * @returns {Array<{column: string, rows: string[]}>} One object per header.
 *   `column` is the header label ("#" when the header has no label element);
 *   `rows` holds the cell texts with all commas removed.
 * @throws {Error} If the table or its `<tbody>` cannot be found.
 */
const scrapeData = () => {
  const table = document.querySelector("#screener-table > table");
  if (!table) {
    throw new Error('No table found matching "#screener-table > table"');
  }

  // The body lookup does not depend on the header, so do it once up front.
  const tableBody = table.querySelector("tbody");
  if (!tableBody) {
    throw new Error("Screener table has no <tbody> to read rows from");
  }

  return Array.from(table.querySelectorAll("th"), (header) => {
    const headerId = header.getAttribute("id");
    const columnClass = headerId === "name" ? "data-col" : `${headerId}-col`;

    // The class is matched with [class~="..."] rather than ".class" because
    // ids such as "26wpct" start with a digit, and ".26wpct-col" is not a
    // valid CSS selector (querySelectorAll throws a SyntaxError on it).
    // Quotes and backslashes are escaped so the quoted value stays well-formed.
    const quotedClass = columnClass.replace(/["\\]/g, "\\$&");
    const cells = tableBody.querySelectorAll(
      `tbody [class~="${quotedClass}"] .ellipsis .desktop--only`
    );

    // Headers without a label element fall back to "#".
    const labelElement = header.querySelector(".data-cell .desktop--only");

    return {
      column: labelElement ? labelElement.textContent : "#",
      // Commas are stripped so values like "1,234.5" stay a single CSV field.
      rows: Array.from(cells, (cell) => cell.textContent.replaceAll(",", "")),
    };
  });
};
/**
 * Converts column-oriented scraped data into CSV text.
 *
 * The first and last entries of `scrapedData` are dropped before export.
 * The number of data rows is taken from the first remaining column.
 *
 * Every data value is wrapped in double quotes, with embedded quotes doubled.
 * Header names are emitted as-is unless they contain a comma, quote or line
 * break, in which case they are quoted the same way. Cells missing from a
 * shorter column are emitted as empty quoted fields.
 *
 * @param {Array<{column: string, rows: string[]}>} scrapedData Columns to export.
 * @returns {string} CSV text with a header line followed by one line per row,
 *   joined with "\n"; an empty string when no columns remain after trimming.
 */
const generateCSVData = scrapedData => {
  // Work on a trimmed copy instead of reassigning the parameter.
  const columns = scrapedData.slice(1, -1);
  if (columns.length === 0) {
    return "";
  }

  const quoteField = (value) => `"${String(value ?? "").replaceAll('"', '""')}"`;
  const needsQuoting = /[",\r\n]/;

  const headerLine = columns
    .map(({ column }) => (needsQuoting.test(column) ? quoteField(column) : column))
    .join(",");

  // Rows are built as arrays rather than objects keyed by column name, so
  // duplicate column names are kept and the table's column order is preserved.
  const rowCount = columns[0].rows.length;
  const dataLines = Array.from({ length: rowCount }, (_, rowIndex) =>
    columns.map(({ rows }) => quoteField(rows[rowIndex])).join(",")
  );

  return [headerLine, ...dataLines].join("\n");
};
/**
 * Triggers a browser download of the given CSV text.
 *
 * Creates a detached `<a>` element pointing at a `data:` URL containing the
 * CSV, names the download after the current document title, and clicks it.
 *
 * @param {string} csvData CSV text to download.
 * @returns {void}
 */
const downloadFile = csvData => {
  const anchor = document.createElement('a');
  // encodeURIComponent, not encodeURI: encodeURI leaves "#", "&" and "?"
  // untouched, and a "#" inside a data: URL starts the fragment, which
  // truncates the downloaded content at that point.
  anchor.href = `data:text/csv;charset=utf-8,${encodeURIComponent(csvData)}`;
  anchor.target = '_blank';
  anchor.download = `${document.title}`;
  anchor.click();
};
// Entry point: scrape the table, turn it into CSV text, then download it.
// The block scope keeps the intermediate values out of the global scope.
{
  const scrapedColumns = scrapeData();
  const csvText = generateCSVData(scrapedColumns);
  downloadFile(csvText);
}
// ----------------------------------------------------------------------------- Debugger --------------------------------------------------------------------------------- | |
// const scrapeData = () => { | |
// try { | |
// // Log initial execution | |
// console.log("Starting scrapeData function"); | |
// const sections = document.querySelectorAll("#screener-table > table"); | |
// console.log("Found tables:", sections.length); | |
// if (sections.length === 0) { | |
// throw new Error("No table found with ID 'screener-table'"); | |
// } | |
// const headingElements = sections[0].querySelectorAll("th"); | |
// console.log("Found heading elements:", headingElements.length); | |
// if (headingElements.length === 0) { | |
// throw new Error("No header elements (th) found in table"); | |
// } | |
// const scrapedData = Array.from(headingElements).map((elm, index) => { | |
// try { | |
// // Log header element details | |
// console.log(`\nProcessing header ${index}:`, elm); | |
// console.log("Header ID:", elm.getAttribute("id")); | |
// const selector = elm.getAttribute("id") === "name" ? | |
// "data-col" : elm.getAttribute("id") + "-col"; | |
// console.log("Generated selector:", selector); | |
// // Validate selector before using it | |
// if (selector.includes(".") || selector.includes("#") || /^\d/.test(selector)) { | |
// console.warn(`Invalid selector detected: ${selector}`); | |
// // Clean the selector - remove problematic characters or handle numerics | |
// const cleanSelector = selector.replace(/^\d+/, 'n$&').replace(/[.#]/g, '_'); | |
// console.log("Cleaned selector:", cleanSelector); | |
// } | |
// // Build selector parts separately for debugging | |
// const fullSelector = `tbody [class~="${selector}"] .ellipsis .desktop--only`; | |
// console.log("Full selector being used:", fullSelector); | |
// const rowElements = sections[0].querySelectorAll(fullSelector); | |
// console.log("Found row elements:", rowElements.length); | |
// return { | |
// column: (() => { | |
// try { | |
// const cellElement = elm.querySelector(".data-cell .desktop--only"); | |
// return cellElement ? cellElement.textContent.trim() : "#"; | |
// } catch (cellError) { | |
// console.error("Error getting column name:", cellError); | |
// return "#"; | |
// } | |
// })(), | |
// rows: Array.from(rowElements).map((el, rowIndex) => { | |
// try { | |
// return el.textContent.replaceAll(",", "").trim(); | |
// } catch (rowError) { | |
// console.error(`Error processing row ${rowIndex}:`, rowError); | |
// return ""; | |
// } | |
// }) | |
// }; | |
// } catch (elementError) { | |
// console.error(`Error processing header element ${index}:`, elementError); | |
// return { | |
// column: `Error_Column_${index}`, | |
// rows: [] | |
// }; | |
// } | |
// }); | |
// console.log("Final scraped data:", scrapedData); | |
// return scrapedData; | |
// } catch (error) { | |
// console.error("Error in scrapeData function:", error); | |
// console.error("Error stack:", error.stack); | |
// // Return empty array or error indicator | |
// return [{ | |
// column: "Error", | |
// rows: [`Error scraping data: ${error.message}`] | |
// }]; | |
// } | |
// }; | |
// // Test the function with error catching | |
// try { | |
// const result = scrapeData(); | |
// console.log("Function completed. Result:", result); | |
// } catch (error) { | |
// console.error("Error executing scrapeData:", error); | |
// } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment