Last active
October 29, 2023 08:42
-
-
Save devilankur18/0aca36f1232e9b36a0996a25420b889d to your computer and use it in GitHub Desktop.
Scrape Data from Browser Console
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// How to use this script:
// 1. Go to the VC list page and set the filters to shortlist the VCs you want.
// 2. Open the Chrome DevTools console.
// 3. Paste this script; after it finishes scrolling it downloads the data as a JSON file.
// 4. Optional: if you want CSV, go to https://csvjson.com/json2csv and upload the JSON to convert it.
// Accumulates one plain-object record per scraped list item.
const dataArray = [];
// How long to keep scrolling before extraction starts, in milliseconds (2 minutes).
const duration = 2 * 60 * 1000;
/**
 * Scrolls the window to the current bottom of the page.
 *
 * The target offset is read from `document.body.scrollHeight` on every call,
 * so repeated calls follow the page as it grows (presumably the list loads
 * more items on scroll; confirm against the target page).
 *
 * @returns {void}
 */
const scrollDown = () => {
  window.scrollTo(0, document.body.scrollHeight);
};
/**
 * Scrapes every `.list-item` element currently in the document, appends one
 * record per item to `dataArray`, then triggers a download of the whole array
 * as a pretty-printed JSON file named `investors-<count>.json`.
 *
 * A field whose element is missing from an item is recorded as `null` rather
 * than throwing, so one incomplete card cannot abort the entire export.
 *
 * @returns {void}
 */
const extractData = () => {
  // Trimmed text of the first element matching `selector` inside `root`, or null.
  const readText = (root, selector) =>
    root.querySelector(selector)?.textContent?.trim() ?? null;

  // `href` of the first link matching `selector` inside `root`, or null.
  const readHref = (root, selector) =>
    root.querySelector(selector)?.href ?? null;

  // Photo URL taken from the inline `background-image: url("...")` style, or null.
  const readPhoto = (root) => {
    const backgroundImage =
      root.querySelector(".list-photo")?.style?.backgroundImage;
    if (backgroundImage == null) {
      return null;
    }
    // Drop the leading `url(` and the trailing `)`, then strip double quotes.
    return backgroundImage.slice(4, -1).replace(/"/g, "");
  };

  // Twitter link, taken only from contact icons that contain a `.dms-open`
  // marker; when several qualify, the last one wins.
  const readTwitter = (root) => {
    let twitter = null;
    root.querySelectorAll(".contact-icon").forEach((icon) => {
      if (!icon.querySelector(".dms-open")) {
        return;
      }
      // getAttribute returns null when the icon has no href attribute.
      const href = icon.getAttribute("href");
      if (href?.startsWith("https://twitter.com")) {
        twitter = href;
      }
    });
    return twitter;
  };

  document.querySelectorAll(".list-item").forEach((item) => {
    dataArray.push({
      name: readText(item, ".list-heading"),
      photo: readPhoto(item),
      role: readText(item, ".list-title"),
      description: readText(item, ".shortdesccard"),
      website: readHref(item, "a.contact-icon.site-link"),
      twitter: readTwitter(item),
      linkedin: readHref(item, "a.contact-icon.linkedin"),
      crunchbase: readHref(item, "a.contact-icon.crunchbase"),
    });
  });

  // Serialize everything collected so far and hand it to the browser as a
  // file download via a temporary anchor pointing at a Blob URL.
  const jsonData = JSON.stringify(dataArray, null, 2);
  const blob = new Blob([jsonData], { type: "application/json" });
  const a = document.createElement("a");
  a.href = URL.createObjectURL(blob);
  a.download = `investors-${dataArray.length}.json`;
  a.click();
};
// Scroll once per second until `duration` milliseconds have elapsed, then
// stop the timer and run the extraction exactly once.
const startTime = Date.now();
const scrollInterval = setInterval(() => {
  if (Date.now() - startTime >= duration) {
    // Clear the timer before extracting so no further tick can fire.
    clearInterval(scrollInterval);
    extractData();
  } else {
    scrollDown();
  }
}, 1000); // Tick every 1 second
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment