Created
December 14, 2018 12:18
-
-
Save ZeeCoder/664855aad71a67a686a1790ff3ca9193 to your computer and use it in GitHub Desktop.
Instagram Comment Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
javascript:(async () => {
  /*
   * Instagram comment scraper bookmarklet.
   *
   * Flow: ask the user for a page-load interval, draw a full-screen overlay,
   * repeatedly click the page's "load more" button, read the comment list
   * items out of the DOM, and show the result as semicolon-separated CSV
   * (name;message) that can be copied to the clipboard.
   *
   * Only block comments and explicit semicolons are used so the script stays
   * valid if its whitespace is collapsed onto a single line.
   */
  const VERSION = "2";

  /* Smallest page-load interval (in milliseconds) accepted from the user. */
  const MIN_FREQUENCY_MS = 300;

  /**
   * Resolve after `ms` milliseconds.
   * @param {number} ms
   * @returns {Promise<void>}
   */
  const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  /** Show or hide an element using the block/none display values. */
  const setVisible = (element, visible) => {
    element.style.display = visible ? "block" : "none";
  };

  /* ------------------------------------------------------------------ */
  /* Overlay UI                                                         */
  /* ------------------------------------------------------------------ */
  const ui = {
    /* DOM nodes created by initialRender(), keyed by role. */
    view: {},
    /* Each entry is rendered only while it holds a string. */
    state: { progress: "Loading...", rateLimit: "", csv: "" },

    /** Remove a previously injected overlay, if one exists. */
    destroy() {
      const pastUI = document.querySelector("#EVENTSTAG-UI");
      if (pastUI) {
        pastUI.parentElement.removeChild(pastUI);
      }
    },

    /** Build the overlay from scratch, attach it to <body> and render state. */
    initialRender() {
      this.destroy();

      const view = this.view;
      view.background = document.createElement("div");
      view.background.id = "EVENTSTAG-UI";
      view.buttons = document.createElement("div");
      view.copyButton = document.createElement("button");
      view.copyButton.textContent = "Copy CSV";
      view.closeButton = document.createElement("button");
      view.closeButton.textContent = "Close";
      view.closeButton.onclick = () => this.destroy();
      view.csv = document.createElement("div");
      view.progress = document.createElement("div");
      view.rateLimit = document.createElement("div");
      view.csvInput = document.createElement("textarea");

      /* Copying goes through the textarea: select its content, then copy. */
      view.copyButton.onclick = () => {
        view.csvInput.focus();
        view.csvInput.select();
        document.execCommand("copy");
        alert("CSV copied");
      };

      const buttonStyle = {
        fontSize: "20px",
        height: "40px",
        padding: "0 15px",
        margin: "20px",
        cursor: "pointer",
      };

      Object.assign(view.background.style, {
        position: "fixed",
        left: "0",
        top: "0",
        width: "100%",
        height: "100%",
        background: "rgba(0,0,0,.8)",
        color: "white",
        display: "flex",
      });
      Object.assign(view.progress.style, {
        position: "absolute",
        right: "20px",
        top: "0",
        whiteSpace: "nowrap",
        fontSize: "20px",
        display: "none",
        lineHeight: "80px",
      });
      Object.assign(view.rateLimit.style, {
        position: "absolute",
        left: "50%",
        top: "0",
        color: "red",
        whiteSpace: "nowrap",
        fontSize: "20px",
        display: "none",
        lineHeight: "80px",
        transform: "translateX(-50%)",
      });
      Object.assign(view.buttons.style, { display: "none" });
      Object.assign(view.csv.style, {
        background: "white",
        color: "black",
        margin: "20px",
        flex: "1",
        padding: "20px",
        display: "none",
        whiteSpace: "pre-line",
        overflow: "auto",
      });
      /* The textarea is positioned one viewport width to the right. */
      Object.assign(view.csvInput.style, { position: "absolute", left: "100vw" });
      Object.assign(view.closeButton.style, buttonStyle);
      Object.assign(view.copyButton.style, buttonStyle);

      view.background.appendChild(view.buttons);
      view.background.appendChild(view.progress);
      view.background.appendChild(view.rateLimit);
      view.buttons.appendChild(view.copyButton);
      view.buttons.appendChild(view.closeButton);
      view.background.appendChild(view.csv);
      view.background.appendChild(view.csvInput);
      document.body.appendChild(view.background);

      this.render();
    },

    /**
     * Merge `newState` into the UI state and sync the DOM with it.
     * @param {{progress?: *, rateLimit?: *, csv?: *}} [newState]
     */
    render(newState = {}) {
      Object.assign(this.state, newState);
      const { view, state } = this;

      const hasProgress = typeof state.progress === "string";
      if (hasProgress) {
        view.progress.textContent = state.progress;
      }
      setVisible(view.progress, hasProgress);

      const hasRateLimit = typeof state.rateLimit === "string";
      if (hasRateLimit) {
        view.rateLimit.textContent = state.rateLimit;
      }
      setVisible(view.rateLimit, hasRateLimit);

      const hasCsv = typeof state.csv === "string";
      if (hasCsv) {
        view.csvInput.value = state.csv;
        view.csv.textContent = state.csv;
      }
      setVisible(view.buttons, hasCsv);
      setVisible(view.csv, hasCsv);
    },
  };

  /* ------------------------------------------------------------------ */
  /* DOM scraping                                                       */
  /* ------------------------------------------------------------------ */

  /*
   * NOTE(review): the button is looked up once here, while
   * isLoadMoreButtonAvailable() re-queries the DOM on every call. If the page
   * ever replaces the button element, this reference goes stale. Kept as-is
   * because the page's re-render behaviour cannot be confirmed from here.
   */
  const loadMoreButton = document.querySelector("li button");

  const isLoadMoreButtonAvailable = () =>
    Boolean(document.querySelector("li button"));

  /** Poll every 100 ms until the load-more button is no longer disabled. */
  const waitForEnabledLoadButton = async () => {
    while (loadMoreButton.disabled) {
      await delay(100);
    }
  };

  /*
   * All list items of the comment section except the first two.
   * NOTE(review): presumably the skipped items are the post caption and the
   * load-more row - confirm against the current page markup.
   */
  const getCommentNodes = () =>
    [...document.querySelectorAll("article section + div li")].slice(2);

  /**
   * Read author name and message text from one comment list item.
   * Fields that cannot be found stay empty strings.
   * @param {Element} node
   * @returns {{name: string, message: string}}
   */
  const getCommentFromNode = (node) => {
    const nameNode = node.querySelector("a");
    const messageNode = node.querySelector("h3 + span");
    return {
      name: nameNode ? nameNode.textContent : "",
      message: messageNode ? messageNode.textContent : "",
    };
  };

  const removeNode = (node) => node.parentElement.removeChild(node);

  /** Click the load-more button (when present) and wait for it to re-enable. */
  const loadNextCommentPage = async () => {
    if (!isLoadMoreButtonAvailable()) {
      return;
    }
    loadMoreButton.click();
    await waitForEnabledLoadButton();
  };

  let extractingFirstCommentPage = true;

  /**
   * Read the comments currently in the DOM, then remove every comment node
   * except the first one.
   *
   * On every call after the first, the last comment read is dropped from the
   * result. NOTE(review): presumably it is the node kept from the previous
   * call, which would otherwise be reported twice - confirm on a live page.
   * @returns {{name: string, message: string}[]}
   */
  const extractCommentsFromPage = () => {
    const nodes = getCommentNodes();
    /* Nothing in this script creates a node with this id; kept as in the original. */
    const comments = nodes
      .filter((node) => node.id !== "ET-DUMMY-COMMENT")
      .map((node) => getCommentFromNode(node));

    nodes.slice(1).forEach((node) => removeNode(node));

    if (extractingFirstCommentPage) {
      extractingFirstCommentPage = false;
      return comments;
    }
    return comments.slice(0, comments.length - 1);
  };

  /**
   * Load comment pages one after another until a page yields no comments.
   *
   * Written as a plain loop so an exception while scraping rejects the
   * returned promise instead of leaving it pending forever.
   * @param {object} options
   * @param {number} [options.frequency=300] Pause between page loads, in ms.
   * @param {(comments: {name: string, message: string}[]) => void} options.handleComments
   *   Called once per non-empty page.
   * @returns {Promise<void>}
   */
  const getComments = async ({ frequency = 300, handleComments }) => {
    for (;;) {
      await loadNextCommentPage();
      const comments = extractCommentsFromPage();
      if (!comments.length) {
        return;
      }
      handleComments(comments);
      await delay(frequency);
    }
  };

  /* ------------------------------------------------------------------ */
  /* CSV                                                                */
  /* ------------------------------------------------------------------ */

  /**
   * Quote a CSV field when it contains the separator, a double quote or a
   * line break; embedded double quotes are doubled. Line breaks must be
   * quoted because records are joined with "\n".
   * @param {string} text
   * @returns {string}
   */
  const getCleanCsvField = (text) => {
    if (!/[;"\r\n]/.test(text)) {
      return text;
    }
    return `"${text.replace(/"/g, '""')}"`;
  };

  /**
   * Serialise comments as "name;message" lines.
   * @param {{name: string, message: string}[]} comments
   * @returns {string}
   */
  const getCsvFromComments = (comments) =>
    comments
      .map(({ name, message }) =>
        [getCleanCsvField(name), getCleanCsvField(message)].join(";")
      )
      .join("\n");

  /* ------------------------------------------------------------------ */
  /* User input                                                         */
  /* ------------------------------------------------------------------ */

  /**
   * Ask for the page-load interval until a valid value is entered.
   * @returns {number|null} The interval, or null when the prompt is cancelled.
   */
  const getFrequencyFromUser = () => {
    for (;;) {
      const answer = prompt(
        `(v${VERSION}) At what frequency should the comment pages be loaded? (300 or more) Recommended values: 300 for less than 1000 comments, otherwise 1000 or more`,
        "300"
      );
      /* Cancel: report it to the caller instead of leaving it waiting. */
      if (answer === null) {
        return null;
      }

      const frequency = Number.parseInt(answer, 10);
      if (Number.isNaN(frequency)) {
        alert("Error, please try again! (Error: Invalid number)");
      } else if (frequency < MIN_FREQUENCY_MS) {
        alert("The given frequency has to be 300 or more.");
      } else {
        return frequency;
      }
    }
  };

  /* ------------------------------------------------------------------ */
  /* Main                                                               */
  /* ------------------------------------------------------------------ */
  const frequency = getFrequencyFromUser();
  if (frequency === null) {
    return;
  }

  /* Start the clock after the blocking prompt so it measures scraping only. */
  const startTime = new Date();

  ui.initialRender();

  /* Number of pages that actually delivered comments. */
  let loadedPages = 0;
  ui.render({ progress: `Loading comment page#${loadedPages + 1}...` });

  /* Shows a warning when no comment batch arrives within the time window. */
  const rateLimitDetector = {
    triggerAfterMs: 10 * 1000,
    timeout: null,
    restart() {
      ui.render({ rateLimit: "" });
      if (this.timeout) {
        clearTimeout(this.timeout);
      }
      this.timeout = setTimeout(() => {
        ui.render({ rateLimit: "Rate limit may have reached." });
      }, this.triggerAfterMs);
    },
    stop() {
      if (this.timeout) {
        clearTimeout(this.timeout);
      }
      ui.render({ rateLimit: "" });
    },
  };

  const csvParts = [];
  rateLimitDetector.restart();

  await getComments({
    frequency,
    handleComments: (batch) => {
      rateLimitDetector.restart();
      /* Each later batch is placed in front of the earlier ones. */
      csvParts.unshift(getCsvFromComments(batch));
      loadedPages += 1;
      ui.render({
        csv: csvParts.join("\n"),
        progress: `Loading comment page#${loadedPages + 1}`,
      });
    },
  });

  await delay(300);
  rateLimitDetector.stop();

  const stopTime = new Date();
  const elapsedSeconds = (stopTime - startTime) / 1000;
  ui.render({
    progress: `Done in ${elapsedSeconds} seconds. Loaded ${loadedPages} pages.`,
  });
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment