Last active
October 29, 2018 16:46
-
-
Save gitfvb/6a21d905775b8177cbba97034608095f to your computer and use it in GitHub Desktop.
Preparation for DSGVO/GDPR: saves multiple websites as screenshots (PNG) and as HTML. The script can emulate a click on a button such as "newsletter subscription" when given a fourth (a class name) or fifth (an id) parameter. To get this PowerShell script running, please download https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-windows.zip and p…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [ | |
| { | |
| "link": "https://www.example.com/datenschutzbestimmungen", | |
| "title": "Datenschutz" | |
| }, | |
| { | |
| "link": "https://newsletterpopup.de", | |
| "title": "Datenschutz_Newsletter" | |
| } | |
| ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
helper for formatted date and time
*/
/**
 * Formats the current local date and time as a compact, sortable stamp.
 *
 * function based on https://stackoverflow.com/questions/25275696/javascript-format-date-time
 *
 * @returns {string} the current local time as "yyyyMMddHHmmss",
 *                   e.g. "20181029164630"
 */
function getCurrentDateTime() {
    // Zero-pads to two digits and ALWAYS yields a string. Without this, values
    // >= 10 stay numbers and "+" adds them up instead of concatenating them.
    function pad2(value) {
        return (value < 10 ? '0' : '') + value;
    }

    var now = new Date(Date.now());

    return String(now.getFullYear()) +
        pad2(now.getMonth() + 1) + // getMonth() is zero-based: January is 0
        pad2(now.getDate()) +
        pad2(now.getHours()) +
        pad2(now.getMinutes()) +
        pad2(now.getSeconds());
}
/*
Create the screenshot for a url and give it a name. datetime and the
lower-cased name are concatenated into the output file names:
    grabs/<datetime>_<name>.png   (rendered page)
    grabs/<datetime>_<name>.html  (page content)

buttonClass or buttonId is optional to press a button in between (e.g. a popup).
For buttonClass the first element carrying that class is clicked. Empty values
and elements that cannot be found are skipped.

Relies on the file-level `fs` module and the PhantomJS `phantom` object, and
always asks PhantomJS to exit once the page has been handled.
*/
function createScreenshot(url, name, datetime, buttonClass, buttonId) {

    var page = require('webpage').create();
    page.viewportSize = { width: 1920, height: 1080 };

    // only needed when you want to enforce page.evaluate print the console output
    // page.onConsoleMessage = function(msg) {
    //     console.log(msg);
    // }

    page.open(url, function(status) {

        // try/finally: if rendering or writing throws (e.g. name is missing),
        // phantom.exit() would otherwise be skipped and the process never ends.
        try {

            console.log("Status: " + status);

            if (status === "success") {

                // press a specific button on the website identified by class or id;
                // this function runs inside the page, so it can only use its arguments
                page.evaluate(function(buttonClass, buttonId) {
                    var target;

                    if (buttonClass) {
                        target = document.getElementsByClassName(buttonClass)[0];
                        // a missing element must not abort the id lookup below
                        if (target) {
                            console.log('pressing button with class name' + buttonClass);
                            target.click();
                        }
                    }

                    if (buttonId) {
                        target = document.getElementById(buttonId);
                        if (target) {
                            console.log('pressing button with id' + buttonId);
                            target.click();
                        }
                    }
                }, buttonClass, buttonId);

                // shared path prefix for both output files
                var basePath = 'grabs/' + datetime + '_' + name.toLowerCase();

                // create screenshot
                page.render(basePath + '.png');

                // save as html
                fs.write(basePath + '.html', page.content, 'w');
            }

        } finally {
            phantom.exit();
        }
    });
}
/*
MAIN PROCESS

Expected command line:
    phantomjs <script> <url> <name> <datetime> [buttonClass] [buttonId]
*/
var fs = require('fs');
var system = require('system');
var args = system.args;

// only needed if you want to see all args given by the CLI
// args.forEach(function(arg, i) {
//     console.log(i + ': ' + arg);
// });

// args[1] (url) and args[2] (name) are required by createScreenshot; without
// them no usable output file can be produced, so stop early with a hint.
if (args.length < 3) {
    console.log('Usage: phantomjs ' + args[0] + ' <url> <name> <datetime> [buttonClass] [buttonId]');
    phantom.exit(1);
} else {
    createScreenshot(args[1], args[2], args[3], args[4], args[5]);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################
#
# SCRIPT ROOT
#
################################################

# Work out which file was launched: the script file itself when run as an
# external script, otherwise the first entry of the process command line.
$invokedCommand = $MyInvocation.MyCommand
if ($invokedCommand.CommandType -eq "ExternalScript") {
    $launchedFile = $invokedCommand.Definition
}
else {
    $launchedFile = [Environment]::GetCommandLineArgs()[0]
}

# All relative paths below (links.json, log.txt, phantomjs.exe) are resolved
# against the folder containing the launched file.
$scriptPath = Split-Path -Parent -Path $launchedFile
Set-Location $scriptPath


################################################
#
# SETTINGS
#
################################################

$stampFormat = "yyyyMMddHHmmss"
$logFile = "log.txt"

# One UTC timestamp for the whole run; it is handed to every phantomjs call.
$timestamp = [datetime]::UtcNow.ToString($stampFormat)

# List of objects with "link" and "title" properties.
$links = Get-Content -Path "links.json" -Encoding Default | ConvertFrom-Json

# Separator line marking the start of this run in the log.
$stamp = [datetime]::UtcNow.ToString($stampFormat)
"${stamp}`t--------------------------------" >> $logFile


################################################
#
# SCREENGRABBING
#
################################################

$links | ForEach-Object {

    $entry = $_

    # Log what is about to be captured.
    $stamp = [datetime]::UtcNow.ToString($stampFormat)
    "${stamp}`tGenerating screenshot and file for url ""$( $entry.link )"" with title ""$( $entry.title )""" >> $logFile

    # Reset first so a failed call cannot leave the previous entry's output behind.
    $returnValue = ""
    $returnValue = & ".\phantomjs.exe" ".\phantomdocumentation.js" $entry.link $entry.title $timestamp

    # Log whatever phantomjs printed.
    $stamp = [datetime]::UtcNow.ToString($stampFormat)
    "${stamp}`t$( $returnValue )" >> $logFile

}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment