stuartlangridge · May 14, 2022 07:17 · skroggur · Dec 31, 2023 · RayMairlot · Jan 10, 2024
diff --git a/grab.js b/grab.js
 /*
 In response to tweet 1525230931279233026: how to grab that image.

 The in-page script doesn't load an image; it instead loads the image
  data in a weird format and then renders it to one of a bunch of
  in-page canvases.
 The loaded "image" data appears to come in multiple widths, and the
  page fetches the smallest one it can get away with, a la srcset,
  which is good for bandwidth.
 So first we need to convince it to get the largest version of its
  proprietary image format. Thus, we grab the container that all the
  canvases are in and resize it to be bigger than the biggest size of
  the image.
 Here, 6000px is big enough. Finding this out is a question of
  experimentation, although you can also read the original dimensions
  from custom style properties on the smart frame component, such as
  --sf-original-height (going by its name that one holds the height;
  the width presumably lives in a sibling property -- check the
  element). Grabbing this value and using it as the resize target here
  is left as an exercise for the reader.
 */
 // `var` (not const/let) on purpose: it tolerates this declaration being
 // evaluated more than once in the same scope, which const/let reject.
 var ap = document.querySelector("div.articlePage.container");
 if (ap === null) {
  // querySelector returns null when nothing matches; fail with an
  // explanation rather than an opaque TypeError on `ap.style`.
  throw new Error('No element matches "div.articlePage.container"; is this the expected page layout?');
 }
 ap.style.width = "6000px";
 ap.style.maxWidth = "6000px";

 /*
 OK, that resizing will trigger the in-page code to go and download
  a new, larger, version of the original image and render it.
 We need to wait for that to happen before downloading the new
  image data. Obviously, the best way to do this would be to
  actually work out when that's finished and then run the next
  bit of code, but the work hasn't been done to work out how
  to do that, and canvases (obviously) don't have an onloaded
  property. So instead we hackily just wait a couple of seconds
  with a setTimeout and then run the code after that. If you keep
  getting a smaller image, or an "empty image" error, jack up the
  timeout at the end from 2000 (milliseconds) to something larger.
 */
 setTimeout(() => {
  /*
  The site code tries to stop people downloading the images by
    overwriting the useful methods on their canvas object (the
    toDataURL, toBlob, getContext, etc methods) with a different
    method that pops up their warning instead.
  So we make another canvas, borrow *its* toDataURL method, and
    call that with the target canvas as "this"; that's what the
    call statement below does, because JS object methods aren't
    really methods, they're a sort of syntactic sugar.
  */
  const stage = document.querySelector("canvas.stage");
  if (stage === null) {
   // Without this guard the borrowed toDataURL would be invoked on
   // null and fail with an unhelpful error.
   throw new Error('No element matches "canvas.stage"; nothing to export.');
  }
  const dataUrl = document.createElement("canvas").toDataURL.call(stage);
  if (dataUrl === "data:,") {
   // toDataURL yields exactly "data:," for a canvas with zero width
   // or height; downloading that would just save an empty file.
   throw new Error("Canvas produced an empty image; try a longer timeout.");
  }
  /*
  Cool, now we've got the image data URL. For convenience, let's
    download it rather than give someone a sad string which is
    about sixty zillion bytes long. To do so, make a link, set its
    href to be the data URL, set the download property to be the
    name of the image, and programmatically click on it, which
    will download the data as a PNG, as expected.
  */
  const link = document.createElement("a");
  link.href = dataUrl;
  link.download = "stolen.png";
  link.click();
 }, 2000); // this is the 2000 to change to increase the timeout
	/*
	In response to tweet 1525230931279233026: how to grab that image.

	The in-page script doesn't load an image; it instead loads the image
	data in a weird format and then renders it to one of a bunch of
	in-page canvases.
	The loaded "image" data appears to come in multiple widths, and the
	page fetches the smallest one it can get away with, a la srcset,
	which is good for bandwidth.
	So first we need to convince it to get the largest version of its
	proprietary image format. Thus, we grab the container that all the
	canvases are in and resize it to be bigger than the biggest size of
	the image.
	Here, 6000px is big enough. Finding this out is a question of
	experimentation, although you can also read the original dimensions
	from custom style properties on the smart frame component, such as
	--sf-original-height (going by its name that one holds the height;
	the width presumably lives in a sibling property -- check the
	element). Grabbing this value and using it as the resize target here
	is left as an exercise for the reader.
	*/
	// `var` (not const/let) on purpose: it tolerates this declaration being
	// evaluated more than once in the same scope, which const/let reject.
	var ap = document.querySelector("div.articlePage.container");
	if (ap === null) {
		// querySelector returns null when nothing matches; fail with an
		// explanation rather than an opaque TypeError on `ap.style`.
		throw new Error('No element matches "div.articlePage.container"; is this the expected page layout?');
	}
	ap.style.width = "6000px";
	ap.style.maxWidth = "6000px";

	/*
	OK, that resizing will trigger the in-page code to go and download
	a new, larger, version of the original image and render it.
	We need to wait for that to happen before downloading the new
	image data. Obviously, the best way to do this would be to
	actually work out when that's finished and then run the next
	bit of code, but the work hasn't been done to work out how
	to do that, and canvases (obviously) don't have an onloaded
	property. So instead we hackily just wait a couple of seconds
	with a setTimeout and then run the code after that. If you keep
	getting a smaller image, or an "empty image" error, jack up the
	timeout at the end from 2000 (milliseconds) to something larger.
	*/
	setTimeout(() => {
		/*
		The site code tries to stop people downloading the images by
		overwriting the useful methods on their canvas object (the
		toDataURL, toBlob, getContext, etc methods) with a different
		method that pops up their warning instead.
		So we make another canvas, borrow its toDataURL method, and
		call that with the target canvas as "this"; that's what the
		call statement below does, because JS object methods aren't
		really methods, they're a sort of syntactic sugar.
		*/
		const stage = document.querySelector("canvas.stage");
		if (stage === null) {
			// Without this guard the borrowed toDataURL would be invoked on
			// null and fail with an unhelpful error.
			throw new Error('No element matches "canvas.stage"; nothing to export.');
		}
		const dataUrl = document.createElement("canvas").toDataURL.call(stage);
		if (dataUrl === "data:,") {
			// toDataURL yields exactly "data:," for a canvas with zero width
			// or height; downloading that would just save an empty file.
			throw new Error("Canvas produced an empty image; try a longer timeout.");
		}
		/*
		Cool, now we've got the image data URL. For convenience, let's
		download it rather than give someone a sad string which is
		about sixty zillion bytes long. To do so, make a link, set its
		href to be the data URL, set the download property to be the
		name of the image, and programmatically click on it, which
		will download the data as a PNG, as expected.
		*/
		const link = document.createElement("a");
		link.href = dataUrl;
		link.download = "stolen.png";
		link.click();
	}, 2000); // this is the 2000 to change to increase the timeout