Created
April 7, 2020 21:49
-
-
Save FarisHijazi/6c9ba3fb315d0ce9bfa62c10dfa8b2f8 to your computer and use it in GitHub Desktop.
Google Images' new page format (2020) can no longer be parsed the old way (via `.rg_meta`); this script parses the first 100 or so images on a Google Images page and returns their metadata.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// code to get this: | |
/**
 * Extracts image metadata from a Google Images results page (2020 format).
 *
 * @author github.com/FarisHijazi
 *
 * @returns {{ "id": String, "tu": String, "th": String, "tw": String, "ou": String, "oh": String, "ow": String, "pt": String, "st": String, }[]}
 *      a list of objects containing the image info; an empty list when no
 *      usable data is found on the page.
 *      NOTE(review): the dimension fields (th/tw/oh/ow) are passed through
 *      untouched; they look numeric in the example below - confirm.
 *
 * How it works:
 *  - the page has `<script nonce>` elements whose code starts with
 *    `AF_initDataCallback(...)`; the argument of that call carries a `data`
 *    member (a function returning an array, or the array itself);
 *  - the image rows are read from `data[31][0][12][2]`, each row keeping its
 *    details at index 1;
 *  - rows that do not have the expected shape are skipped with a console warning.
 *
 * Field names follow the legacy `.rg_meta` object, for example:
 *
 *  meta = {
 *      "id": "ZR4fY_inahuKM:",   // ID: corresponds to `data-item-id`
 *      "isu": "gifs.cc",         //
 *      "itg": 0,                 //
 *      "ity": "gif",             // Image Type
 *      "oh": 322,                // Original Height
 *      "ou": "http://78.media.tumblr.com/....500.gif", // Original URL
 *      "ow": 492,                // Original Width
 *      "pt": "",                 // PrimaryTitle
 *      "rh": "gifs.cc",
 *      "rid": "nyyV1PqBnBltYM",  // Referrer ID
 *      "rmt": 0,                 //
 *      "rt": 0,                  //
 *      "ru": "",                 //
 *      "s": "Photo",             // Site
 *      "st": "",                 // Secondary Title
 *      "th": 182,                // Thumbnail Height
 *      "tu": "https://encrypted-tbn0.gstatic.com/images?q\\", // Thumbnail URL
 *      "tw": 278                 // Thumbnail Width
 *  }
 *
 * Only id, tu, th, tw, ou, oh, ow, pt and st are produced by this function.
 */
function getImgMetas() {
    const CALLBACK_NAME = 'AF_initDataCallback';
    // Location of the image rows inside the callback payload.
    const IMAGE_ROWS_PATH = [31, 0, 12, 2];

    // Follows `path` key by key, yielding undefined instead of throwing
    // as soon as an intermediate value is null/undefined.
    const dig = (root, path) => path.reduce(
        (node, key) => (node === null || node === undefined ? undefined : node[key]),
        root
    );

    // Turns `AF_initDataCallback({...});` into the `{...}` object it was called with.
    const parseCallbackArg = (scriptText) => {
        try {
            // NOTE(security): eval() executes the script text. It is the page's
            // own inline script (an object literal that may contain a function,
            // so JSON.parse cannot be used); never feed this untrusted input.
            return eval(scriptText.replace(CALLBACK_NAME, ''));
        } catch (e) {
            console.error(e);
            return {};
        }
    };

    // `data` is either a function returning the payload or the payload itself.
    // Calling the function avoids re-evaluating its source text.
    const resolvePayload = (callbackArg) => {
        const payload = callbackArg.data;
        if (typeof payload !== 'function') {
            return payload;
        }
        try {
            return payload();
        } catch (e) {
            console.error(e);
            return undefined;
        }
    };

    const payloads = Array.from(document.querySelectorAll('script[nonce]'))
        .map((script) => script.innerText)
        .filter((text) => /^AF_initDataCallback/.test(text))
        .map(parseCallbackArg)
        .filter((callbackArg) => callbackArg && callbackArg.data)
        .map(resolvePayload)
        // keep only payloads that contain at least one non-empty nested value
        .filter((payload) => Array.isArray(payload) && payload.some((el) => el && el.length));

    // Use the first payload that actually holds the image rows rather than
    // blindly trusting the first candidate.
    const imageRows = payloads
        .map((payload) => dig(payload, IMAGE_ROWS_PATH))
        .find((rows) => Array.isArray(rows));

    if (!imageRows) {
        return [];
    }

    return imageRows.reduce((metas, row) => {
        try {
            const meta = row[1];
            const id = meta[1];
            const [tu, th, tw] = meta[2];
            const [ou, oh, ow] = meta[3];
            const siteAndNameInfo = meta[9] || meta[11];
            const pt = siteAndNameInfo[2003][2];
            const st = siteAndNameInfo[183836587][0]; // infolink TODO: doublecheck
            metas.push({
                'id': id,
                // thumbnail
                'tu': tu,
                'th': th,
                'tw': tw,
                // original
                'ou': ou,
                'oh': oh,
                'ow': ow,
                // site and name
                'pt': pt,
                'st': st, // info link
            });
        } catch (e) {
            // best effort: a row without the expected shape is skipped
            console.warn(e);
        }
        return metas;
    }, []);
}
// Run the extraction immediately; the returned list is not stored in a variable here.
getImgMetas(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment