Last active
May 1, 2023 00:07
-
-
Save homakov/594ae2fb3b5aad8cf23bb0987ea20691 to your computer and use it in GitHub Desktop.
booking
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Go to booking.com and search for hotels.
// Set your favorite filters first to reduce the results to 5-10 pages.
// Then paste this script into the JS console.
// Minimal asynchronous key/value store on top of IndexedDB, exposed as `window.ldb`:
//   ldb.get(key, callback)  -> calls callback(value), or callback(null) when nothing is stored
//   ldb.set(key, value)     -> stores { k: key, v: value }
// Records live in object store "s" (keyPath "k") of database "d2", version 1.
// If the browser exposes no IndexedDB implementation, an error is logged and
// `window.ldb` is left undefined.
!(function () {
  var idbFactory =
    window.indexedDB ||
    window.mozIndexedDB ||
    window.webkitIndexedDB ||
    window.msIndexedDB;
  if (!idbFactory) return void console.error("indexDB not supported");

  // Open database connection; falsy until the open request (and any upgrade) has finished.
  var db;

  // Runs `action(db)` once the connection is available, re-checking every 100 ms until then.
  function whenReady(action) {
    if (db) return void action(db);
    setTimeout(function () {
      whenReady(action);
    }, 100);
  }

  // Reads the record stored under `key` and passes its value (or null) to `callback`.
  function get(key, callback) {
    whenReady(function (readyDb) {
      readyDb.transaction("s").objectStore("s").get(key).onsuccess = function (
        event
      ) {
        var value = (event.target.result && event.target.result.v) || null;
        callback(value);
      };
    });
  }

  // Stores `value` under `key`. Waits for the connection just like get() does,
  // so calling it right after page load no longer throws on an undefined handle.
  function set(key, value) {
    whenReady(function (readyDb) {
      readyDb
        .transaction("s", "readwrite")
        .objectStore("s")
        .put({ k: key, v: value });
    });
  }

  var openRequest = idbFactory.open("d2", 1);
  openRequest.onsuccess = function () {
    db = this.result;
  };
  openRequest.onerror = function (event) {
    console.error("indexedDB request error");
    console.log(event);
  };
  openRequest.onupgradeneeded = function (event) {
    // Keep readers waiting until the upgrade transaction has completed.
    db = null;
    var store = event.target.result.createObjectStore("s", { keyPath: "k" });
    store.transaction.oncomplete = function (done) {
      db = done.target.db;
    };
  };

  window.ldb = { get: get, set: set };
})();
/**
 * Returns the part of `str` that follows the first occurrence of `first`,
 * cut at the next occurrence of `first` or `last`, whichever comes first.
 *
 * @param {string} str - Text to search.
 * @param {string} first - Opening delimiter.
 * @param {string} last - Closing delimiter.
 * @returns {string} The enclosed text, or "" when `first` does not occur in `str`.
 */
function between(str, first, last) {
  const afterFirst = str.split(first)[1];
  // Without the opening delimiter there is no second segment; return an empty
  // result instead of throwing on `undefined.split`.
  if (afterFirst === undefined) {
    return "";
  }
  return afterFirst.split(last)[0];
}
// Cache of analysed hotel pages, keyed by page URL.
// Declared explicitly (the bare assignment was an implicit global and fails in
// strict/module code); `var` keeps re-pasting the script into a console safe.
var local = {};

/**
 * Returns the cached raw HTML for `url`.
 *
 * @param {string} url - Hotel page URL used as cache key.
 * @returns {string|undefined} The stored HTML, or undefined if `url` is not cached.
 */
function getContents(url) {
  const entry = local[url];
  return entry ? entry.html : undefined;
}
/**
 * Downloads a hotel page, extracts the figures used for filtering/sorting and
 * caches the result in `local[url]`. A URL that is already cached is not
 * requested again.
 *
 * Stored fields: `html`, `reviews`, `rawDate`, `rawRating`, `date` (ms timestamp
 * parsed from `rawDate`, 0 if absent), `all_prices`, `all_m`, `all_rows`,
 * `price_per_sqm` (room size -> lowest price per sqm per night, plus a
 * `0: 99` sentinel) and `cheapest` (room size with the lowest price per sqm,
 * 0 when no room could be evaluated).
 *
 * The number of nights is read from the "N-night stay" text of the CURRENT
 * page (the search results), not from the fetched page.
 *
 * @param {string} url - Hotel page URL.
 * @returns {Promise<object>} The cache entry for `url`.
 */
async function fetchURL(url) {
  if (!local[url]) {
    console.log("Requesting ", url);
    const response = await fetch(url);
    const html = await response.text();

    const doc = new DOMParser().parseFromString(html, "text/html");

    // Keep only the digits of an element's text, e.g. "€ 1,234" -> 1234.
    const digitsOf = (el) => parseInt(el.innerText.replace(/[^0-9]/g, ""), 10);
    const all_prices = Array.from(
      doc.querySelectorAll(".prco-valign-middle-helper")
    ).map((el) => digitsOf(el));
    const all_m = Array.from(
      doc.querySelectorAll('[data-name-en="room size"]')
    ).map((el) => digitsOf(el));
    const all_rows = doc.querySelectorAll(".e2e-hprt-table-row");

    // Fall back to a single night when the banner is missing instead of
    // crashing on a null match.
    const nightsMatch = document.body.innerHTML.match(/([0-9]+)-night stay/);
    const days = nightsMatch ? parseInt(nightsMatch[1], 10) : 1;

    const price_per_sqm = { 0: 99 };
    // 0 points at the sentinel entry above, so sorting by price still gets a
    // number for pages where no room could be evaluated.
    let cheapest = 0;
    let offset = 0; // index of the first price row of the current room
    let counter = 0; // index of the current room in all_m

    for (let i = 0; i < all_rows.length; i++) {
      // A row whose class contains "last-row" closes the group of price rows
      // belonging to one room.
      if (all_rows[i].getAttribute("class").indexOf("last-row") === -1) {
        continue;
      }
      const sqm = all_m[counter];
      const roomPrices = all_prices.slice(offset, i + 1);
      const min = Math.min.apply(Math, roomPrices);
      const per_sqm = min / sqm / days;
      console.log(url, ' sliced ', i, offset, roomPrices, sqm);
      offset = i + 1;
      counter++;

      // Skip rooms without a usable size or price rather than recording
      // NaN/Infinity entries.
      if (!(sqm > 0) || !Number.isFinite(per_sqm)) {
        continue;
      }
      if (!price_per_sqm[sqm] || per_sqm < price_per_sqm[sqm]) {
        price_per_sqm[sqm] = per_sqm;
      }
      if (!cheapest || per_sqm < price_per_sqm[cheapest]) {
        cheapest = sqm;
      }
    }

    const rawDate = /welcoming Booking.com guests since (.*?)\.</gim.exec(html);
    const rawRating = />([0-9]\.[0-9])</gim.exec(html);
    const reviews = html.match(/>([0-9,]+) reviews</);

    local[url] = {
      html: html,
      // Remove every thousands separator, not just the first one.
      reviews: reviews ? parseInt(reviews[1].replace(/,/g, ""), 10) : 0,
      rawDate: rawDate ? rawDate[1] : '',
      rawRating: rawRating ? parseFloat(rawRating[1]) : 0,
      date: rawDate ? new Date(rawDate[1]).getTime() : 0,
      all_prices: all_prices,
      all_m: all_m,
      all_rows: all_rows,
      price_per_sqm: price_per_sqm,
      cheapest: cheapest,
    };
    console.log(local[url]);
  }
  return local[url];
}
/**
 * Re-renders the result list inside `.hotellist_wrap`: sorts the collected
 * property cards by the criterion selected in `sortBy`, drops the ones that do
 * not satisfy the rating / review-count / room-size / price-per-sqm bounds read
 * from the control panel inputs, and prints a block of tags above each
 * remaining card.
 *
 * Reads the globals `list` (property card elements), `local` (per-URL data
 * produced by fetchURL) and the control panel inputs, which are addressed by
 * their element ids.
 */
function filter() {
  const minRatingValue = parseFloat(minRating.value);
  const minRoomSizeValue = parseInt(minRoomSize.value, 10);
  const maxRoomSizeValue = parseInt(maxRoomSize.value, 10);
  const minReviewsValue = parseInt(minReviews.value, 10);
  const maxReviewsValue = parseInt(maxReviews.value, 10);
  const minPricePerSqmValue = parseInt(minPricePerSqm.value, 10);
  const maxPricePerSqmValue = parseInt(maxPricePerSqm.value, 10);

  const container = document.querySelector(".hotellist_wrap");
  // Cached data for a property card, looked up through its title link.
  const infoOf = (card) =>
    local[card.querySelector("[data-testid='title-link']").href];

  const final = list.slice().sort((cardA, cardB) => {
    const a = infoOf(cardA);
    const b = infoOf(cardB);
    if (sortBy.value === 'reviews') {
      return b.reviews - a.reviews;
    }
    if (sortBy.value === 'date') {
      return b.date - a.date;
    }
    if (sortBy.value === 'price_per_sqm') {
      return a.price_per_sqm[a.cheapest] - b.price_per_sqm[b.cheapest];
    }
    // Unknown criterion: keep the current order.
    return 0;
  });

  container.innerHTML = "";
  for (const card of final) {
    const info = infoOf(card);
    const tags = ["Added: " + info.rawDate];

    if (!info.html.match) {
      console.log("nomatch", info.html);
      continue;
    }
    if (
      info.html.match(/Ping-pong/i) != null ||
      info.html.match(/table tennis/i) != null
    ) {
      tags.push("tabletennis");
    }
    if (info.html.match(/Coco-mat/i) != null) {
      tags.push("coco-mat");
    }
    // Only the highest bedroom number mentioned on the page is tagged.
    if (info.html.match(/Bedroom 3/i) != null) {
      tags.push("bedroom3");
    } else if (info.html.match(/Bedroom 2/i) != null) {
      tags.push("bedroom2");
    } else if (info.html.match(/Bedroom 1/i) != null) {
      tags.push("bedroom1");
    }

    const ratingAndReviewsOk =
      info.rawRating >= minRatingValue &&
      info.reviews >= minReviewsValue &&
      info.reviews <= maxReviewsValue;
    if (!ratingAndReviewsOk) {
      continue;
    }

    // The card is shown if at least one room size is within both the size and
    // the price-per-sqm bounds; every size is listed in the tags regardless.
    let hidden = true;
    for (const m2 in info.price_per_sqm) {
      const size = parseInt(m2, 10);
      const price = info.price_per_sqm[m2];
      if (
        size >= minRoomSizeValue &&
        size <= maxRoomSizeValue &&
        price >= minPricePerSqmValue &&
        price <= maxPricePerSqmValue
      ) {
        hidden = false;
      }
      const str = m2 + ' - ' + Math.trunc(price);
      // Keys are strings while `cheapest` is stored as a number; compare numerically.
      tags.push(Number(info.cheapest) === Number(m2) ? '<b>' + str + '</b>' : str);
    }
    if (hidden) {
      continue;
    }

    // insertAdjacentHTML adds the tags without re-parsing the container, so
    // cards appended earlier stay the same DOM nodes (innerHTML += recreated them).
    container.insertAdjacentHTML("beforeend", tags.join("<br>"));
    container.append(card);
  }
}
/**
 * Collects every property card currently on the page into `window.list`,
 * starts fetching the hotel page behind each card (fetchURL), and renders the
 * filter/sort control panel. Once all pages have been fetched the list is
 * rendered through filter(); changing any control re-runs filter().
 */
function extract() {
  window.list = Array.from(
    document.querySelectorAll('[data-testid="property-card"]')
  );
  console.log("extracted ", list);

  const all = [];
  for (const propertyCard of list) {
    const href = propertyCard.querySelector("[data-testid='title-link']").href;
    all.push(fetchURL(href));
  }
  Promise.all(all)
    .then(filter)
    .catch((err) => {
      // Surface the failure instead of leaving an unhandled rejection.
      console.error("Could not load hotel pages", err);
    });

  const div = document.createElement("div");
  div.innerHTML = `<div style="position:fixed;left:45%;top:0px;z-index:99999;background-color: bisque; padding: 20px;">
<select id="sortBy">
<option value="reviews" selected>Reviews count</option>
<option value="price_per_sqm">Price per sqm</option>
<option value="date">Date opened</option>
</select><br>
<input type=text id="minRating" value="8.6">Min rating<br>
<input type=text id="minRoomSize" value="40">Min room size
<input type=text id="maxRoomSize" value="99999999">Max room size<br>
<input type=text id="minReviews" value="20">Min reviews
<input type=text id="maxReviews" value="99999999">Max reviews<br>
<input type=text id="minPricePerSqm" value="0">Min price per sqm
<input type=text id="maxPricePerSqm" value="99999999">Max price per sqm
</div>`;
  document.body.appendChild(div);

  // The controls are referenced through their ids, the same way filter() reads them.
  sortBy.onchange = filter;
  minRating.onchange = filter;
  minRoomSize.onchange = filter;
  maxRoomSize.onchange = filter;
  minReviews.onchange = filter;
  maxReviews.onchange = filter;
  minPricePerSqm.onchange = filter;
  maxPricePerSqm.onchange = filter;
}
// Entry point: download every further result page, add its listing markup to
// the current page's `.hotellist_wrap`, then analyse all cards via extract().
var pagination = document.querySelectorAll('button.fc63351294.f9c5690c58');
// The second-to-last matching button is read as the number of the last page.
// Fall back to a single page when it is missing or not numeric.
var lastPageButton = pagination[pagination.length - 2];
var last_page = (lastPageButton && parseInt(lastPageButton.innerHTML, 10)) || 1;
console.log("Fetching up to " + last_page);

var all_pag = [];
for (var i = 1; i < last_page; i++) {
  // Build the page URL through the URL API so an existing `offset` parameter
  // or a `#fragment` in the address does not produce a broken request.
  var pageUrl = new URL(location.href);
  pageUrl.searchParams.set('offset', String(i * 25));
  const href = pageUrl.href;
  all_pag.push(
    fetch(href)
      .then((r) => r.text())
      .then((body) =>
        between(body, '<div id="ajaxsrwrap">', '</div> <!-- /ajaxsrwrap -->')
      )
      .catch((err) => {
        // One unreadable page should not abort the whole run.
        console.error("Skipping result page", href, err);
        return '';
      })
  );
}

Promise.all(all_pag)
  .then((strips) => {
    var wrap = document.querySelector('.hotellist_wrap');
    // Insert in page order, and without re-parsing what is already in the list.
    for (const strip of strips) {
      wrap.insertAdjacentHTML('beforeend', strip);
    }
    extract();
  })
  .catch((err) => {
    console.error("Could not build the combined result list", err);
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment