Skip to content

Instantly share code, notes, and snippets.

@eduardomozart
Last active January 13, 2025 12:11
Show Gist options
  • Save eduardomozart/feb4bbef30a47db1439d3ea9c03f28e0 to your computer and use it in GitHub Desktop.
Save eduardomozart/feb4bbef30a47db1439d3ea9c03f28e0 to your computer and use it in GitHub Desktop.
Grabber Sankaku
/**
 * Rewrites search metas into the tag form used in the request URL.
 *
 * Currently only the aspect-ratio meta is handled: every space-delimited
 * "ratio:W:H" token becomes the "W:H_aspect_ratio" tag.
 *
 * @param {string} search - The raw, space-separated search string.
 * @returns {string} The search string with ratio metas replaced.
 */
function buildSearch(search) {
    // Replace "ratio:4:3" meta by "4:3_aspect_ratio" tag.
    // The trailing delimiter is checked with a lookahead instead of being
    // consumed, so that two consecutive ratio metas ("ratio:4:3 ratio:16:9")
    // are both rewritten: the space between them must stay available as the
    // leading delimiter of the second token.
    return search.replace(/(^| )ratio:(\d+:\d+)(?=$| )/g, "$1$2_aspect_ratio");
}
/**
 * Flattens a post object coming from the JSON API, in place, and hands it
 * to completeImage().
 *
 * - `created_at` is replaced by its `s` member,
 * - `score` is copied from `total_score`,
 * - `author` is replaced by the author's `name`.
 *
 * @param {Object} img - A post object from the JSON API (mutated).
 * @returns {Object} Whatever completeImage(img, true) returns.
 */
function buildImageFromJson(img) {
    var createdAt = img.created_at;
    img.created_at = createdAt.s;

    img.score = img.total_score;

    var author = img.author;
    img.author = author.name;

    var isFromJson = true;
    return completeImage(img, isFromJson);
}
/**
 * Fills in / normalizes the `file_url` of an image object, in place.
 *
 * 1. When `file_url` is missing or shorter than 5 characters and a
 *    `preview_url` exists, the file URL is derived from the preview URL by
 *    replacing its first "/preview/" segment with "/".
 * 2. For images that do not come from the JSON API, the first
 *    "<char>.sankakucomplex" occurrence whose preceding character is not
 *    "s" gets an "s" inserted before the dot.
 *
 * @param {Object} img - The image object (mutated).
 * @param {boolean} fromJson - True when the image was built from the JSON API.
 * @returns {Object} The same `img` object.
 */
function completeImage(img, fromJson) {
    var lacksFileUrl = !img.file_url || img.file_url.length < 5;
    if (lacksFileUrl && img.preview_url) {
        img.file_url = img.preview_url.replace("/preview/", "/");
    }

    if (!fromJson && img.file_url) {
        var hostPattern = /([^s])\.sankakucomplex/;
        img.file_url = img.file_url.replace(hostPattern, "$1s.sankakucomplex");
    }

    return img;
}
/**
 * Escapes the five HTML-significant characters (& < > " ') of a string
 * with their entity equivalents.
 *
 * @param {string} unsafe - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(unsafe) {
    var entities = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#039;",
    };
    // A single pass over the input: none of the replacements is re-scanned,
    // so an inserted "&" is never escaped a second time.
    return unsafe.replace(/[&<>"']/g, function (ch) {
        return entities[ch];
    });
}
/**
 * Tag type names indexed by the numeric `type` field of the tags returned
 * by the JSON API. Ids missing from this table map to `undefined`.
 */
var JSON_TAG_TYPES = {
    0: "general",
    1: "artist",
    2: "studio",
    3: "copyright",
    4: "character",
    5: "species",
    8: "medium",
    9: "meta",
};

/**
 * Converts a site base URL into the matching JSON API base URL by swapping
 * the "chan." / "idol." host prefixes for "capi-v2." / "iapi.".
 *
 * @param {string} baseUrl - Base URL of the site.
 * @returns {string} Base URL of the JSON API.
 */
function toApiBaseUrl(baseUrl) {
    return baseUrl
        .replace("//chan.", "//capi-v2.")
        .replace("//idol.", "//iapi.");
}

/**
 * Builds the tag list from the `tags` member of a parsed JSON API response.
 *
 * @param {*} data - Parsed JSON response.
 * @returns {Array<{name: *, count: *, type: (string|undefined)}>} One entry
 *     per tag; an empty list when `data.tags` is not an array.
 */
function parseJsonTags(data) {
    if (!data || !Array.isArray(data.tags)) {
        return [];
    }
    return data.tags.map(function (tag) {
        return {
            name: tag.name,
            count: tag.count,
            type: JSON_TAG_TYPES[tag.type],
        };
    });
}

/**
 * Site model for Sankaku: search syntax description, authentication
 * settings and the two supported APIs ("JSON" and the HTML-scraping
 * "Regex" one). Each API endpoint is a pair of `url` (builds the request)
 * and `parse` (turns the response body into result objects) functions.
 */
export var source = {
    name: "Sankaku",
    modifiers: ["rating:safe", "rating:questionable", "rating:explicit", "user:", "fav:", "fastfav:", "md5:", "source:", "id:", "width:", "height:", "score:", "mpixels:", "filesize:", "date:", "gentags:", "arttags:", "chartags:", "copytags:", "approver:", "parent:", "sub:", "order:id", "order:id_desc", "order:score", "order:score_asc", "order:mpixels", "order:mpixels_asc", "order:filesize", "order:landscape", "order:portrait", "order:favcount", "order:rank", "order:change", "order:change_desc", "parent:none", "unlocked:rating"],
    tagFormat: {
        case: "lower",
        wordSeparator: "_",
    },
    searchFormat: {
        and: " ",
        or: {
            separator: " ",
            prefix: "~",
        },
        parenthesis: false,
        precedence: "or",
    },
    auth: {
        url: {
            type: "url",
            fields: [
                {
                    id: "pseudo",
                    key: "login",
                },
                {
                    id: "password",
                    type: "password",
                },
                {
                    key: "password_hash",
                    type: "hash",
                    hash: "sha1",
                    salt: "choujin-steiner--%password%--",
                },
                {
                    key: "appkey",
                    type: "hash",
                    hash: "sha1",
                    salt: "sankakuapp_%pseudo:lower%_Z5NE9YASej",
                },
            ],
            check: {
                type: "max_page",
                value: 50,
            },
        },
        oauth2: {
            type: "oauth2",
            authType: "password_json",
            tokenUrl: "https://capi-v2.sankakucomplex.com/auth/token",
        },
    },
    apis: {
        json: {
            name: "JSON",
            auth: [],
            maxLimit: 200,
            search: {
                /**
                 * Builds the JSON search request.
                 *
                 * @param {Object} query - Reads `page` and `search`.
                 * @param {Object} opts - Reads `baseUrl`, `loggedIn` and `limit`.
                 * @param {*} previous - Forwarded to Grabber.pageUrl().
                 * @returns {{url: string, headers: Object}} Request description.
                 */
                url: function (query, opts, previous) {
                    var baseUrl = toApiBaseUrl(opts.baseUrl);
                    // The third argument differs for logged-in users (1000 vs 50);
                    // presumably the highest directly reachable page — see Grabber.pageUrl.
                    var pagePart = Grabber.pageUrl(query.page, previous, opts.loggedIn ? 1000 : 50, "page={page}", "prev={max}", "next={min-1}");
                    var search = buildSearch(query.search);
                    var url = baseUrl + "/posts?lang=en&" + pagePart + "&limit=" + opts.limit + "&tags=" + encodeURIComponent(search);
                    return {
                        url: url,
                        headers: {
                            Accept: "application/vnd.sankaku.api+json;v=2",
                        },
                    };
                },
                /**
                 * Parses a JSON search response.
                 *
                 * @param {string} src - Response body; must parse to an array of posts.
                 * @returns {{images: Array, tags: Array}} The converted posts, plus
                 *     the tags found in a top-level `tags` array (empty when the
                 *     response has none, which is the case for a plain array).
                 */
                parse: function (src) {
                    var data = JSON.parse(src);
                    var images = data.map(buildImageFromJson);
                    return {
                        images: images,
                        tags: parseJsonTags(data),
                    };
                },
            },
            details: {
                /**
                 * @param {string} id - Post id.
                 * @param {string} md5 - Unused.
                 * @param {Object} opts - Reads `baseUrl`.
                 * @returns {string} URL of the post in the JSON API.
                 */
                url: function (id, md5, opts) {
                    return toApiBaseUrl(opts.baseUrl) + "/posts/" + id;
                },
                /**
                 * Parses a single-post JSON response.
                 *
                 * @param {string} src - Response body (a JSON object).
                 * @returns {{tags: Array, source: string}} The post's tags and the
                 *     URL of its page on chan.sankakucomplex.com.
                 */
                parse: function (src) {
                    var data = JSON.parse(src);
                    return {
                        tags: parseJsonTags(data),
                        source: "https://chan.sankakucomplex.com/en/posts/" + data.id,
                    };
                },
            },
        },
        html: {
            name: "Regex",
            auth: [],
            forcedLimit: 20,
            forcedTokens: ["*"],
            search: {
                /**
                 * Builds the HTML search URL.
                 *
                 * @param {Object} query - Reads `page` and `search`.
                 * @param {Object} opts - Reads `loggedIn`.
                 * @param {*} previous - Forwarded to Grabber.pageUrl().
                 * @returns {(string|{error: string})} Relative URL, or an error
                 *     object when building the page part throws.
                 */
                url: function (query, opts, previous) {
                    try {
                        var pagePart = Grabber.pageUrl(query.page, previous, opts.loggedIn ? 50 : 25, "page={page}", "prev={max}", "next={min-1}");
                        var search = buildSearch(query.search);
                        return "/en/posts?" + pagePart + "&tags=" + encodeURIComponent(search);
                    }
                    catch (e) {
                        return { error: e.message };
                    }
                },
                /**
                 * Extracts the posts of an HTML search page.
                 *
                 * @param {string} src - HTML of the search page.
                 * @returns {{images: Array<Object>}} One entry per
                 *     ".posts-container article" element that contains an image.
                 */
                parse: function (src) {
                    // Drop the "popular-preview" and AI carousel blocks before parsing,
                    // presumably so their posts are not picked up as search results.
                    var cleaned = src
                        .replace(/<div class="?popular-preview"?>[\s\S]+?<\/div>/g, "")
                        .replace(/<div class="?carousel-data carousel-data-ai"? >[\s\S]+?<\/div>/g, "");
                    var html = Grabber.parseHTML(cleaned);
                    var articles = html.find(".posts-container article");
                    var images = [];
                    for (var i = 0; i < articles.length; i++) {
                        var article = articles[i];
                        var img = article.find("img")[0];
                        var preview = article.find("img.post-preview-image")[0];
                        // An article without an image cannot be turned into a post;
                        // skip it rather than aborting the whole page.
                        if (!img || !preview) {
                            continue;
                        }
                        // The "data-auto_page" attribute holds the tag list and a
                        // "Size:<width>x<height>" entry.
                        var info = img.attr("data-auto_page");
                        images.push({
                            id: article.attr("data-id"),
                            preview_url: preview.attr("src"),
                            md5: Grabber.regexToConst("md5", '[^"]+/preview/\\w{2}/\\w{2}/(?<md5>[^.]+)', img.attr("src")),
                            tags: info,
                            has_children: (img.attr("class") || "").includes("has-children"),
                            width: Grabber.regexToConst("width", '\\s*Size:(?<width>\\d+)x', info),
                            height: Grabber.regexToConst("height", '\\s*Size:(\\d+)x(?<height>\\d+)', info),
                        });
                    }
                    return {
                        images: images,
                    };
                },
            },
            details: {
                /**
                 * @param {string} id - Unused.
                 * @param {string} md5 - MD5 of the post.
                 * @returns {string} Relative URL of the post page.
                 */
                url: function (id, md5) {
                    return "/post/show/" + md5;
                },
                /**
                 * Extracts tags, file information and creation date from a post page.
                 *
                 * @param {string} src - HTML of the post page.
                 * @returns {Object} Object with `tags`, `createdAt`, `has_comments`
                 *     and, when found, `score`, `imageUrl` and `file_size`.
                 */
                parse: function (src) {
                    var html = Grabber.parseHTML(src);

                    var tagEls = html.find('li[class^="tag-type"]');
                    var tags = [];
                    for (var i = 0; i < tagEls.length; i++) {
                        var tagEl = tagEls[i];
                        var link = tagEl.find("a")[0];
                        tags.push({
                            type: Grabber.regexToConst("type", 'tag-type-(?<type>[^">]+)(?:|"[^>]*)', tagEl.attr("class")),
                            count: link.attr("data-count"),
                            name: link.innerText().trim(),
                        });
                    }

                    var file = {
                        createdAt: undefined,
                        has_comments: html.find(".no-comments").length === 0,
                    };

                    // The post id is not known here, so the score element is looked up
                    // by id prefix instead of by its full "post-score-<id>" id.
                    // NOTE(review): assumes only the score element uses this prefix — confirm against the live markup.
                    var scoreEls = html.find('[id^="post-score-"]');
                    if (scoreEls.length > 0) {
                        file.score = scoreEls[0].innerText().trim();
                    }

                    var highresEls = html.find("#highres");
                    var hasHighres = highresEls.length > 0;
                    if (hasHighres) {
                        var download = highresEls[0];
                        var downloadText = download.innerText().trim();
                        var downloadRegex = "(?<width>\\d+)x(?<height>\\d+) \\((?<filesize>[^ ]+ [KM]B)";
                        file.imageUrl = download.attr("href");
                        file.file_size = Grabber.fileSizeToInt(Grabber.regexToConst("filesize", downloadRegex, downloadText));
                    }

                    var stats = html.find("div#stats a");
                    for (var j = 0; j < stats.length; j++) {
                        var stat = stats[j];
                        // Not every link carries both attributes.
                        var onclick = stat.attr("onclick") || "";
                        var href = stat.attr("href") || "";
                        // Fallback for pages without a "#highres" link: read the URL from
                        // the download link's onclick handler. The backslashes must be
                        // doubled so the dot and the parenthesis reach the regex escaped.
                        if (!hasHighres && onclick.includes("prepare_download")) {
                            file.imageUrl = Grabber.regexToConst("url", "Post\\.prepare_download\\('(?<url>.*?)'[^>]*", onclick);
                        }
                        if (href.includes("date")) {
                            file.createdAt = Grabber.regexToConst("date", '(?<date>[^"]+)', stat.attr("title"));
                        }
                    }

                    file.tags = tags;
                    return file;
                },
            },
            tagTypes: {
                url: function () {
                    return "/tag/index";
                },
                /**
                 * Reads the available tag types from the options of the "type"
                 * <select> element of the tag index page.
                 *
                 * @param {string} src - HTML of the tag index page.
                 * @returns {({types: Array<{id: string, name: string}>}|{error: string})}
                 */
                parse: function (src) {
                    var contents = src.match(/<select[^>]* id=['"]?type['"]?[^>]*>([\s\S]+)<\/select>/);
                    if (!contents) {
                        return { error: "Parse error: could not find the tag type <select> tag" };
                    }
                    var results = Grabber.regexMatches('<option value="?(?<id>\\d+)"?>(?<name>[^<]+)</option>', contents[1]);
                    var types = results.map(function (r) {
                        return {
                            id: r.id,
                            name: r.name.toLowerCase(),
                        };
                    });
                    return { types: types };
                },
            },
            tags: {
                /**
                 * @param {Object} query - Reads `order` and `page`.
                 * @param {Object} opts - Unused.
                 * @returns {string} Relative URL of the tag index page.
                 */
                url: function (query, opts) {
                    return "/tag/index?language=en&order=" + query.order + "&page=" + query.page;
                },
                parse: function (src) {
                    return {
                        tags: Grabber.regexToTags('<tr[^>]*>\\s*<td[^>]*>(?<count>\\d+)</td>\\s*<td class="?tag-type-(?<type>[^">]+)"?>\\s*\\[<a[^>]+>\\?</a>\\]\\s*<a[^>]+>(?<name>.+?)</a>\\s*</td>', src),
                    };
                },
            },
            check: {
                url: function () {
                    return "/";
                },
                /**
                 * @param {string} src - HTML of the home page.
                 * @returns {boolean} True when the page mentions "Sankaku".
                 */
                parse: function (src) {
                    return src.indexOf("Sankaku") !== -1;
                },
            },
        },
    },
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment