/**!
 * @author (c)2015-2020, CLosk, www.closk.design
 */

// Start every run with an empty console so only this run's output is visible.
console.clear();

/**
 * mdGen — converts the main article of the current page to Markdown and hands
 * the result to the DevTools `copy()` utility.
 *
 * Globals expected at call time (not at definition time): `jQuery`,
 * `document`, `location` and, optionally, the DevTools-only `copy()`.
 *
 * NOTE(review): declared with `var`, presumably so the snippet can be pasted
 * into the console repeatedly without a redeclaration error — keep it.
 */
var mdGen = {
  // File-name stub built from the first heading: transliterated to Latin
  // when true, used as-is when false.
  transHdr: true,

  /**
   * Options for the current host, picked from the per-site table below
   * (`undefined` when the host is not listed or there is no `window`).
   *
   * Recognised keys:
   *   selMain       selector of the container element(s) to convert
   *   selTitle      selector of the element whose text becomes the H1
   *   toRemove      selectors of descendants dropped before conversion
   *   titleToHeader take the H1 from the document title
   *   noHeader      do not prepend the suggested file-name line
   *   preCorrect    hook `($doc) => void` that patches the cloned DOM
   *   finalClean    hook `(markdown) => markdown` run after conversion
   */
  genOpts: {
    // #0 -- professionali.ru
    'professionali.ru': {
      selMain: '.b-box-part', // container
      toRemove: [
        // selectors of descendant elements to be removed
        '.h-overflow',
        '#inpage_VI-183754-0',
      ],
    },
    // #1 -- adme.ru
    'adme.ru': {
      // container
      selMain: 'article.article',
      // selectors of descendant elements to be removed
      toRemove: ['#js-article-share-top'],
    },
    // #2 -- proglib.io
    'proglib.io': {
      selMain: '.td-post-title,.td-post-content',
      // `.td-a-rec` was written without the dot and therefore matched nothing.
      toRemove: ['time', 'ins', '.crayon-table', '.td-module-meta-info', 'form', 'noscript', '.td-a-rec'],
      noHeader: true,
    },
    // #3 -- lifehacker.ru
    'lifehacker.ru': {
      selMain: '.post-content',
      // NOTE(review): `.social-and-date` is assumed to be a class (it was
      // written as a bare tag name) — confirm against the live markup.
      toRemove: ['.meta-info', '.social-and-date'],
    },
    // #4 -- upworktestru.com
    'upworktestru.com': {
      selMain: 'header, div.entry-content',
      toRemove: ['div'],
      /** Marks highlighted answers as checked task items and wraps h3 in <pre>. */
      preCorrect(doc) {
        doc.find('span[style="background-color: #00ff00;"]').each(function() {
          this.innerText = this.innerText.replace('•', '- [x] $&');
        });
        doc.find('strong').each(function() {
          this.innerText = this.innerText.replace('•', '- [] $&');
        });
        doc.find('h3').each(function() {
          const $t = jQuery(this);
          $t.replaceWith('<pre><br/>' + $t.html() + '<br/></pre>');
        });
      },
      /** Site-specific Markdown clean-up. */
      finalClean(text) {
        return text
          .replace(/<!--.*?-->/g, '')
          .replace(/\*{2}/g, '')
          .replace(/ •/g, '')
          .replace(/### Answers:/g, '**$&**')
          .replace(/(\*{2})#{3} /g, '$1')
          .replace(/»/g, '"')
          .replace(/«/g, '"')
          .replace(/‘/g, "'")
          .replace(/’/g, "'")
          // Entity patterns restored: the literal `<` / `>` versions were no-ops.
          .replace(/&lt;/g, '<')
          .replace(/&gt;/g, '>')
          // `m` flag added: without it `^` only matched the start of the text.
          .replace(/^ +-/gm, '-')
          .replace(/^\n{3,}/, '\n\n')
          .replace(/^ *(## \d+\.) */gm, '$1\n\n')
          .replace(/<\/?span.*?> */g, '');
      },
    },
    // #5 digitalocean -- tutorial-content
    // https://www.digitalocean.com/community/tutorials/
    'www.digitalocean.com': {
      selMain: '.content-title, .tutorial-content',
      toRemove: [],
      /** Turns the title, code labels and <pre> blocks into Markdown text. */
      preCorrect(doc) {
        const h1 = doc.filter('.content-title').get(0);
        // The title element may be absent; do not crash the whole run.
        if (h1) h1.innerHTML = `# ${h1.innerText}`;

        doc.find('.secondary-code-label ').each(function() {
          this.innerHTML = `### ${this.innerText}\n\n`;
        });
        doc.find('.code-label').each(function() {
          this.outerHTML = `\n\n<p><tt>${this.innerText}</tt></p>\n\n`;
        });
        doc.find('pre').each(function() {
          this.outerText = `\n\`\`\`\n${this.innerText.trim()}\n\`\`\`\n`;
        });
      },
      /** Decodes the angle-bracket entities left in code samples. */
      finalClean(text) {
        return text.replace(/&lt;/g, '<').replace(/&gt;/g, '>');
      },
    },
    // #6 vscale docs
    // https://community.vscale.io/hc/ru/community/topics/200563745-Руководства
    'community.vscale.io': {
      selMain: 'h1, .post-body',
      toRemove: [],
      /** Replaces every <pre> with a fenced code block. */
      preCorrect(doc) {
        doc.find('pre').each(function() {
          this.outerText = `\n\`\`\`\n${this.innerText.trim()}\n\`\`\`\n`;
        });
      },
      /** Drops the `$ ` shell prompt at the start of every line. */
      finalClean(text) {
        return text.replace(/^\$ /gm, '');
      },
    },
    // #7 www.opennet.ru
    'www.opennet.ru': {
      selMain: '.NAVHEADER th, .SECT1',
      toRemove: ['a[name]'],
      preCorrect(doc) {},
    },
    // #8 medium.com
    'medium.com': {
      selMain: '.section-inner',
      toRemove: ['span', '.followState', '.js-followState', 'canvas'],
      /** Site-specific Markdown clean-up. */
      finalClean(text) {
        return text
          .replace(/<\/?(figure|noscript|time).*?>[ \t]*/gi, '')
          .replace(/``` `(.*?)` ```/gm, '\n```bash\n$1\n```\n')
          .replace(/&gt;/g, '>')
          .replace(/&lt;/g, '<')
          // `m` flag added so trailing blanks are trimmed on every line.
          .replace(/[ \t]+$/gm, '');
      },
    },
    // #9 dockercheatsheet.painlessdocker.com
    'dockercheatsheet.painlessdocker.com': {
      selMain: 'main',
      toRemove: [],
      titleToHeader: true,
      preCorrect(doc) {},
      /** Site-specific Markdown clean-up. */
      finalClean(text) {
        return text
          .replace(/``` `(.*?)` ```/gm, '\n```bash\n$1\n```\n')
          .replace(/<\/?section.*?>[ \t]*/g, '')
          .replace(/<!.*?>/g, '')
          .replace(/###### /g, '');
      },
    },
    // #10 dev.to
    'dev.to': {
      selMain: '#article-body',
      toRemove: ['meta'],
      /** Strips leftover <sub>/<span> tags. */
      finalClean(text) {
        return text.replace(/<\/?(sub|span)[^>]*>/gi, '');
      },
    },
    // #11 habr.com
    'habr.com': {
      selMain: '.post__text',
      selTitle: '.post__title',
      toRemove: [],
      titleToHeader: true,
      noHeader: false,
      preCorrect() {},
      /** Strips leftover <sub>/<span> tags. */
      finalClean(text) {
        return text.replace(/<\/?(sub|span)[^>]*>/gi, '');
      },
    },
    // Guarded so that merely defining the object does not throw outside a browser.
  }[typeof window !== 'undefined' ? window.location.host : ''],

  /**
   * Converts the configured container(s) of the current page to Markdown,
   * passes the result to `copy()` (when available) and logs a preview.
   *
   * @param {object} [settings] per-call overrides merged over `genOpts`
   * @returns {string} timestamped completion message
   * @throws {Error} when no `selMain` selector is configured
   */
  start(settings = {}) {
    const o = { ...this.genOpts, ...settings };
    if (!o.selMain) {
      throw new Error('mdGen.start: no `selMain` selector configured for this host; pass one in settings');
    }

    // Build a fresh list: pushing into `o.toRemove` mutated the shared site
    // config on every run and crashed when a site had no `toRemove` at all.
    const toRemove = [...(o.toRemove || []), 'script', 'style', 'iframe'];

    const picLbl = 'pic';
    const lnkLbl = 'lnk';

    // Work on a detached copy so the live page is left untouched.
    const $selMain = jQuery(o.selMain).clone();
    // `.remove(selector)` only filters the matched set itself; descendants
    // have to be located with `.find()` first.
    $selMain.find(toRemove.join(',')).remove();

    if (typeof o.preCorrect === 'function') o.preCorrect($selMain);

    // Concatenate the inner HTML of every matched container. Plain DOM is
    // used because `$` is not guaranteed to be jQuery in the console.
    let text = '';
    $selMain.each(function() {
      text += this.innerHTML;
    });

    const images = [];
    const links = [];
    const $textes = $selMain.find('textarea');
    let idxTextes = 0;

    // Reads one attribute value out of a serialized tag / attribute string.
    // The leading `\s` keeps `src=` from matching inside `data-src=`.
    const attr = (source, name) => {
      const m = source.match(new RegExp(`\\s${name}=(['"])(.*?)\\1`, 'i'));
      return (m && m[2]) || '';
    };

    // Makes root-relative and protocol-relative URLs absolute.
    const toAbsolute = (url) => {
      if (url.startsWith('//')) return location.protocol + url;
      if (url.startsWith('/')) return location.origin + url;
      return url;
    };

    let res = text
      // Entity pattern restored (the literal-space version was a no-op).
      .replace(/&nbsp;|\u00A0/g, ' ')
      // `[\s\S]` so multi-line textareas match too and the index into
      // `$textes` stays in step with the tags found in the HTML.
      .replace(/<textarea\b[^>]*>([\s\S]*?)<\/textarea>/gi, (m, content) => {
        const el = $textes[idxTextes++];
        return '\n```\n' + (el ? el.value : content) + '\n```\n';
      })
      .replace(/<[/]?(pre)\b[^>]*>/gi, '\n```\n')
      .replace(/<[/]?(code|tt)\b[^>]*>/gi, '`')
      .replace(/<[/]?(strong|b)\b[^>]*>/gi, '**')
      .replace(/<[/]?(em|i)\b[^>]*>/gi, '_')
      // Collapse all whitespace; line structure is rebuilt from the tags below.
      .replace(/\s+/gm, ' ')
      .replace(/\s*<br *\/?>\s*/gi, '\n')
      .replace(/<(h\d|p|blockquote|[ou]l|li)\b/gi, '\n\n$&')
      .replace(/<\/(h\d|p|blockquote|[ou]l|li)>/gi, '$&\n\n')
      .replace(/<h(\d)[^>]*>\s*/gi, (w, level) => '#'.repeat(Number.parseInt(level, 10)) + ' ')
      // `\b` keeps <abbr>, <article>, <aside>… from being taken for links.
      .replace(/<a\b([^>]*)>(.*?)<\/a>/gi, (m0, attrs, label) => {
        const href = attr(attrs, 'href');
        if (href && href === label) return `<${label}>`;
        // Named anchors and in-page links keep only their text.
        if (!href || href[0] === '#') return label.trim();
        links.push(toAbsolute(href));
        return `[${label}][${lnkLbl}${links.length}]`;
      })
      .replace(/<img\b[^>]*>/gi, (tag) => {
        const src = attr(tag, 'src');
        if (!src) return '';
        const alt = attr(tag, 'alt');
        const title = attr(tag, 'title');
        images.push(toAbsolute(src));
        return `![${title || alt}][${picLbl}${images.length}]`;
      })
      .replace(/<(blockquote)([^>]*)>(.*?)<\/\1>/gim, (match, tag, attrs, content) => '> ' + content.trim())
      .replace(/<(li)([^>]*)>(.*?)<\/\1>/gim, (match, tag, attrs, content) => '\n- ' + content.trim())
      .replace(/<[/]?(nobr|div|p|h\d|[uo]l|yobject|section|main|nav)\b[^>]*>/gi, ' ')
      .replace(/<hr[^>]*>/gi, '\n---\n')
      .replace(/^[ \t]+/gm, '');

    // The hook is optional; several site configs do not define it.
    if (typeof o.finalClean === 'function') res = o.finalClean(res) || res;

    // Reference-style definitions for the collected links and images.
    if (links.length) {
      res += '\n\n' + links.map((href, i) => `[${lnkLbl}${i + 1}]: ${href}\n`).join('');
    }
    if (images.length) {
      res += '\n\n' + images.map((src, i) => `[${picLbl}${i + 1}]: ${src}\n`).join('');
    }

    // Take the document heading from the page title (or `selTitle`) and
    // demote every other heading by one level.
    if (o.titleToHeader || o.selTitle) res = this.makeHeaderFromTitle(res, o.selTitle);

    // Put a file name derived from the first heading on the first line so
    // text editors can offer it as the default name when saving.
    if (!o.noHeader) res = this.makeFileName(res);

    // Final tidy-up: squeeze runs of blank lines.
    res = res.replace(/( *\n){3,}/g, '\n\n');

    // `copy()` exists only in the DevTools console.
    if (typeof copy === 'function') copy(res);
    else console.warn('mdGen: copy() is unavailable here; the result was not copied');

    console.log(res.slice(0, 300));
    console.log('%c↔↔↔↔↔', 'color:blue');
    return `${this.fullTime()} _mdGen.JS has Finished`;
  },

  /**
   * Prepends an H1 built from the page title and demotes every existing
   * Markdown heading line by one level.
   * Lines that start with `# ` inside code samples are demoted as well.
   *
   * @param {string} text Markdown produced so far
   * @param {string} [selTitle] selector of the title element; the document
   *   title is used when it is omitted or matches nothing
   * @returns {string}
   */
  makeHeaderFromTitle(text, selTitle) {
    const titleEl = selTitle ? document.querySelector(selTitle) : null;
    const title = String(titleEl ? titleEl.innerText : document.title).trim();
    // `m` flag: without it only a `#` at the very start of the text was hit.
    // Concatenation avoids `$`-patterns in the title being interpreted.
    return `# ${title}\n\n` + text.replace(/^#+[ \t]/gm, '#$&');
  },

  /**
   * Inserts a suggested `<name>.md` line before the first heading line.
   * The name is the heading text (transliterated when `transHdr` is set)
   * with blanks turned into `_`, `:`/`.` into `–`, quotes removed and
   * trailing punctuation stripped. Text without a heading is returned as-is.
   *
   * @param {string} text
   * @returns {string}
   */
  makeFileName(text) {
    return text.replace(/^\s*#+[ \t]+(.+)\n/, (headingLine, fstHdr) => {
      const base = this.transHdr ? this.translit(fstHdr) : fstHdr;
      const fileName = base
        .replace(/[ _]+/g, '_')
        .replace(/[:.]/g, '–')
        .replace(/['"]/g, '')
        .replace(/(_+-|-_+)/g, '-')
        .replace(/[_+%,.–\-]+$/, '');
      return `${fileName}.md\n\n${headingLine}`;
    });
  },

  // Default `toLocaleString` options used by `fullTime`.
  tmeOpts: {
    day: '2-digit',
    hour: '2-digit',
    minute: '2-digit',
    month: 'long',
    second: '2-digit',
    timeZoneName: 'short',
    weekday: 'short',
    year: 'numeric',
  },

  // Cyrillic → Latin transliteration table (`А` used to map to lowercase 'a').
  aTrans: {
    А: 'A', а: 'a', Б: 'B', б: 'b', В: 'V', в: 'v', Г: 'G', г: 'g',
    Д: 'D', д: 'd', Е: 'E', е: 'e', Ж: 'ZH', ж: 'zh', З: 'Z', з: 'z',
    И: 'I', и: 'i', Й: 'I', й: 'i', К: 'K', к: 'k', Л: 'L', л: 'l',
    М: 'M', м: 'm', Н: 'N', н: 'n', О: 'O', о: 'o', П: 'P', п: 'p',
    Р: 'R', р: 'r', С: 'S', с: 's', Т: 'T', т: 't', У: 'U', у: 'u',
    Ф: 'F', ф: 'f', Х: 'H', х: 'h', Ц: 'TS', ц: 'ts', Ч: 'CH', ч: 'ch',
    Ш: 'SH', ш: 'sh', Щ: 'SCH', щ: 'sch', Ъ: '”', ъ: '”', Ы: 'Y', ы: 'y',
    Ь: '’', ь: '’', Э: 'E', э: 'e', ю: 'yu', Ю: 'YU', Я: 'Ya', я: 'ya',
    Ё: 'YO', ё: 'yo',
  },

  /**
   * Transliterates Cyrillic characters using `aTrans`; every other
   * character is kept unchanged.
   *
   * @param {string} str
   * @returns {string}
   */
  translit(str) {
    return [...str].map((char) => this.aTrans[char] || char).join('');
  },

  /**
   * Current date and time formatted for the `ru` locale.
   *
   * @param {object} [settings] `toLocaleString` options overriding `tmeOpts`
   * @returns {string}
   */
  fullTime(settings = {}) {
    return new Date().toLocaleString('ru', { ...this.tmeOpts, ...settings });
  },

  /**
   * Removes everything that looks like a tag (`<...>`) from the text.
   *
   * @param {string} text
   * @returns {string}
   */
  stripMarkup(text) {
    return text.replace(/<[^>]*>/g, '');
  },

  /**
   * Removes the opening and closing tags of the listed elements, keeping
   * their content.
   *
   * @param {string|string[]} tags tag names as an array, or as one string
   *   separated by `,`, space, `|`, `;` or `.`; each name is inserted into a
   *   regular expression verbatim
   * @param {string} text
   * @returns {string} `text` unchanged when `tags` is neither form or
   *   contains no names
   */
  stripTags(tags, text) {
    let names;
    if (typeof tags === 'string') names = tags.split(/[, |;.]+/);
    else if (Array.isArray(tags)) names = tags;
    else return text;

    // An empty alternative would make the pattern match every tag.
    names = names.map((name) => String(name).trim()).filter(Boolean);
    if (names.length === 0) return text;

    // The look-ahead stops `b` from matching <br>, <body>, <b-foo>…
    const re = new RegExp(`</?(?:${names.join('|')})(?![\\w-])[^>]*>`, 'gi');
    return text.replace(re, '');
  },
};

// Run the conversion immediately; as the last statement of the snippet its
// return value (the completion message) is what the console echoes.
mdGen.start();