Skip to content

Instantly share code, notes, and snippets.

@mindplay-dk
Created December 17, 2024 12:07
Show Gist options
  • Save mindplay-dk/110df1573ae1e32f5c219575355a2998 to your computer and use it in GitHub Desktop.
Save mindplay-dk/110df1573ae1e32f5c219575355a2998 to your computer and use it in GitHub Desktop.
a horrible script to convert the entire PHP manual from HTML files to Markdown
const fs = require('fs');
const path = require('path');
const TurndownService = require('turndown');
// Input and output locations, both resolved relative to this script's folder:
// HTML pages are read from ./html, converted pages are written to ./markdown.
const [inputDir, outputDir] = ['html', 'markdown'].map((dirName) =>
  path.join(__dirname, dirName)
);

// Shared Turndown converter, configured for fenced code blocks.
const turndownService = new TurndownService({ codeBlockStyle: 'fenced' });
/**
 * Builds a Turndown `replacement` callback that renders a node as a fenced
 * Markdown code block.
 *
 * @param {string} language - Info string written right after the opening
 *   fence; pass '' for an untagged block.
 * @returns {function(string, Object, Object): string} Callback with Turndown's
 *   `(content, node, options)` signature. Only `node` is read; the Markdown
 *   is built from the text of `node.firstChild`.
 */
const replaceWithFencedCodeBlock = (language) => (content, node, options) => {
  // Give every <br> a newline as its text so line breaks survive when the
  // markup is flattened through `textContent` below.
  for (const br of Array.from(node.getElementsByTagName('br'))) {
    br.textContent = '\n';
  }

  // NOTE(review): only the FIRST child's text is used. That is the whole
  // sample when the node wraps a single element; confirm that matched nodes
  // never carry the code in several sibling children.
  // A node without children yields an empty block instead of a TypeError.
  const code = node.firstChild ? node.firstChild.textContent : '';

  // The fence must be longer than any backtick run inside the code, otherwise
  // a run of three or more backticks in the sample could close the block early.
  const backtickRuns = code.match(/`+/g) || [];
  const longestRun = backtickRuns.reduce(
    (longest, run) => Math.max(longest, run.length),
    0
  );
  const fence = '`'.repeat(Math.max(3, longestRun + 1));

  // Drop a single trailing newline so the closing fence directly follows the
  // last line of code.
  const body = code.replace(/\n$/, '');

  return `\n\n${fence}${language}\n${body}\n${fence}\n\n`;
};
// Rule for highlighted PHP samples: a node whose class is exactly `phpcode`
// and whose first child is a PRE that itself starts with a CODE element is
// emitted as a fenced block tagged `php`.
turndownService.addRule('phpCodeBlock', {
  filter: (node, { codeBlockStyle }) => {
    if (codeBlockStyle !== 'fenced' || node.className !== 'phpcode') {
      return false;
    }

    const pre = node.firstChild;
    if (!pre || pre.nodeName !== 'PRE') {
      return false;
    }

    const code = pre.firstChild;
    return Boolean(code) && code.nodeName === 'CODE';
  },
  replacement: replaceWithFencedCodeBlock('php'),
});
// Rule for PRE elements whose class is exactly `examplescode`: they are
// emitted as fenced blocks without a language tag.
turndownService.addRule('phpCodeExample', {
  filter: ({ nodeName, className }, { codeBlockStyle }) =>
    codeBlockStyle === 'fenced' &&
    nodeName === 'PRE' &&
    className === 'examplescode',
  replacement: replaceWithFencedCodeBlock(''),
});
// Create the output directory if it is not there yet.
if (fs.existsSync(outputDir) === false) {
  fs.mkdirSync(outputDir);
}

// Collect the names of the HTML pages located directly inside the input directory.
const files = fs
  .readdirSync(inputDir)
  .filter((entry) => entry.endsWith('.html'));

console.log(`Total files to process: ${files.length}`);
// Convert every collected HTML page to a Markdown file of the same base name.
for (const [index, file] of files.entries()) {
  const inputPath = path.join(inputDir, file);
  const outputPath = path.join(outputDir, file.replace(/\.html$/, '.md'));

  // Wrap the <code> of each PHP sample in <pre>, because the `phpCodeBlock`
  // rule only matches the shape div.phpcode > pre > code.
  // `[\s\S]*?` is used instead of `.*?` because `.` does not match line
  // terminators: with `.` a sample spanning several lines was never wrapped
  // and therefore never rendered as a fenced block.
  const htmlContent = fs
    .readFileSync(inputPath, 'utf8')
    .replace(
      /<div class="phpcode"><code>([\s\S]*?)<\/code><\/div>/g,
      '<div class="phpcode"><pre><code>$1</code></pre></div>'
    );

  // Convert to Markdown and write the result next to the other pages.
  const markdownContent = turndownService.turndown(htmlContent);
  fs.writeFileSync(outputPath, markdownContent);

  console.log(`[${index + 1} of ${files.length}] ${inputPath} -> ${outputPath}`);
}

console.log('Conversion complete.');
// Concatenate every Markdown file in the output directory into a single
// `manual.md` next to this script.
// The names are sorted so the order of pages in the manual does not depend on
// the order in which the directory happens to be enumerated.
const markdownFiles = fs
  .readdirSync(outputDir)
  .filter((file) => file.endsWith('.md'))
  .sort();
const manualPath = path.join(__dirname, 'manual.md');

// Open the output file for writing, truncating any previous content.
// Despite its name, `manualStream` holds the file descriptor returned by
// `fs.openSync`, not a stream object.
const manualStream = fs.openSync(manualPath, 'w');
try {
  // Append each page followed by a blank line as separator.
  markdownFiles.forEach((file, index) => {
    const filePath = path.join(outputDir, file);
    const markdownContent = fs.readFileSync(filePath, 'utf8');
    fs.appendFileSync(manualStream, markdownContent + "\n\n");
    console.log(`[${index + 1} of ${markdownFiles.length}] ${filePath} -> ${manualPath}`);
  });
} finally {
  // Always release the descriptor, even when a read or append throws.
  fs.closeSync(manualStream);
}

console.log('Manual file created.');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment