Created
March 7, 2021 15:23
-
-
Save khaledosman/6ea6db4799ee3e2694e2ebff12d8f40d to your computer and use it in GitHub Desktop.
Recursively finds all .md files in a folder and replaces the first link in each file with the path to a downloaded copy of the linked file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs') | |
const path = require('path') | |
const http = require('https') | |
const { promisify } = require('util') | |
const promisifiedReadFile = promisify(fs.readFile) | |
const promisifiedWriteFile = promisify(fs.writeFile) | |
/**
 * Recursively collects the paths of all Markdown (`.md`) files under a directory.
 *
 * @param {string} startPath - Directory to start searching from.
 * @returns {string[]} Path of every `.md` file found, each built by joining onto `startPath`.
 * @throws {Error} If `startPath` does not exist.
 */
function getMdFilePaths (startPath) {
  if (!fs.existsSync(startPath)) {
    throw new Error(`directory ${startPath} does not exist`)
  }

  // withFileTypes yields Dirent objects, so no separate lstat call is needed per entry.
  const entries = fs.readdirSync(startPath, { withFileTypes: true })
  return entries.flatMap(entry => {
    const entryPath = path.join(startPath, entry.name)
    if (entry.isDirectory()) {
      return getMdFilePaths(entryPath) // recurse into the subdirectory
    }
    // flatMap flattens the empty array away, so non-Markdown entries simply vanish.
    return path.extname(entryPath) === '.md' ? [entryPath] : []
  })
}
/**
 * Starts processing every Markdown file at once and waits for all of them.
 *
 * @param {string[]} mdFiles - Paths of the Markdown files to process.
 * @returns {Promise<Array>} Resolves with the `processMdFile` results in input
 *   order; rejects as soon as any single file fails.
 */
function processMdFiles (mdFiles) {
  const processOne = async (filePath) => processMdFile(filePath, mdFiles)
  const pending = mdFiles.map(processOne)
  return Promise.all(pending)
}
/**
 * Downloads the first URL found in a Markdown file and rewrites the file so
 * that this URL is replaced by the path of the downloaded copy.
 *
 * Only the first match is handled; any further URLs in the file are left as-is.
 *
 * @param {string} filePath - Path of the Markdown file to process.
 * @returns {Promise<string>} The file content after processing (the unchanged
 *   content when no URL was found).
 */
async function processMdFile (filePath) {
  // copied from https://www.regextester.com/96504
  const URL_REGEX = /(?:(?:https?|ftp):\/\/|\b(?:[a-z\d]+\.))(?:(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))?\))+(?:\((?:[^\s()<>]+|(?:\(?:[^\s()<>]+\)))?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))?/
  const fileContent = await promisifiedReadFile(filePath, { encoding: 'utf8' })
  console.log('processing ' + filePath)
  const match = fileContent.match(URL_REGEX)
  console.log({ match })
  if (!match) {
    return fileContent
  }

  const matchedUrl = match[0]
  // The download is stored next to the Markdown file that references it.
  const newFilePath = await downloadFile(matchedUrl, path.dirname(filePath))
  console.log('file downloaded', newFilePath)

  // Splice by index rather than calling String.prototype.replace with the path
  // as the replacement: replace() would interpret `$&`, `$$`, etc. inside the
  // file name as substitution patterns and corrupt the written content.
  const newFileContent =
    fileContent.slice(0, match.index) +
    newFilePath +
    fileContent.slice(match.index + matchedUrl.length)
  console.log('new content', newFileContent)
  console.log(`replacing ${filePath} with ${newFileContent}`)
  // Overwrites the original Markdown file in place.
  await promisifiedWriteFile(filePath, newFileContent)
  return newFileContent
}
/**
 * Extracts a safe file name from a Content-Disposition header value.
 *
 * Handles both the extended form (`filename*=UTF-8''my%20file.pdf`) and the
 * plain form (`filename="my file.pdf"` or `filename=my.pdf`).
 *
 * @param {string|undefined} header - Raw Content-Disposition header value.
 * @returns {string|null} The bare file name, or null when none can be derived.
 */
function fileNameFromContentDisposition (header) {
  if (!header) {
    return null
  }

  let name = null
  const extended = /filename\*\s*=\s*[^']*'[^']*'([^;]+)/i.exec(header)
  if (extended) {
    name = extended[1].trim()
    try {
      name = decodeURIComponent(name)
    } catch (err) {
      // Malformed percent-encoding: fall back to the raw value.
    }
  } else {
    const plain = /filename\s*=\s*(?:"([^"]*)"|([^;]+))/i.exec(header)
    if (plain) {
      name = (plain[1] !== undefined ? plain[1] : plain[2]).trim()
    }
  }
  if (!name) {
    return null
  }

  // The header is controlled by the remote server: drop any directory part so
  // the download cannot be written outside the requested output directory.
  const base = path.basename(name.replace(/\\/g, '/'))
  return base && base !== '.' && base !== '..' ? base : null
}

/**
 * Downloads `url` into the directory `outputPath`.
 *
 * The file name is taken from the response's Content-Disposition header when
 * present, otherwise a random `document <n>.html` name is used.
 *
 * @param {string} url - Address to download; passed straight to `http.get`.
 * @param {string} outputPath - Directory in which the file is created.
 * @returns {Promise<string>} Resolves with the path of the written file.
 *   Rejects when the request fails, the response status is not 2xx, or the
 *   file cannot be written (a partially written file is removed).
 */
async function downloadFile (url, outputPath) {
  return new Promise((resolve, reject) => {
    const request = http.get(url, (response) => {
      const { statusCode } = response
      if (statusCode < 200 || statusCode >= 300) {
        // Saving an error page or redirect body would silently replace the
        // link with garbage, so treat it as a failure instead.
        response.resume() // discard the body so the socket is released
        reject(new Error(`download of ${url} failed with status code ${statusCode}`))
        return
      }

      // get file name from response https://stackoverflow.com/questions/20132064/node-js-download-file-using-content-disposition-as-filename
      const filename =
        fileNameFromContentDisposition(response.headers['content-disposition']) ||
        `document ${Math.random()}.html`
      const newFilePath = path.join(outputPath, filename)
      const writeStream = fs.createWriteStream(newFilePath)

      let failed = false
      const fail = (err) => {
        if (failed) {
          return
        }
        failed = true
        response.destroy()
        writeStream.destroy()
        // delete the corrupted file, then reject; an unlink failure is ignored
        // because the original error is the one the caller needs to see
        fs.unlink(newFilePath, () => reject(err))
      }

      response.on('error', fail)
      writeStream.on('error', fail)
      writeStream.on('finish', () => {
        // resolve the promise with the file path once the file is created
        writeStream.close(() => {
          resolve(newFilePath)
        })
      })
      response.pipe(writeStream)
    })

    // Connection-level failures (DNS, refused, TLS) are emitted on the request
    // object; without a listener they would crash the process.
    request.on('error', reject)
  })
}
// run the program: find every .md file under START_PATH and process each one.
// The leading semicolon guards against the parenthesis being parsed as a call
// on whatever expression precedes it (this file relies on ASI).
;(async function main () {
  const START_PATH = 'root'
  const mdFiles = getMdFilePaths(START_PATH)
  console.log({ mdFiles })
  await processMdFiles(mdFiles)
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of leaving
  // the rejection unhandled.
  console.error(err)
  process.exitCode = 1
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment