Skip to content

Instantly share code, notes, and snippets.

@mhauken
Created June 19, 2025 10:44
Show Gist options
  • Save mhauken/cb8ebabeb80ba453da8e394162629b0f to your computer and use it in GitHub Desktop.
Save mhauken/cb8ebabeb80ba453da8e394162629b0f to your computer and use it in GitHub Desktop.
PDF downloader
const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream/promises');

const axios = require('axios');
const cheerio = require('cheerio');
/**
 * Derive a safe local filename for a PDF URL.
 *
 * Only the last path segment is used, so the query string never ends up in
 * the filename. Percent-encoding is decoded (`My%20Report.pdf` becomes
 * `My Report.pdf`) and a `.pdf` extension is appended when the segment lacks one.
 *
 * @param {string} pdfUrl - Absolute URL of the PDF.
 * @returns {string} A filename without any directory component.
 */
function pdfFilenameFromUrl(pdfUrl) {
  const { pathname } = new URL(pdfUrl);
  let name = path.basename(pathname);
  try {
    // Decoding can reveal separators (%2F, %5C), so take the basename again.
    name = path.basename(decodeURIComponent(name).replace(/\\/g, '/'));
  } catch {
    // Malformed percent-encoding: keep the raw, still-encoded segment.
  }
  if (!name) {
    name = 'download';
  }
  return name.toLowerCase().endsWith('.pdf') ? name : `${name}.pdf`;
}

/**
 * Download every PDF linked from a web page into the local `downloads` directory.
 *
 * The page at `url` is fetched, every `<a href>` is resolved against `url`,
 * and each distinct link whose path or query string ends in `.pdf`
 * (case-insensitive) is downloaded sequentially. Failures are logged to the
 * console and never thrown: a failing PDF does not stop the remaining ones.
 *
 * @param {string} url - Address of the page to scan for PDF links.
 * @returns {Promise<void>} Resolves once every download has been attempted.
 */
async function downloadPDFs(url) {
  try {
    // Get webpage content
    const response = await axios.get(url);
    const $ = cheerio.load(response.data);

    // Find all PDF links; the Set removes duplicates.
    const pdfLinks = new Set();
    $('a').each((i, link) => {
      const href = $(link).attr('href');
      if (!href) {
        return;
      }
      let target;
      try {
        // Handle relative URLs
        target = new URL(href, url);
      } catch {
        // A single malformed href must not abort the whole scan.
        return;
      }
      // Test the parsed parts rather than the raw href so that links such as
      // `file.pdf?v=2` or `file.pdf#page=3` are recognised as PDFs.
      const isPdf = [target.pathname, target.search].some((part) =>
        part.toLowerCase().endsWith('.pdf'));
      if (isPdf) {
        // The fragment is never sent to the server; drop it so that
        // `a.pdf` and `a.pdf#page=3` count as the same download.
        target.hash = '';
        pdfLinks.add(target.href);
      }
    });
    console.log(`Found ${pdfLinks.size} PDF links`);

    // Create downloads directory if it doesn't exist (no-op when it does).
    const downloadDir = 'downloads';
    fs.mkdirSync(downloadDir, { recursive: true });

    // Download each PDF
    for (const pdfUrl of pdfLinks) {
      try {
        const filename = pdfFilenameFromUrl(pdfUrl);
        const filepath = path.join(downloadDir, filename);
        console.log(`Downloading: ${filename}`);
        const pdfResponse = await axios({
          method: 'get',
          url: pdfUrl,
          responseType: 'stream',
        });
        try {
          // pipeline() rejects when either stream fails; a bare pipe() does
          // not forward source errors and would leave this await pending.
          await pipeline(pdfResponse.data, fs.createWriteStream(filepath));
        } catch (error) {
          // Best effort: do not leave a truncated file that looks like a PDF.
          await fs.promises.rm(filepath, { force: true }).catch(() => {});
          throw error;
        }
        console.log(`Downloaded: ${filename}`);
      } catch (error) {
        console.error(`Failed to download ${pdfUrl}:`, error.message);
      }
    }
    console.log('All downloads completed!');
  } catch (error) {
    console.error('Error:', error.message);
  }
}
// Entry point: the page to scan is the first command line argument
// (argv[0] is the node binary, argv[1] the script path).
const [, , url] = process.argv;
if (url) {
  downloadPDFs(url);
} else {
  // No URL given: show usage and exit with a failure status.
  console.error('Please provide a URL as an argument');
  console.log('Usage: node script.js https://example.com');
  process.exit(1);
}
{
"dependencies": {
"axios": "^1.7.8",
"cheerio": "^1.0.0"
}
}
node download-pdfs.js https://example.com
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment