Last active
February 5, 2025 00:40
-
-
Save mauriciogior/1635c35e306ac08f7afc3c1863b312a3 to your computer and use it in GitHub Desktop.
Busca PlacasBrasil
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "placasbrasil-search",
  "version": "1.0.0",
  "description": "PlacasBrasil Search",
  "main": "search.js",
  "scripts": {
    "search": "node search.js"
  },
  "dependencies": {
    "cheerio": "^1.0.0",
    "node-fetch": "^3.3.2"
  },
  "type": "module"
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fetch from 'node-fetch'; | |
import * as cheerio from 'cheerio'; | |
/**
 * Looks up a plate on placasbrasil.com and scrapes the details page into an object.
 *
 * Each `.card-detail` element on the page becomes one property: the text of its
 * `<strong>` child is converted into a camelCase key, and the text of its `<span>`
 * child is the value. Values containing "Desbloquear" (treated as locked/hidden
 * fields) are stored as `null`. Cards whose label yields an empty key are skipped.
 *
 * @param {string} plate - Plate to look up; it is URL-encoded into the request path.
 * @returns {Promise<Object<string, (string|null)>>} Scraped fields keyed by camelCase label.
 * @throws {Error} If the request fails or the server answers with a non-2xx status.
 *   The error is logged here and then rethrown unchanged.
 */
async function getVehicleInfo(plate) {
  try {
    const response = await fetch(`https://placasbrasil.com/placa/${encodeURIComponent(plate)}`, {
      headers: {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      },
    });

    // Without this check an error page (404, 429, 5xx, ...) would be parsed like a
    // regular page and come back as a "successful" lookup with no fields.
    if (!response.ok) {
      throw new Error(`Request for plate ${plate} failed with HTTP ${response.status} ${response.statusText}`);
    }

    const html = await response.text();
    const $ = cheerio.load(html);
    const vehicleInfo = {};

    // Parse each card-detail div
    $('.card-detail').each((i, element) => {
      const label = $(element).find('strong').text().replace(':', '').trim();
      let value = $(element).find('span').text().trim();

      // Locked/hidden values carry no data, so record them as null
      if (value.includes('Desbloquear')) {
        value = null;
      }

      // Convert label to a camelCase key. Diacritics are stripped first because
      // `\w` only matches ASCII: otherwise "Município" would lose its "í" entirely
      // and end up as "municpio".
      const key = label
        .normalize('NFD')
        .replace(/[\u0300-\u036f]/g, '')
        .toLowerCase()
        .replace(/[^\w\s]/g, '')
        .replace(/\s+(.)/g, (match, group) => group.toUpperCase());

      // A card without a usable label would otherwise create an "" property
      if (key === '') {
        return;
      }

      vehicleInfo[key] = value;
    });

    return vehicleInfo;
  } catch (error) {
    console.error('Error fetching vehicle info:', error);
    throw error;
  }
}
// Example usage | |
// getVehicleInfo('BTA2D24').then(console.log).catch(console.error); | |
export { getVehicleInfo }; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { getVehicleInfo } from './parser.js'; | |
import fs from 'fs/promises'; | |
// Number of plates grouped into one batch; all plates of a batch are looked up at once.
const MAX_CONCURRENT_REQUESTS = 10;

/**
 * Looks up all given plates concurrently and resolves once every lookup has finished.
 *
 * A failing lookup does not reject the batch: it is logged and reported as a record
 * holding only the plate and the error message.
 *
 * @param {string[]} plates - Plates to look up.
 * @returns {Promise<Object[]>} One record per plate, in input order. Successful
 *   records contain `plate`, the scraped fields and `error: null`; failed records
 *   contain `plate` and `error` (the message).
 */
async function processPlatesInBatch(plates) {
  const lookUpPlate = async (plate) => {
    try {
      console.log(`Fetching data for plate: ${plate}`);
      const vehicleInfo = await getVehicleInfo(plate);
      const record = { plate, ...vehicleInfo, error: null };
      return record;
    } catch (error) {
      const reason = error.message;
      console.error(`Error processing plate ${plate}:`, reason);
      return { plate, error: reason };
    }
  };

  const pendingLookups = plates.map((plate) => lookUpPlate(plate));
  return Promise.all(pendingLookups);
}
/**
 * Serialises lookup results as CSV text.
 *
 * The header row starts with `plate` and `error`, followed by every other key found
 * in any result, in first-seen order. Every data cell is wrapped in double quotes
 * with embedded quotes doubled. Missing, `null` and `undefined` values become empty
 * cells; other falsy values such as `0` or `false` are kept.
 *
 * @param {Object[]} results - Result records, each keyed by column name.
 * @returns {string} CSV text with rows joined by "\n", or '' when `results` is empty.
 */
function convertToCSV(results) {
  if (results.length === 0) return '';

  // Get all possible headers from all results
  const headers = ['plate', 'error', ...new Set(
    results.flatMap((result) => Object.keys(result))
      .filter((key) => key !== 'plate' && key !== 'error')
  )];

  // `??` rather than `||`: only absent values should turn into an empty cell,
  // a legitimate 0 or false must survive.
  const toCell = (value) => `"${String(value ?? '').replace(/"/g, '""')}"`;

  const csvRows = [headers.join(',')];
  for (const result of results) {
    csvRows.push(headers.map((header) => toCell(result[header])).join(','));
  }

  return csvRows.join('\n');
}
/**
 * Writes the results collected so far to `outputPath` and logs a confirmation.
 *
 * @param {Object[]} results - Result records to persist.
 * @param {string} outputPath - File to write (overwritten on each call).
 * @param {string} format - 'json' writes pretty-printed JSON; any other value
 *   writes the CSV produced by convertToCSV.
 * @returns {Promise<void>} Resolves after the file has been written.
 */
async function saveProgress(results, outputPath, format) {
  let serialized;
  if (format === 'json') {
    serialized = JSON.stringify(results, null, 2);
  } else {
    serialized = convertToCSV(results);
  }

  await fs.writeFile(outputPath, serialized);

  const formatLabel = format.toUpperCase();
  console.log(`Progress saved to ${outputPath} in ${formatLabel} format`);
}
/**
 * Runs the whole lookup pipeline for a file of plates.
 *
 * Reads `inputPath`, splits it on whitespace and commas, upper-cases the entries,
 * drops entries that are not exactly 7 characters long and removes duplicates.
 * The remaining plates are looked up in batches of MAX_CONCURRENT_REQUESTS with a
 * one-second pause between batches. Results are written to `outputPath` (after every
 * batch for CSV, and once more at the end for either format) and a summary is logged.
 *
 * Any error is logged as fatal and terminates the process with exit code 1.
 *
 * @param {string} inputPath - File containing the plates.
 * @param {string} outputPath - File the results are written to.
 * @param {string} [format='json'] - 'json' or 'csv' (case-insensitive).
 * @returns {Promise<void>}
 */
async function processPlates(inputPath, outputPath, format = 'json') {
  try {
    // Validate format
    const outputFormat = format.toLowerCase();
    const isSupportedFormat = outputFormat === 'json' || outputFormat === 'csv';
    if (!isSupportedFormat) {
      throw new Error('Format must be either "json" or "csv"');
    }

    // Read the input and break it into raw entries
    const fileContent = await fs.readFile(inputPath, 'utf-8');
    const rawEntries = fileContent.split(/[\s,]+/);
    const originalCount = rawEntries.filter(Boolean).length;
    const allPlates = rawEntries
      .map((entry) => entry.trim().toUpperCase())
      .filter(Boolean);

    // Separate valid plates from invalid ones, reporting each invalid entry
    const validPlates = [];
    let invalidCount = 0;
    for (const plate of allPlates) {
      if (plate.length === 7) {
        validPlates.push(plate);
        continue;
      }
      invalidCount += 1;
      console.log(`Skipping invalid plate ${plate}: must be 7 characters`);
    }

    // Deduplicate while keeping first-seen order
    const uniquePlates = new Set(validPlates);
    const duplicateCount = validPlates.length - uniquePlates.size;
    const plates = [...uniquePlates];
    console.log(`Processing ${plates.length} valid unique plates...`);

    // Work through the plates one batch at a time
    const results = [];
    let successCount = 0;
    let failureCount = 0;
    let offset = 0;
    while (offset < plates.length) {
      const batchEnd = offset + MAX_CONCURRENT_REQUESTS;
      const batch = plates.slice(offset, batchEnd);
      console.log(`Processing batch of ${batch.length} plates (${offset + 1}-${Math.min(batchEnd, plates.length)} of ${plates.length})`);

      const batchResults = await processPlatesInBatch(batch);
      results.push(...batchResults);

      const failuresInBatch = batchResults.filter((result) => result.error).length;
      failureCount += failuresInBatch;
      successCount += batchResults.length - failuresInBatch;

      // CSV output is refreshed after every batch so partial progress is on disk
      if (outputFormat === 'csv') {
        await saveProgress(results, outputPath, outputFormat);
      }

      // Pause between batches (not after the last one) to be nice to the server
      const hasMoreBatches = batchEnd < plates.length;
      if (hasMoreBatches) {
        await new Promise((resolve) => {
          setTimeout(resolve, 1000);
        });
      }

      offset = batchEnd;
    }

    // Final save
    await saveProgress(results, outputPath, outputFormat);

    // Display final statistics
    const summaryLines = [
      '\nProcessing Summary:',
      '------------------',
      `Original input count: ${originalCount} plates`,
      `Skipped (invalid format): ${invalidCount} plates`,
      `Skipped (duplicates): ${duplicateCount} plates`,
      `Successfully processed: ${successCount} plates`,
      `Failed to process: ${failureCount} plates`,
      `Total processed: ${successCount + failureCount} plates`,
      '------------------',
    ];
    for (const line of summaryLines) {
      console.log(line);
    }
  } catch (error) {
    console.error('Fatal error:', error);
    process.exit(1);
  }
}
// Command-line entry point: <input-file> <output-file> [format].
// Two or three arguments are required; otherwise usage is printed and the process exits with code 1.
const cliArgs = process.argv.slice(2);
const hasValidArgCount = cliArgs.length >= 2 && cliArgs.length <= 3;

if (!hasValidArgCount) {
  const usageLines = [
    'Usage: npm run search <input-file> <output-file> [format]',
    'Format can be either "json" or "csv" (defaults to json)',
  ];
  for (const line of usageLines) {
    console.error(line);
  }
  process.exit(1);
}

const inputPath = cliArgs[0];
const outputPath = cliArgs[1];
// The format argument is optional and falls back to JSON when omitted
const format = cliArgs[2] === undefined ? 'json' : cliArgs[2];

processPlates(inputPath, outputPath, format);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment