Created
December 17, 2024 06:46
-
-
Save PatelUtkarsh/c7384af624c2fa9a18b03549b0b530d8 to your computer and use it in GitHub Desktop.
Extract course codes from training.gov.au (requires Node.js 20).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { writeFile } from 'fs/promises'; | |
/**
 * Sends a request with `fetch` and aborts it if it has not settled in time.
 *
 * The timer is cleared as soon as the `fetch` promise settles, so the time
 * spent reading the body afterwards (for example `response.json()`) is not
 * covered by the timeout.
 *
 * @param {string} url - Address to request.
 * @param {number} [timeout=30000] - Milliseconds to wait before aborting.
 * @returns {Promise<Response>} The response that `fetch` resolved with.
 * @throws Rethrows whatever `fetch` rejects with, including the abort error
 *   raised when the timeout fires.
 */
async function fetchWithTimeout(url, timeout = 30000) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);

  try {
    // `await` here (rather than returning the bare promise) keeps the request
    // inside the try block so the finally clause runs after it settles.
    return await fetch(url, {
      signal: controller.signal,
      headers: {
        Accept: 'application/json',
        'User-Agent': 'Node.js/20.13.1 (Training Data Fetcher)',
      },
    });
  } finally {
    // Single cleanup point for both the success and the failure path.
    clearTimeout(timeoutId);
  }
}
/**
 * Builds the search URL for one page of results.
 *
 * @param {number} offset - Value of the `offset` query parameter.
 * @param {number} pageSize - Value of the `pageSize` query parameter.
 * @returns {string} The full request URL.
 */
function buildSearchUrl(offset, pageSize) {
  return `https://training.gov.au/api/search/training?api-version=1.0&searchText=&offset=${offset}&pageSize=${pageSize}&includeTotalCount=true`;
}

/**
 * Fetches one page of results, retrying on any failure.
 *
 * Never rejects: once every attempt has failed the error is logged and an
 * empty array is returned so the caller can carry on with the other pages.
 *
 * @param {number} pageNum - Page index; the request offset is `pageNum * pageSize`.
 * @param {number} pageSize - Number of records requested per page.
 * @param {number} timeoutMs - Per-request timeout in milliseconds.
 * @param {number} [maxAttempts=3] - Total number of attempts before giving up.
 * @returns {Promise<Array<object>>} Extracted course records, or `[]` on failure.
 */
async function fetchPageWithRetry(pageNum, pageSize, timeoutMs, maxAttempts = 3) {
  const url = buildSearchUrl(pageNum * pageSize, pageSize);

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      const response = await fetchWithTimeout(url, timeoutMs);
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      const data = await response.json();
      return extractCourseData(data.data);
    } catch (error) {
      const attemptsLeft = maxAttempts - attempt;
      if (attemptsLeft === 0) {
        console.error(`Failed to fetch page ${pageNum} after ${maxAttempts} attempts:`, error.message);
        return [];
      }
      console.warn(`Retrying page ${pageNum}, attempts left: ${attemptsLeft}`);
      // Fixed pause before the next attempt.
      await new Promise((resolve) => setTimeout(resolve, 2000));
    }
  }
  return [];
}

/**
 * Downloads every page of the training search results, saves the extracted
 * records with `saveToFile`, and returns them.
 *
 * The first request reads `totalCount`; the remaining pages are then fetched
 * in batches of up to `MAX_CONCURRENT_REQUESTS` parallel requests with a pause
 * between batches. Pages that still fail after their retries are skipped.
 * If the run itself fails, whatever was collected so far is saved and returned.
 *
 * @returns {Promise<Array<object>>} All course records collected (possibly partial).
 */
async function getAllCourses() {
  const PAGE_SIZE = 100;
  const MAX_CONCURRENT_REQUESTS = 100;
  // Milliseconds: fetchWithTimeout hands this value straight to setTimeout.
  // The previous value of 3 aborted every request after 3 ms. 30000 matches
  // fetchWithTimeout's own default.
  const REQUEST_TIMEOUT = 30000;
  let allCourses = [];

  try {
    const initialResponse = await fetchWithTimeout(buildSearchUrl(0, PAGE_SIZE), REQUEST_TIMEOUT);
    if (!initialResponse.ok) {
      throw new Error(`HTTP error! status: ${initialResponse.status}`);
    }

    const initialData = await initialResponse.json();
    const totalCount = initialData.totalCount;
    allCourses = [...extractCourseData(initialData.data)];
    console.log(`Total courses to fetch: ${totalCount}`);
    console.log(`Fetched ${allCourses.length} courses...`);

    // Page 0 is already loaded, so the batches cover pages 1 .. totalPages - 1.
    const totalPages = Math.ceil(totalCount / PAGE_SIZE);
    for (let firstPage = 1; firstPage < totalPages; firstPage += MAX_CONCURRENT_REQUESTS) {
      const batchSize = Math.min(MAX_CONCURRENT_REQUESTS, totalPages - firstPage);
      const pageNumbers = Array.from({ length: batchSize }, (_, index) => firstPage + index);

      try {
        const results = await Promise.all(
          pageNumbers.map((pageNum) => fetchPageWithRetry(pageNum, PAGE_SIZE, REQUEST_TIMEOUT))
        );
        allCourses = [...allCourses, ...results.flat()];
        console.log(`Fetched ${allCourses.length} of ${totalCount} courses...`);

        // Wait longer before the next batch when any page came back empty.
        const delay = results.some((pageCourses) => pageCourses.length === 0) ? 2000 : 1000;
        await new Promise((resolve) => setTimeout(resolve, delay));
      } catch (error) {
        // Best effort: a failed batch is reported and the run continues.
        console.error('Error processing chunk:', error);
      }
    }

    await saveToFile(allCourses);
    console.log('\nFinal Results Summary:');
    console.log(`Total courses fetched: ${allCourses.length}`);
    console.table(allCourses.slice(0, 10));
    return allCourses;
  } catch (error) {
    console.error('Error fetching courses:', error.message);
    // Keep whatever was collected before the failure.
    if (allCourses.length > 0) {
      await saveToFile(allCourses);
    }
    return allCourses;
  }
}
/**
 * Reduces raw course entries to the fields this script keeps.
 *
 * Nested values that are missing or falsy are replaced with the string 'N/A'.
 *
 * @param {Array<object>} data - Raw course entries.
 * @returns {Array<{code: *, title: *, type: *, status: *, startDate: *, endDate: *}>}
 *   One flattened record per input entry, in the same order.
 * @throws {TypeError} If `data` is not an array.
 */
function extractCourseData(data) {
  if (!Array.isArray(data)) {
    // Fail with an explicit message instead of the generic
    // "Cannot read properties of undefined (reading 'map')".
    const received = data === null ? 'null' : typeof data;
    throw new TypeError(`Expected an array of courses but received ${received}`);
  }

  return data.map((course) => ({
    code: course.code,
    title: course.title,
    type: course.type?.name || 'N/A',
    status: course.status?.name || 'N/A',
    startDate: course.currencyPeriod?.startDate || 'N/A',
    endDate: course.currencyPeriod?.endDate || 'N/A',
  }));
}
/**
 * Writes the given records as pretty-printed JSON to a new file named
 * `courses_<timestamp>.json`, resolved against the current working directory.
 *
 * Best effort: a write failure is logged and not rethrown.
 *
 * @param {Array<object>} courses - Records to serialise.
 * @returns {Promise<void>}
 */
async function saveToFile(courses) {
  // ':' and '.' in the ISO timestamp are replaced with '-' for the file name.
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const filename = `courses_${timestamp}.json`;

  try {
    await writeFile(filename, JSON.stringify(courses, null, 2));
    console.log(`\nData saved to ${filename}`);
  } catch (error) {
    console.error('Error saving to file:', error.message);
  }
}
// Error handling: last-resort handlers that log the error and exit with
// status 1. Registered before the run starts so they are in place throughout.
process.on('uncaughtException', (error) => {
  console.error('Uncaught Exception:', error);
  process.exit(1);
});

process.on('unhandledRejection', (error) => {
  console.error('Unhandled Rejection:', error);
  process.exit(1);
});

// Execute the function; a rejection is reported and ends the process with status 1.
getAllCourses().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment