Skip to content

Instantly share code, notes, and snippets.

@PatelUtkarsh
Created December 17, 2024 06:46
Show Gist options
  • Save PatelUtkarsh/c7384af624c2fa9a18b03549b0b530d8 to your computer and use it in GitHub Desktop.
Save PatelUtkarsh/c7384af624c2fa9a18b03549b0b530d8 to your computer and use it in GitHub Desktop.
Extract course codes from training.gov.au (requires Node.js 20)
import { writeFile } from 'fs/promises';
/**
 * Issues a `fetch` request that is aborted when it takes too long to settle.
 *
 * The request always sends a JSON `Accept` header and a fixed `User-Agent`.
 * The abort timer is cancelled as soon as `fetch` itself settles, whether it
 * resolved or rejected.
 *
 * @param {string} url - Address to request.
 * @param {number} [timeout=30000] - Milliseconds to wait before aborting.
 * @returns {Promise<Response>} The response produced by `fetch`.
 * @throws Whatever `fetch` rejects with, including the abort error raised
 *   when the timer fires first.
 */
async function fetchWithTimeout(url, timeout = 30000) {
  const abortController = new AbortController();
  const abortTimer = setTimeout(() => {
    abortController.abort();
  }, timeout);

  const requestOptions = {
    signal: abortController.signal,
    headers: {
      Accept: 'application/json',
      'User-Agent': 'Node.js/20.13.1 (Training Data Fetcher)',
    },
  };

  try {
    return await fetch(url, requestOptions);
  } finally {
    // Runs on both success and failure so the timer never outlives the call.
    clearTimeout(abortTimer);
  }
}
/**
 * Downloads every record from the training.gov.au search API, page by page,
 * and writes the combined list to disk through `saveToFile`.
 *
 * The first page is requested on its own to learn `totalCount`; the remaining
 * pages are then requested in chunks of at most `MAX_CONCURRENT_REQUESTS`
 * parallel requests. Each page is attempted up to `MAX_ATTEMPTS` times; a page
 * that still fails is logged and contributes no records.
 *
 * Errors are logged rather than propagated: if something fails part-way,
 * whatever was collected so far is saved (when non-empty) and returned.
 *
 * @returns {Promise<Array<object>>} The course records collected.
 */
async function getAllCourses() {
  const PAGE_SIZE = 100;
  const MAX_CONCURRENT_REQUESTS = 100;
  // fetchWithTimeout() hands this value to setTimeout, so it is in
  // milliseconds. The previous value of 3 aborted every request after 3 ms.
  const REQUEST_TIMEOUT = 30000;
  const MAX_ATTEMPTS = 3;
  const RETRY_DELAY_MS = 2000;

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const buildSearchUrl = (offset) =>
    `https://training.gov.au/api/search/training?api-version=1.0&searchText=&offset=${offset}&pageSize=${PAGE_SIZE}&includeTotalCount=true`;

  // Requests one page and returns its parsed JSON body; throws on a non-2xx status.
  const fetchPage = async (offset) => {
    const response = await fetchWithTimeout(buildSearchUrl(offset), REQUEST_TIMEOUT);
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    return response.json();
  };

  // Fetches one page with retries; resolves to [] once all attempts are used up.
  const fetchPageWithRetry = async (pageNum) => {
    const offset = pageNum * PAGE_SIZE;
    for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt += 1) {
      try {
        const data = await fetchPage(offset);
        return extractCourseData(data.data);
      } catch (error) {
        const attemptsLeft = MAX_ATTEMPTS - attempt;
        if (attemptsLeft === 0) {
          console.error(
            `Failed to fetch page ${pageNum} after ${MAX_ATTEMPTS} attempts:`,
            error.message
          );
          return [];
        }
        console.warn(`Retrying page ${pageNum}, attempts left: ${attemptsLeft}`);
        await sleep(RETRY_DELAY_MS);
      }
    }
    return [];
  };

  let allCourses = [];
  try {
    const initialData = await fetchPage(0);
    const totalCount = initialData.totalCount;
    allCourses = [...extractCourseData(initialData.data)];
    console.log(`Total courses to fetch: ${totalCount}`);
    console.log(`Fetched ${allCourses.length} courses...`);

    // Page 0 is already loaded; the rest are numbered 1..totalPages - 1.
    const totalPages = Math.ceil(totalCount / PAGE_SIZE);
    const remainingPageNumbers = Array.from(
      { length: Math.max(totalPages - 1, 0) },
      (_, index) => index + 1
    );

    for (
      let start = 0;
      start < remainingPageNumbers.length;
      start += MAX_CONCURRENT_REQUESTS
    ) {
      const chunk = remainingPageNumbers.slice(start, start + MAX_CONCURRENT_REQUESTS);
      // fetchPageWithRetry never rejects, so Promise.all cannot fail fast here.
      const results = await Promise.all(chunk.map((pageNum) => fetchPageWithRetry(pageNum)));
      allCourses = allCourses.concat(results.flat());
      console.log(`Fetched ${allCourses.length} of ${totalCount} courses...`);

      // Pause between chunks (longer if any page came back empty); there is
      // nothing left to throttle after the final chunk.
      const isLastChunk = start + MAX_CONCURRENT_REQUESTS >= remainingPageNumbers.length;
      if (!isLastChunk) {
        const delay = results.some((pageCourses) => pageCourses.length === 0) ? 2000 : 1000;
        await sleep(delay);
      }
    }

    await saveToFile(allCourses);
    console.log('\nFinal Results Summary:');
    console.log(`Total courses fetched: ${allCourses.length}`);
    console.table(allCourses.slice(0, 10));
    return allCourses;
  } catch (error) {
    console.error('Error fetching courses:', error.message);
    // Keep partial results rather than losing everything collected so far.
    if (allCourses.length > 0) {
      await saveToFile(allCourses);
    }
    return allCourses;
  }
}
/**
 * Reduces raw API records to the six fields this script keeps.
 *
 * `code` and `title` are copied as-is. The nested `type.name`, `status.name`,
 * `currencyPeriod.startDate` and `currencyPeriod.endDate` values fall back to
 * the string 'N/A' whenever they are missing or otherwise falsy.
 *
 * @param {Array<object>} data - Raw course records.
 * @returns {Array<{code: *, title: *, type: *, status: *, startDate: *, endDate: *}>}
 *   One flattened record per input element, in the same order.
 */
function extractCourseData(data) {
  const MISSING = 'N/A';

  const toRecord = (course) => {
    const { code, title } = course;
    return {
      code,
      title,
      type: course.type?.name || MISSING,
      status: course.status?.name || MISSING,
      startDate: course.currencyPeriod?.startDate || MISSING,
      endDate: course.currencyPeriod?.endDate || MISSING,
    };
  };

  return data.map((course) => toRecord(course));
}
/**
 * Writes the given courses as pretty-printed JSON to a new file in the
 * current working directory.
 *
 * The file is named `courses_<timestamp>.json`, where the timestamp is the
 * current ISO-8601 time with every ':' and '.' replaced by '-'. Write
 * failures are logged and swallowed, so this function does not reject on them.
 *
 * @param {*} courses - Value to serialise (normally the array of course records).
 * @returns {Promise<void>}
 */
async function saveToFile(courses) {
  const isoStamp = new Date().toISOString();
  const safeStamp = isoStamp.replace(/[:.]/g, '-');
  const filename = `courses_${safeStamp}.json`;

  try {
    const serialised = JSON.stringify(courses, null, 2);
    await writeFile(filename, serialised);
    console.log(`\nData saved to ${filename}`);
  } catch (error) {
    console.error('Error saving to file:', error.message);
  }
}
// Entry point: start the download. getAllCourses() handles its own errors, so
// this catch only fires for something unexpected.
getAllCourses().catch((fatalError) => {
  console.error('Fatal error:', fatalError);
  process.exit(1);
});

// Last-resort handlers: log the problem and exit with a failure status.
const processFailureLabels = [
  ['uncaughtException', 'Uncaught Exception:'],
  ['unhandledRejection', 'Unhandled Rejection:'],
];
for (const [eventName, label] of processFailureLabels) {
  process.on(eventName, (error) => {
    console.error(label, error);
    process.exit(1);
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment