Skip to content

Instantly share code, notes, and snippets.

@jermspeaks
Last active April 21, 2025 16:28
Show Gist options
  • Save jermspeaks/7f7727ea7e757fbdd1a7fcea34dc9662 to your computer and use it in GitHub Desktop.
Save jermspeaks/7f7727ea7e757fbdd1a7fcea34dc9662 to your computer and use it in GitHub Desktop.
import fs from "fs";
import path from "path";
// Search-results endpoint (program 20273) whose response lists the sessions to scrape.
// The query string asks for page 1 at 50 results per page, while also declaring
// total_pages=3 and total_entries=122.
const SESSION_LIST_URL =
"https://www.abstractsonline.com/oe3/Program/20273/Search/10/Results?page=1&pagesize=50&total_pages=3&total_entries=122&sort=1&order=asc";
/**
 * Request headers sent by `fetchJSON1`.
 *
 * In addition to the browser-like headers, the Cookie value carries the
 * `backpack` token, its expiration stamp, and the two AWS load-balancer
 * cookies (AWSALB / AWSALBCORS).
 * NOTE(review): these look like values captured from one browser session and
 * presumably stop working once that session expires — confirm before reuse.
 */
const HEADERS_1 = {
  Accept: "application/json",
  "Content-Type": "application/json",
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest",
  Backpack: "2be5f4ce-1db3-45f5-ac01-33d85c347bf8",
  Cookie:
    "backpack=2be5f4ce-1db3-45f5-ac01-33d85c347bf8; backpackExpiration=Mon%2C%2021%20Apr%202025%2020%3A46%3A53; AWSALB=TeM+pJyO7in2ZX4aLJz/cM5psdjo6UxBakTPTguUf3B08+DAZa+QyxHjwnb+Usf1eaigU4ZGw7Rlx2KNohT4F4xHND9AxIhBwMoBfLD9fCLI87dI7YvOGFzWsOLU; AWSALBCORS=TeM+pJyO7in2ZX4aLJz/cM5psdjo6UxBakTPTguUf3B08+DAZa+QyxHjwnb+Usf1eaigU4ZGw7Rlx2KNohT4F4xHND9AxIhBwMoBfLD9fCLI87dI7YvOGFzWsOLU",
};

/**
 * GETs `url` with `HEADERS_1` and parses the response body as JSON.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} `HTTP <status>: <statusText>` when the response is not ok.
 */
async function fetchJSON1(url) {
  const response = await fetch(url, { headers: HEADERS_1 });
  if (response.ok) {
    return response.json();
  }
  const { status, statusText } = response;
  throw new Error(`HTTP ${status}: ${statusText}`);
}
/**
 * Request headers sent by `fetchJSON`.
 *
 * Same browser-like headers and `backpack` token as the session-list request,
 * but the Cookie value holds only the `backpack` and `backpackExpiration`
 * entries.
 * NOTE(review): the token looks session-specific and presumably expires —
 * confirm before reuse.
 */
const HEADERS = {
  Accept: "application/json",
  "Content-Type": "application/json",
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest",
  Backpack: "2be5f4ce-1db3-45f5-ac01-33d85c347bf8",
  Cookie:
    "backpack=2be5f4ce-1db3-45f5-ac01-33d85c347bf8; backpackExpiration=Mon%2C%2021%20Apr%202025%2020%3A46%3A53",
};

/**
 * GETs `url` with `HEADERS` and parses the response body as JSON.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} `HTTP <status>: <statusText>` when the response is not ok.
 */
async function fetchJSON(url) {
  const response = await fetch(url, { headers: HEADERS });
  if (response.ok) {
    return response.json();
  }
  const { status, statusText } = response;
  throw new Error(`HTTP ${status}: ${statusText}`);
}
/**
 * Pulls the affiliation out of a presentation's author block.
 *
 * HTML tags are stripped, then everything after the first ". " separator is
 * returned. Keeping the whole remainder (rather than only the text up to the
 * next ". ") preserves affiliations that themselves contain ". ", such as
 * "St. Jude Children's Research Hospital".
 * NOTE(review): assumes the block reads "<presenter>. <affiliation>" — a
 * presenter name containing ". " (e.g. a middle initial) would still be split
 * at the wrong place; confirm against real data.
 *
 * @param {string | null | undefined} authorBlock - Author block, possibly containing HTML markup.
 * @returns {string | null} The trimmed affiliation text, or null when the
 *   input is empty, not a string, or has no ". " separator.
 */
function extractAffiliationFromAuthorBlock(authorBlock) {
  // Non-string input (null, undefined, numbers, ...) has nothing to parse.
  if (typeof authorBlock !== "string" || authorBlock === "") return null;

  const plainText = authorBlock.replace(/<[^>]+>/g, "").trim();
  const separator = ". ";
  const separatorIndex = plainText.indexOf(separator);
  if (separatorIndex === -1) return null;

  return plainText.slice(separatorIndex + separator.length).trim();
}
/**
 * Serializes presentation records into CSV text.
 *
 * The first line is a fixed header; each record then contributes one line.
 * Every cell is wrapped in double quotes with embedded quotes doubled, and
 * lines are joined with "\n". Missing values (null or undefined) become empty
 * cells instead of the literal text "null" / "undefined".
 *
 * @param {Array<{sessionTitle: *, sessionDate: *, startTime: *, presenter: *, title: *, affiliation: *}>} data
 *   Records to serialize.
 * @returns {string} CSV text without a trailing newline.
 */
function toCSV(data) {
  const header = [
    "Session Title",
    "Session Date",
    "Start Time",
    "Presenter",
    "Presentation Title",
    "Affiliation",
  ];
  const rows = data.map((row) => [
    row.sessionTitle,
    row.sessionDate,
    row.startTime,
    row.presenter,
    row.title,
    row.affiliation,
  ]);

  // `?? ""` only replaces null/undefined, so values such as 0 or false are kept.
  const quoteCell = (cell) => `"${String(cell ?? "").replaceAll('"', '""')}"`;

  return [header, ...rows]
    .map((cells) => cells.map(quoteCell).join(","))
    .join("\n");
}
/**
 * Collects the ids of the sessions listed by the search-results endpoint.
 *
 * SESSION_LIST_URL's own query string declares `total_pages`; every declared
 * page is requested instead of only page 1, so sessions beyond the first page
 * are no longer dropped.
 * NOTE(review): assumes the endpoint honours the `page` parameter. Ids are
 * de-duplicated and paging stops at the first empty page, so the result stays
 * correct even if it does not.
 *
 * @returns {Promise<Array<*>>} Unique session ids in the order first seen.
 * @throws {Error} When the first page cannot be fetched or has no `Results` array.
 */
async function fetchSessionIds() {
  const listUrl = new URL(SESSION_LIST_URL);
  const declaredPages = Number.parseInt(
    listUrl.searchParams.get("total_pages") ?? "",
    10
  );
  // A missing or unparsable total_pages yields NaN, which fails the comparison.
  const pageCount = declaredPages > 0 ? declaredPages : 1;

  const sessionIds = new Set();
  for (let page = 1; page <= pageCount; page += 1) {
    listUrl.searchParams.set("page", String(page));

    let results;
    try {
      results = (await fetchJSON1(listUrl.href)).Results;
      if (!Array.isArray(results)) {
        throw new Error("session list response has no Results array");
      }
    } catch (err) {
      // Without the first page there is nothing to scrape; later pages are
      // best-effort so the sessions already collected are still processed.
      if (page === 1) throw err;
      console.error(`Failed to fetch session list page ${page}: ${err.message}`);
      break;
    }

    if (results.length === 0) break;
    for (const result of results) {
      sessionIds.add(result.Id);
    }
  }
  return [...sessionIds];
}

/**
 * Scrapes every listed session and writes its presenters to `aacr_presenters.csv`.
 *
 * For each session id, the session metadata and its presentations are fetched
 * concurrently; presentations without a `PresenterDisplayName` are skipped.
 * A session that fails is logged and skipped so the remaining sessions are
 * still processed. The CSV is written next to this module, which relies on
 * `import.meta.dirname` being available in the running Node.js version.
 *
 * @returns {Promise<void>} Resolves once the CSV file has been written.
 * @throws {Error} When the session list itself cannot be fetched or the file cannot be written.
 */
async function scrape() {
  const sessionIds = await fetchSessionIds();

  const allPresentations = [];
  for (const sessionId of sessionIds) {
    try {
      const metadataUrl = `https://www.abstractsonline.com/oe3/Program/20273/Session/${sessionId}`;
      const presentationsUrl = `${metadataUrl}/presentations`;
      const [metadata, presentations] = await Promise.all([
        fetchJSON(metadataUrl),
        fetchJSON(presentationsUrl),
      ]);

      for (const p of presentations) {
        if (!p.PresenterDisplayName) continue;
        allPresentations.push({
          sessionTitle: metadata.Title,
          sessionDate: metadata.Date,
          startTime: p.Start,
          presenter: p.PresenterDisplayName,
          title: p.Title,
          affiliation: extractAffiliationFromAuthorBlock(p.AuthorBlock),
        });
      }
    } catch (err) {
      console.error(`Failed to process session ${sessionId}: ${err.message}`);
    }
  }

  // Convert to CSV and save it alongside this module.
  const csv = toCSV(allPresentations);
  const outputPath = path.join(import.meta.dirname, "./aacr_presenters.csv");
  fs.writeFileSync(outputPath, csv);
  console.log(`✅ CSV written to ${outputPath}`);
}
// Entry point: run the scraper. An unrecoverable failure is logged and the
// process is flagged with a non-zero exit status so callers (shells, CI, cron)
// can detect that no CSV was produced.
scrape().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment