Skip to content

Instantly share code, notes, and snippets.

@kmorrill
Created October 6, 2024 23:51
Show Gist options
  • Save kmorrill/a1129d25904d89b3cffa4e9df26d3c60 to your computer and use it in GitHub Desktop.
Save kmorrill/a1129d25904d89b3cffa4e9df26d3c60 to your computer and use it in GitHub Desktop.
(async function() {
// Delay helper: returns a promise that fulfils (with no value) once `ms`
// milliseconds have elapsed, so callers can write `await sleep(ms)`.
const sleep = (ms) => {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
};
// Function to extract book information based on ASIN.
//
// asin: the book's ASIN, which doubles as the id of its library container.
// Returns { title, author, lastAccessedDate }; each field falls back to a
// "... not found" placeholder string when its element is missing.
function extractBookInfo(asin) {
  // Resolve the container by id rather than interpolating the ASIN into a
  // `#id` selector: an id selector that begins with a digit (or is empty) is
  // invalid CSS and makes querySelector throw a SyntaxError.
  const bookContainer = document.getElementById(asin);
  const findInBook = (selector) => (bookContainer ? bookContainer.querySelector(selector) : null);

  // Extract the book title
  const titleElement = findInBook('h2.a-size-base.a-color-base.a-text-center.kp-notebook-searchable.a-text-bold');
  const title = titleElement ? titleElement.innerText.trim() : 'Title not found';

  // Extract the author (the page prefixes the name with "By: ")
  const authorElement = findInBook('p.a-spacing-base.a-spacing-top-mini.a-text-center.a-size-base.a-color-secondary.kp-notebook-searchable');
  const author = authorElement ? authorElement.innerText.replace('By: ', '').trim() : 'Author not found';

  // Extract the last accessed date
  const dateElement = document.getElementById(`kp-notebook-annotated-date-${asin}`);
  let lastAccessedDate = 'Last accessed date not found';
  if (dateElement) {
    // NOTE(review): on the notebook page this node appears to be a hidden
    // <input>, whose text is empty — fall back to its value. Confirm against the page.
    lastAccessedDate = (dateElement.innerText || dateElement.value || '').trim();
  }

  return {
    title,
    author,
    lastAccessedDate
  };
}
// Function to extract highlights and notes for the current book view.
//
// Returns one { color, pageNumber, highlightText, note } object per
// `div.kp-notebook-highlight` currently in the document. Missing pieces are
// reported with placeholder strings (or null for the note) rather than throwing.
function extractHighlights() {
  const HEADER_SELECTOR = 'span.kp-notebook-selectable.kp-notebook-metadata';
  const TEXT_SELECTOR = 'span.a-size-base-plus.a-color-base';
  const NOTE_SELECTOR = 'div.kp-notebook-note span.a-size-base-plus.a-color-base';
  // Checked in this order; the first matching class wins.
  // NOTE(review): 'pink' is assumed to follow the same class naming as the
  // other colours — confirm against the page.
  const KNOWN_COLORS = ['yellow', 'orange', 'blue', 'pink'];

  // Maps the container's colour class to a capitalised colour name.
  function detectColor(container) {
    const match = KNOWN_COLORS.find((name) => container.classList.contains(`kp-notebook-highlight-${name}`));
    return match ? match.charAt(0).toUpperCase() + match.slice(1) : 'Unknown';
  }

  // Finds the metadata header belonging to a highlight, or null.
  function findHeader(container) {
    // closest() also tests the element itself, so when the highlight container
    // carries `a-row` the first hit has no header inside it; keep climbing to
    // the next enclosing row until one contains the header.
    let row = container.closest('div.a-row');
    while (row) {
      const header = row.querySelector(HEADER_SELECTOR);
      if (header) {
        return header;
      }
      row = row.parentElement ? row.parentElement.closest('div.a-row') : null;
    }
    return null;
  }

  return Array.from(document.querySelectorAll('div.kp-notebook-highlight'), (container) => {
    const color = detectColor(container);

    // Extract the highlight text
    const highlightTextElement = container.querySelector(TEXT_SELECTOR);
    const highlightText = highlightTextElement ? highlightTextElement.innerText.trim() : 'Highlight text not found';

    // Extract the page number from the header text ("... Page: 12")
    let pageNumber = 'Page not found';
    const headerElement = findHeader(container);
    if (headerElement) {
      const pageMatch = headerElement.innerText.match(/Page:\s*(\d+)/);
      if (pageMatch && pageMatch[1]) {
        pageNumber = pageMatch[1];
      }
    }

    // Attempt to find an associated note next to the highlight; blank notes count as absent
    let note = null;
    const noteElement = container.parentElement ? container.parentElement.querySelector(NOTE_SELECTOR) : null;
    if (noteElement && noteElement.innerText.trim() !== '') {
      note = noteElement.innerText.trim();
    }

    return {
      color,
      pageNumber,
      highlightText,
      note
    };
  });
}
// Opens a book in the notebook view by simulating a click on its library entry.
// `asin` is the id of the book's container; a warning is logged (and nothing is
// clicked) when either the container or its link cannot be found.
function clickBook(asin) {
  const entry = document.getElementById(asin);
  if (!entry) {
    console.warn(`Book container with ASIN ${asin} not found.`);
    return;
  }

  const link = entry.querySelector('a.a-link-normal.a-text-normal');
  if (!link) {
    console.warn(`Clickable element for ASIN ${asin} not found.`);
    return;
  }

  link.click();
}
// Replaces the whole page with a pretty-printed JSON dump of `data`.
// The body is emptied first, then a single wrapping, monospace <pre> holding
// the 2-space-indented JSON text is appended.
function displayJSON(data) {
  // Wipe whatever the page currently shows
  document.body.innerHTML = '';

  const output = document.createElement('pre');
  Object.assign(output.style, {
    whiteSpace: 'pre-wrap', // Allow wrapping
    wordWrap: 'break-word',
    fontSize: '14px',
    fontFamily: 'monospace',
  });

  // textContent keeps the JSON as literal text
  output.textContent = JSON.stringify(data, null, 2);

  document.body.appendChild(output);
}
// Main function to process all books: for every book in the library it reads
// the book's metadata, clicks it, waits for the annotations, scrapes the
// highlights, and finally replaces the page with the collected JSON.
//
// Options (all optional; the defaults are the delays this script always used):
//   loadDelayMs - how long to wait after clicking a book before scraping (5000)
//   pauseMs     - pause between two books (1000)
//
// Resolves with the array of book records that was displayed, or [] when the
// library contains no books. A book whose processing throws is still listed,
// with whatever was gathered plus an `error` message, so one bad entry does
// not discard the results already collected.
async function processBooks({ loadDelayMs = 5000, pauseMs = 1000 } = {}) {
  const allBookContainers = document.querySelectorAll('div.kp-notebook-library-each-book');
  if (allBookContainers.length === 0) {
    console.error('No books found in the library.');
    return [];
  }

  // Process all books
  const booksToProcess = Array.from(allBookContainers);
  const booksData = [];
  for (let i = 0; i < booksToProcess.length; i++) {
    const asin = booksToProcess[i].id; // ASIN is used as the ID
    console.log(`\nProcessing Book ${i + 1}: ASIN ${asin}`);

    // Start from placeholders so a failure part-way through still yields a well-formed record
    const record = {
      title: 'Title not found',
      author: 'Author not found',
      lastAccessedDate: 'Last accessed date not found',
      highlights: []
    };

    try {
      // Extract book information
      const bookInfo = extractBookInfo(asin);
      console.log('Book Information:', bookInfo);
      record.title = bookInfo.title;
      record.author = bookInfo.author;
      record.lastAccessedDate = bookInfo.lastAccessedDate;

      // Click the book to load its annotations/highlights
      clickBook(asin);

      // Give the page time to load the annotations before scraping
      console.log('Waiting for annotations to load...');
      await sleep(loadDelayMs);

      // Extract highlights and notes
      record.highlights = extractHighlights();
      console.log(`Highlights for "${record.title}":`, record.highlights);
    } catch (err) {
      // Report and move on instead of aborting the whole export
      console.error(`Failed to process ASIN ${asin}:`, err);
      record.error = err instanceof Error ? err.message : String(err);
    }

    booksData.push(record);

    // Short pause before proceeding to the next book
    await sleep(pauseMs);
  }

  // After processing all books, display the JSON data
  displayJSON(booksData);
  console.log('Finished processing all books.');
  return booksData;
}
// Execute the main function. processBooks() returns a promise, so attach a
// rejection handler: otherwise a failure would surface only as an unhandled
// promise rejection instead of a clear error message.
processBooks().catch((error) => {
  console.error('Kindle highlight export failed:', error);
});
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment