pfeilbr · September 11, 2018 17:12
diff --git a/safari-book-to-text.js b/safari-book-to-text.js
 /* convert a safari book (safaribooksonline.com) to text in the browser */
 /* must be logged in and on one of the pages in the book */
 (async () => {

 /**
  * Converts a Safari Books Online book into plain text from inside the
  * browser. It reads the section links from the page's table of contents,
  * downloads every linked page and concatenates the text of each page's
  * content container.
  */
 class Scraper {
   /**
    * Resolves after the given delay.
    * @param {number} ms - Delay in milliseconds.
    * @returns {Promise<void>}
    */
   sleep(ms) {
     return new Promise((resolve) => setTimeout(resolve, ms));
   }

   /** @returns {string} CSS selector of the element that toggles the table of contents. */
   tocSelector() {
     return '.sbo-toc-thumb';
   }

   /** @returns {string} CSS selector matching the links inside the table of contents. */
   tocLinkElementsSelector() {
     return '.tocList a';
   }

   /** @returns {string} CSS selector of the element holding a section's text. */
   contentSelector() {
     return '#sbo-rt-content';
   }

   /**
    * @returns {boolean} True when at least one table-of-contents link is
    *   present in the current document.
    */
   isTableOfContentsLoaded() {
     return document.querySelectorAll(this.tocLinkElementsSelector()).length > 0;
   }

   /**
    * Loads the table of contents. Its HTML is fetched on demand when the user
    * clicks the toggle, so the toggle is clicked programmatically and the DOM
    * is polled until the links show up. The toggle is clicked a second time
    * afterwards to hide the panel again.
    *
    * @param {number} [timeoutMs=30000] - Maximum time to wait for the links.
    * @param {number} [pollIntervalMs=250] - Delay between two DOM checks.
    * @returns {Promise<void>}
    * @throws {Error} If the toggle element is missing or the links do not
    *   appear before the timeout expires.
    */
   async loadTableOfContents(timeoutMs = 30000, pollIntervalMs = 250) {
     const selector = this.tocSelector();
     const tocElement = document.querySelector(selector);
     if (tocElement === null) {
       throw new Error(`Table of contents toggle not found (selector "${selector}")`);
     }

     tocElement.click();
     const deadline = Date.now() + timeoutMs;
     while (!this.isTableOfContentsLoaded()) {
       // Bail out instead of polling forever when the links never arrive.
       if (Date.now() >= deadline) {
         throw new Error(`Timed out after ${timeoutMs} ms waiting for the table of contents to load`);
       }
       await this.sleep(pollIntervalMs);
     }
     tocElement.click(); // hide/toggle the TOC again when done
   }

   /**
    * Collects the sections (chapters, parts, ...) listed in the table of
    * contents.
    *
    * @returns {Array<{title: string, url: string}>} One entry per link, in
    *   document order. The title is the trimmed text of the link's first
    *   child node, or an empty string when the link has no children.
    */
   getSections() {
     const linkElements = document.querySelectorAll(this.tocLinkElementsSelector());
     return Array.from(linkElements, (linkElement) => {
       const firstChild = linkElement.firstChild;
       return {
         title: firstChild ? firstChild.textContent.trim() : '',
         url: linkElement.href,
       };
     });
   }

   /**
    * Downloads a URL and returns the response body as text.
    *
    * @param {string} url - Address to fetch.
    * @returns {Promise<string>} Response body.
    * @throws {Error} If the server answers with a non-2xx status, so that an
    *   error page is never mistaken for book content.
    */
   async fetchURLContent(url) {
     const resp = await fetch(url);
     if (!resp.ok) {
       throw new Error(`Failed to fetch ${url}: HTTP ${resp.status} ${resp.statusText}`);
     }
     return resp.text();
   }

   /**
    * Fetches the sections one after another and concatenates their text.
    *
    * @returns {Promise<string>} Text of the fetched sections, in TOC order.
    * @throws {Error} If a page cannot be fetched or lacks the content container.
    */
   async convertBookToText() {
     if (!this.isTableOfContentsLoaded()) {
       await this.loadTableOfContents();
     }

     const sections = this.getSections();
     const parser = new DOMParser();
     const contentSelector = this.contentSelector();
     const parts = [];

     // NOTE(review): the last TOC entry has always been left out here; the
     // reason is not visible in this file - confirm it is intentional.
     for (const section of sections.slice(0, -1)) {
       const html = await this.fetchURLContent(section.url);
       const doc = parser.parseFromString(html, 'text/html');
       const content = doc.querySelector(contentSelector);
       if (content === null) {
         throw new Error(
           `Content container "${contentSelector}" not found in section "${section.title}" (${section.url})`
         );
       }
       parts.push(content.textContent);
     }
     return parts.join('');
   }

   /**
    * Converts the book and writes the resulting text to the console.
    * @returns {Promise<void>}
    */
   async bookToText() {
     const text = await this.convertBookToText();
     console.log(text);
   }
 }

 // Run the export right away; bookToText() logs the resulting text to the console.
 await new Scraper().bookToText();

 })()
	/* convert a safari book (safaribooksonline.com) to text in the browser */
	/* must be logged in and on one of the pages in the book */
	(async () => {

	/**
	 * Converts a Safari Books Online book into plain text from inside the
	 * browser. It reads the section links from the page's table of contents,
	 * downloads every linked page and concatenates the text of each page's
	 * content container.
	 */
	class Scraper {
		/**
		 * Resolves after the given delay.
		 * @param {number} ms - Delay in milliseconds.
		 * @returns {Promise<void>}
		 */
		sleep(ms) {
			return new Promise((resolve) => setTimeout(resolve, ms));
		}

		/** @returns {string} CSS selector of the element that toggles the table of contents. */
		tocSelector() {
			return '.sbo-toc-thumb';
		}

		/** @returns {string} CSS selector matching the links inside the table of contents. */
		tocLinkElementsSelector() {
			return '.tocList a';
		}

		/** @returns {string} CSS selector of the element holding a section's text. */
		contentSelector() {
			return '#sbo-rt-content';
		}

		/**
		 * @returns {boolean} True when at least one table-of-contents link is
		 *   present in the current document.
		 */
		isTableOfContentsLoaded() {
			return document.querySelectorAll(this.tocLinkElementsSelector()).length > 0;
		}

		/**
		 * Loads the table of contents. Its HTML is fetched on demand when the
		 * user clicks the toggle, so the toggle is clicked programmatically and
		 * the DOM is polled until the links show up. The toggle is clicked a
		 * second time afterwards to hide the panel again.
		 *
		 * @param {number} [timeoutMs=30000] - Maximum time to wait for the links.
		 * @param {number} [pollIntervalMs=250] - Delay between two DOM checks.
		 * @returns {Promise<void>}
		 * @throws {Error} If the toggle element is missing or the links do not
		 *   appear before the timeout expires.
		 */
		async loadTableOfContents(timeoutMs = 30000, pollIntervalMs = 250) {
			const selector = this.tocSelector();
			const tocElement = document.querySelector(selector);
			if (tocElement === null) {
				throw new Error(`Table of contents toggle not found (selector "${selector}")`);
			}

			tocElement.click();
			const deadline = Date.now() + timeoutMs;
			while (!this.isTableOfContentsLoaded()) {
				// Bail out instead of polling forever when the links never arrive.
				if (Date.now() >= deadline) {
					throw new Error(`Timed out after ${timeoutMs} ms waiting for the table of contents to load`);
				}
				await this.sleep(pollIntervalMs);
			}
			tocElement.click(); // hide/toggle the TOC again when done
		}

		/**
		 * Collects the sections (chapters, parts, ...) listed in the table of
		 * contents.
		 *
		 * @returns {Array<{title: string, url: string}>} One entry per link, in
		 *   document order. The title is the trimmed text of the link's first
		 *   child node, or an empty string when the link has no children.
		 */
		getSections() {
			const linkElements = document.querySelectorAll(this.tocLinkElementsSelector());
			return Array.from(linkElements, (linkElement) => {
				const firstChild = linkElement.firstChild;
				return {
					title: firstChild ? firstChild.textContent.trim() : '',
					url: linkElement.href,
				};
			});
		}

		/**
		 * Downloads a URL and returns the response body as text.
		 *
		 * @param {string} url - Address to fetch.
		 * @returns {Promise<string>} Response body.
		 * @throws {Error} If the server answers with a non-2xx status, so that
		 *   an error page is never mistaken for book content.
		 */
		async fetchURLContent(url) {
			const resp = await fetch(url);
			if (!resp.ok) {
				throw new Error(`Failed to fetch ${url}: HTTP ${resp.status} ${resp.statusText}`);
			}
			return resp.text();
		}

		/**
		 * Fetches the sections one after another and concatenates their text.
		 *
		 * @returns {Promise<string>} Text of the fetched sections, in TOC order.
		 * @throws {Error} If a page cannot be fetched or lacks the content container.
		 */
		async convertBookToText() {
			if (!this.isTableOfContentsLoaded()) {
				await this.loadTableOfContents();
			}

			const sections = this.getSections();
			const parser = new DOMParser();
			const contentSelector = this.contentSelector();
			const parts = [];

			// NOTE(review): the last TOC entry has always been left out here; the
			// reason is not visible in this file - confirm it is intentional.
			for (const section of sections.slice(0, -1)) {
				const html = await this.fetchURLContent(section.url);
				const doc = parser.parseFromString(html, 'text/html');
				const content = doc.querySelector(contentSelector);
				if (content === null) {
					throw new Error(
						`Content container "${contentSelector}" not found in section "${section.title}" (${section.url})`
					);
				}
				parts.push(content.textContent);
			}
			return parts.join('');
		}

		/**
		 * Converts the book and writes the resulting text to the console.
		 * @returns {Promise<void>}
		 */
		async bookToText() {
			const text = await this.convertBookToText();
			console.log(text);
		}
	}

	// Run the export right away; bookToText() logs the resulting text to the console.
	await new Scraper().bookToText();

	})()