Last active
September 27, 2018 22:41
-
-
Save SkyLeach/90ab87bf5c4622f3f99f45e30989fbf1 to your computer and use it in GitHub Desktop.
Userscript for extracting data from Audible.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript== | |
// @name Download Library Data | |
// @namespace audible_userscript | |
// @description Global/Universal functions added to all web pages via (Tamper/Grease)monkey | |
// @include https://www.audible.com/lib* | |
// @version 0.0.1 | |
// @grant GM_addStyle | |
// @require https://code.jquery.com/jquery-3.2.1.slim.min.js | |
// @require https://d3js.org/d3.v4.min.js | |
// ==/UserScript== | |
/*jshint esversion: 6 */ | |
/*vi: ft=javascript ts=2 sw=2 sts=2 cc=100 et*/ | |
/*NOTE: Do not touch above this line*/ | |
(function() { | |
// 'use strict'; // breaks keywords? | |
function AudibleRowParser(row) {
  // Placeholder for a per-row parsing object.
  // `row` is accepted but not read yet; every call returns a new, empty object.
  var parser = {};
  return parser;
}
function grab_header() {
  // Read the text of every header cell in the library table, then keep only
  // the cells at positions 2 through 6 (slice end index 7 is exclusive).
  // Returns a jQuery collection of strings, not a plain Array.
  var header_cells = $('div.adbl-lib-content div table tbody tr th');
  var labels = header_cells.map(function(index, cell) {
    return $(cell).text();
  });
  return labels.slice(2, 7);
}
function collapse_extra_rows() {
  // Toggle the multi-part child rows of the library table.
  //
  // The toggle link starts out as '[-]' (rows shown). Clicking hides the rows
  // and flips the label to '[+]'; clicking again shows them and restores '[-]'.
  // Previously the rows were hidden unconditionally, so the '[+]' label
  // promised an expand action that never happened.
  var toggle_link = $('a#collapse_parts');
  var part_rows = $('.adbl-lib-multipart-child');
  var currently_collapsed = toggle_link.text() === '[+]';
  if (currently_collapsed) {
    part_rows.show();
    toggle_link.text('[-]');
  } else {
    part_rows.hide(); //hide the multiparts..
    toggle_link.text('[+]');
  }
}
function grab_library_titles() {
  // Build one entry per visible data row of the library table (the first row,
  // which holds the headers, is skipped by :nth-child(n+2)).
  // Each entry is a jQuery collection holding the trimmed text of the row's
  // cells at positions 2 through 6.
  var visible_rows = $('table:nth-child(3) > tbody:nth-child(1) > tr:visible:nth-child(n+2)');
  return visible_rows.map(function(row_index, row_el) {
    var cell_texts = $(row_el).children('td').map(function(cell_index, cell_el) {
      var cell = $(cell_el);
      if (cell.attr('name') != 'titleInfo') {
        return cell.text().trim();
      }
      // Title cell: work on a copy so the page is untouched, and drop the
      // child <div>s (social overlay text) and the PDF link before reading.
      var cleaned = cell.clone();
      cleaned.children('div').replaceWith('');
      cleaned.children('[name=pdfLink]').replaceWith('');
      return cleaned.text().trim();
    });
    return cell_texts.slice(2, 7);
  });
}
function download_library_csv() {
  // Assemble the visible library rows into CSV text and hand it to
  // download_text_file() as 'library.csv'.
  //
  // The header line is the header labels joined with commas; each data row is
  // emitted with every field wrapped in double quotes.
  // NOTE(review): double quotes inside a field are not escaped.
  var csv = Array.prototype.join.call(grab_header()) + '\n';
  $.each(grab_library_titles(), function(i, d) {
    csv += '"' + Array.prototype.join.call(d, '","') + '"\n';
  });
  // JavaScript has no keyword arguments: `filename='library.csv'` was an
  // assignment expression that created an implicit global (and throws under
  // 'use strict'). Pass the name positionally instead.
  download_text_file(csv, 'library.csv');
}
function increment_progress_bar(bar) {
  // Advance a progress bar by one step: read its `value` attribute as a
  // base-10 integer and write back that number plus one.
  var progress = $(bar);
  var current = parseInt(progress.attr('value'), 10);
  progress.attr('value', current + 1);
}
function download_text_file(textData, filename='data.csv', mimetype='text/csv;charset=utf-8;') {
  // Offer `textData` to the user as a file download.
  //
  //   textData - content of the file
  //   filename - suggested file name (default 'data.csv')
  //   mimetype - MIME type given to the Blob (default CSV, UTF-8)
  //
  // Strategy: wrap the text in a Blob, then either use the IE-specific
  // msSaveBlob, or create a temporary hidden <a download> pointing at an
  // object URL, click it, and remove it again. Browsers supporting neither
  // are silently ignored.
  var blob = new Blob([textData], { type: mimetype });
  if (navigator.msSaveBlob) { // IE 10+
    navigator.msSaveBlob(blob, filename);
    return;
  }
  var link = document.createElement("a");
  if (link.download === undefined) { // feature detection
    return;
  }
  // Browsers that support HTML5 download attribute
  var url = URL.createObjectURL(blob);
  link.setAttribute("href", url);
  link.setAttribute("download", filename);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  // Release the object URL so the Blob can be garbage collected, but only
  // after a delay so the download has had time to start. revokeObjectURL
  // takes the URL string returned by createObjectURL, not the Blob itself.
  window.setTimeout(function() { URL.revokeObjectURL(url); }, 30000); //30 seconds should do it.
}
//Make this function available to other scripts on the page.
document.download_text_file = download_text_file;
// Scratch data store hung off `document`.
document.userscript_data = {
  // Hook for populating the store; currently does nothing.
  init_data : function() {
  },
  // Reset every data property to an empty value of its own type:
  // strings -> '', objects -> {}, numbers -> 0, anything else -> undefined.
  // Function properties (the store's own methods) are left alone.
  clear_data : function() {
    // Inside a $.each callback `this` is the current VALUE, not the store,
    // so keep an explicit reference to the store to write into.
    var store = this;
    $.each(store, function(k, v) {
      switch (typeof v) {
        case 'function':
          // keep init_data / clear_data callable after a clear
          break;
        case 'string':
          store[k] = '';
          break;
        case 'object':
          store[k] = {};
          break;
        case 'number':
          store[k] = 0;
          break;
        default:
          store[k] = undefined;
      }
    });
  },
};
$("body").append ( `
<!-- Add HTML here -->
` );
//--- CSS styles make it work...
GM_addStyle ( `
/* Add styles here */
`);
// Add a link to the first header cell for collapsing the multi-part child rows.
var new_anchor = $('<a/>', {
  id : 'collapse_parts',
  href : '#',
  text : '[-]',
});
$('.adbl-lib-content > div:nth-child(1) > table:nth-child(3) > tbody:nth-child(1) > tr:nth-child(1) > th:nth-child(1)').append(new_anchor);
new_anchor.on('click', function(event) {
  // The link's href is '#': without this the browser would append '#' to the
  // URL and jump to the top of the page on every click.
  event.preventDefault();
  collapse_extra_rows();
});
// Add a link to the second header cell for downloading the titles as CSV.
new_anchor = $('<a/>', {
  id : 'getcsvlink',
  href : '#',
  text : 'Titles to CSV',
});
$('.adbl-lib-content > div:nth-child(1) > table:nth-child(3) > tbody:nth-child(1) > tr:nth-child(1) > th:nth-child(2) span').append(new_anchor);
new_anchor.on('click', function(event) {
  event.preventDefault(); // same reason as above
  download_library_csv();
});
})(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import os | |
import calendar | |
import pprint | |
import csv | |
# Number of days in each month, keyed by the month name that
# calendar.month_name yields.
# NOTE(review): February carries its leap-year length (29) and is looked up by
# name only, so the same value is used for every year.
monthsdays = {
    'January': 31,
    'February': 29,
    'March': 31,
    'April': 30,
    'May': 31,  # was 30: May and June had been swapped
    'June': 30,  # was 31
    'July': 31,
    'August': 31,
    'September': 30,
    'October': 31,
    'November': 30,
    'December': 31,
}
# Output column order. The trailing letters are the spreadsheet columns each
# entry lands in; the formulas written by the main script refer to F, G and H
# by letter, so this order must not change without updating those formulas.
newcols = [
    "Download Status",      # A
    "Title",                # B
    "Author",               # C
    "Length",               # D
    "Purchase Date",        # E
    "Count Helper",         # F
    "Subtotal",             # G
    "% of Month",           # H
    "Subtotal % of Month",  # I
    "Month Name",           # J
]
def days_in_month(datestring):
    """Return the ``monthsdays`` entry for the month named by *datestring*.

    The month is resolved with ``monthname``, so *datestring* must be in a
    form that helper accepts (month number before the first ``-``).
    """
    month = monthname(datestring)
    return monthsdays[month]
def percentage_of_month(minstring, datestring):
    """Return a duration in minutes as a percentage of its month's length.

    *minstring* is converted with ``int``; the month length in days comes
    from ``days_in_month(datestring)``.
    """
    minutes = int(minstring)
    month_days = days_in_month(datestring)
    # minutes -> hours -> days -> fraction of the month -> percent
    return minutes / 60 / 24 / month_days * 100
def monthname(datestring):
    """Return the ``calendar.month_name`` entry for *datestring*.

    Everything before the first ``-`` in *datestring* is read as the month
    number.
    """
    month_field, _sep, _rest = datestring.partition('-')
    return calendar.month_name[int(month_field)]
def parse_length_minutes(length_text):
    """Convert a "Length" cell into a whole number of minutes.

    Recognised shapes (tokens separated by any whitespace):

    * four tokens, e.g. ``"5 hrs 30 mins"`` -> first number * 60 + third token
    * two or more tokens whose second token starts with ``hr`` -> number * 60
    * otherwise the leading number is taken as minutes, e.g. ``"45 mins"``

    A blank or missing value yields 0.

    Raises:
        ValueError: if the value is a single token or a number cannot be
            parsed.
    """
    parts = (length_text or '').split()
    if not parts:
        return 0
    if len(parts) == 4:
        return int(parts[0]) * 60 + int(parts[2])
    if len(parts) < 2:
        raise ValueError('Unrecognised Length value: %r' % (length_text,))
    amount = int(parts[0])
    return amount * 60 if parts[1].startswith('hr') else amount


if __name__ == '__main__':
    # Usage: script.py <library.csv>
    # Reads the CSV exported from the Audible library page and writes an
    # augmented CSV (columns listed in ``newcols``) to stdout.
    if len(sys.argv) < 2:
        raise Exception('Missing source data argument')
    filename = os.path.expanduser(sys.argv[1])
    if not os.path.exists(filename):
        # Previously a missing file produced no output and no error at all.
        sys.exit('Source data file not found: %s' % filename)
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation requires for files passed to a reader.
    with open(filename, 'r', newline='') as csvfile:
        csvreader = csv.DictReader(
            csvfile, delimiter=',', doublequote=False, quotechar='"',
            dialect='excel', escapechar='\\')
        csvwriter = csv.DictWriter(
            sys.stdout, newcols, delimiter=',', doublequote=False,
            quotechar='"', dialect='excel', escapechar='\\')
        # Always emit the header, even when the input has no data rows.
        csvwriter.writeheader()
        for rnum, row in enumerate(csvreader):
            # Spreadsheet row of this record: row 1 is the header, data rows
            # are numbered from 2.
            sheet_row = rnum + 2
            # Start from a fresh dict for every record so nothing leaks over
            # from the previous one; Length is normalised to minutes.
            outrow = {
                key: parse_length_minutes(value) if key == 'Length' else value
                for key, value in row.items()
            }
            outrow['Count Helper'] = 1
            # Column letters follow the order of ``newcols``:
            # F = Count Helper, G = Subtotal, H = % of Month.
            outrow['Subtotal'] = '=SUBTOTAL(103,F%d)' % sheet_row
            outrow['% of Month'] = percentage_of_month(
                outrow['Length'], row['Purchase Date'])
            outrow['Subtotal % of Month'] = '=H%d*G%d' % (sheet_row, sheet_row)
            outrow['Month Name'] = monthname(row['Purchase Date'])
            csvwriter.writerow(outrow)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment