joshes · January 19, 2019 17:51
diff --git a/download-alexa-audio.sh b/download-alexa-audio.sh
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # Downloads all of your alexa audio files to `output_dir` with the filenames
 # flattened (vs storing as sub-directories as implied by the file name).
 # 
 # For example, the following id:
 #  - A3S5CH1HU6KAYF:1.0/2019/01/18/17/G090LF1174270LM2/39:31::TNIH_2V.6c04473c-3160-4012-9719-63ca63396288ZXK
 # Will be saved to:
 #  - ${output_dir}/A3S5CH1HU6KAYF_1_0_2019_01_18_17_G090LF1174270LM2_39_31__TNIH_2V_6c04473c-3160-4012-9719-63ca63396288ZXK.wav
 # 
 # Configure:
 #  - Change `output_dir` to match your needs.
 #  - Go to https://alexa.amazon.com and login if needed. 
 #  - Get the value for the request header "Cookie" made to alexa.amazon.com and paste it into `cookie_str` below.
 #    Note that the only cookie keys that are absolutely *required* are:
 #     - ubid
 #     - x-main
 #     - at-main
 #     - sess-at-main
 # 
 # Caveats:
 #  - Only tested on Mac
 #
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 # Where to store the resulting downloads (it *will* be created!)
 output_dir=~/Desktop/my-alexa-files
 # Quoted so a path containing spaces is created as a single directory. Abort
 # early on failure: without the directory no download can be saved.
 if ! mkdir -p -- "${output_dir}"; then
     printf 'error: cannot create output directory: %s\n' "${output_dir}" >&2
     exit 1
 fi

 # Value of the "Cookie" request header captured from a logged-in
 # alexa.amazon.com session.
 cookie_str=''

 # Base URL that every API path is appended to.
 api_endpoint=https://alexa.amazon.com/api

 # Makes an alexa api request (an HTTP GET issued with curl).
 #
 # Globals:
 #   cookie_str (read) - sent as the value of the "Cookie" request header
 # Arguments:
 #   $1 = api endpoint (full URL)
 #   $2 = where to save the result
 # Returns:
 #   curl's exit status
 api_request() {
     # curl is invoked directly. Wrapping it in $( ... ) would make the shell
     # try to execute anything curl printed as a command.
     #
     # No explicit Accept-Encoding header: --compressed already asks for the
     # encodings this curl build can decode, whereas advertising "br" by hand
     # can produce a response that curl is unable to decompress.
     curl \
         -H "Connection: keep-alive" \
         -H "Pragma: no-cache" \
         -H "Cache-Control: no-cache" \
         -H "Upgrade-Insecure-Requests: 1" \
         -H "Accept: application/json" \
         -H "Accept-Language: en-US,en" \
         -H "Cookie: ${cookie_str}" \
         --compressed \
         --silent \
         -o "${2}" \
         "${1}"
 }

 # Downloads the audio of every utterance the API lists for a time range.
 #
 # Globals:
 #   api_endpoint (read), output_dir (read)
 # Arguments:
 #   $1 = sent as the `startTime` query parameter
 #   $2 = sent as the `endTime` query parameter
 #   $3 = sent as the `_` query parameter
 # Outputs:
 #   prints the destination path of each saved file to stdout;
 #   warnings go to stderr
 # Returns:
 #   1 if the activity listing could not be fetched or parsed, else 0
 fetch() {
     local url listing ids id flattened_id dst

     url="${api_endpoint}/activities-with-range?startTime=$1&endTime=$2&size=50&_=$3"

     # Keep the listing in a private temp file rather than a predictable /tmp name.
     listing=$(mktemp "${TMPDIR:-/tmp}/alexa-activities.XXXXXX") || return 1
     if ! api_request "${url}" "${listing}"; then
         rm -f -- "${listing}"
         return 1
     fi

     # The filter is quoted so the shell cannot glob it. `[]?` and `// empty`
     # make a response without activities, or an entry without an id, yield
     # nothing instead of an error or the literal string "null".
     if ! ids=$(jq -r '.activities[]?.utteranceId // empty' "${listing}"); then
         rm -f -- "${listing}"
         return 1
     fi
     rm -f -- "${listing}"

     # One id per line; reading line-wise avoids word splitting and globbing.
     while IFS= read -r id; do
         [[ -n "${id}" ]] || continue

         # Flatten the name instead of putting in subdirectories for easier sorting
         flattened_id=${id//[.\/:]/_}
         dst="${output_dir}/${flattened_id}.wav"

         # Download straight to the destination; no intermediate copy in /tmp.
         if api_request "${api_endpoint}/utterance/audio/data?id=${id}" "${dst}"; then
             printf '%s\n' "${dst}"
         else
             rm -f -- "${dst}"
             printf 'warning: failed to download %s\n' "${id}" >&2
         fi
     done <<< "${ids}"

     return 0
 }

 # Calls `fetch` for the range from the first Alexa release up to now, then
 # repeatedly moves the end of the range 100 hours further into the past and
 # fetches again, stopping before the end would reach the start.
 #
 # NOTE(review): every call keeps the same start time and only shrinks the end;
 # this presumably relies on the API returning the newest entries of a range —
 # confirm against the service's behaviour.
 main() {
     local start_time_ms window_ms next_end_time_ms tick

     # November 14, 2014 12:00:00 AM (UTC), in milliseconds since the epoch.
     # When the first Alexa came out: https://en.wikipedia.org/wiki/Amazon_Alexa#History
     start_time_ms=1415923200000
     # Batches of 100 hours
     window_ms=$(( 100 * 60 * 60 * 1000 ))

     # Current time in seconds; also passed to fetch as its third argument.
     tick=$(date +'%s')
     next_end_time_ms=$(( tick * 1000 ))
     fetch "${start_time_ms}" "${next_end_time_ms}" "${tick}"

     # Test the *next* end time before using it, so fetch is never called with
     # an end that lies at or before the start of the range.
     while (( next_end_time_ms - window_ms > start_time_ms )); do
         next_end_time_ms=$(( next_end_time_ms - window_ms ))
         tick=$(( $(date +'%s') + 1 ))
         fetch "${start_time_ms}" "${next_end_time_ms}" "${tick}"
     done
 }

 # Entry point: run the download with the configuration set above.
 main
	# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	#
	# Downloads all of your alexa audio files to `output_dir` with the filenames
	# flattened (vs storing as sub-directories as implied by the file name).
	#
	# For example, the following id:
	# - A3S5CH1HU6KAYF:1.0/2019/01/18/17/G090LF1174270LM2/39:31::TNIH_2V.6c04473c-3160-4012-9719-63ca63396288ZXK
	# Will be saved to:
	# - ${output_dir}/A3S5CH1HU6KAYF_1_0_2019_01_18_17_G090LF1174270LM2_39_31__TNIH_2V_6c04473c-3160-4012-9719-63ca63396288ZXK.wav
	#
	# Configure:
	# - Change `output_dir` to match your needs.
	# - Go to https://alexa.amazon.com and login if needed.
	# - Get the value for the request header "Cookie" made to alexa.amazon.com and paste it into `cookie_str` below.
	# Note that the only cookie keys that are absolutely required are:
	# - ubid
	# - x-main
	# - at-main
	# - sess-at-main
	#
	# Caveats:
	# - Only tested on Mac
	#
	# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	# Where to store the resulting downloads (it will be created!)
	output_dir=~/Desktop/my-alexa-files
	# Quoted so a path containing spaces is created as a single directory. Abort
	# early on failure: without the directory no download can be saved.
	if ! mkdir -p -- "${output_dir}"; then
		printf 'error: cannot create output directory: %s\n' "${output_dir}" >&2
		exit 1
	fi

	# Value of the "Cookie" request header captured from a logged-in
	# alexa.amazon.com session.
	cookie_str=''

	# Base URL that every API path is appended to.
	api_endpoint=https://alexa.amazon.com/api

	# Makes an alexa api request (an HTTP GET issued with curl).
	#
	# Globals:
	#   cookie_str (read) - sent as the value of the "Cookie" request header
	# Arguments:
	#   $1 = api endpoint (full URL)
	#   $2 = where to save the result
	# Returns:
	#   curl's exit status
	api_request() {
		# curl is invoked directly. Wrapping it in $( ... ) would make the shell
		# try to execute anything curl printed as a command.
		#
		# No explicit Accept-Encoding header: --compressed already asks for the
		# encodings this curl build can decode, whereas advertising "br" by hand
		# can produce a response that curl is unable to decompress.
		curl \
			-H "Connection: keep-alive" \
			-H "Pragma: no-cache" \
			-H "Cache-Control: no-cache" \
			-H "Upgrade-Insecure-Requests: 1" \
			-H "Accept: application/json" \
			-H "Accept-Language: en-US,en" \
			-H "Cookie: ${cookie_str}" \
			--compressed \
			--silent \
			-o "${2}" \
			"${1}"
	}

	# Downloads the audio of every utterance the API lists for a time range.
	#
	# Globals:
	#   api_endpoint (read), output_dir (read)
	# Arguments:
	#   $1 = sent as the `startTime` query parameter
	#   $2 = sent as the `endTime` query parameter
	#   $3 = sent as the `_` query parameter
	# Outputs:
	#   prints the destination path of each saved file to stdout;
	#   warnings go to stderr
	# Returns:
	#   1 if the activity listing could not be fetched or parsed, else 0
	fetch() {
		local url listing ids id flattened_id dst

		url="${api_endpoint}/activities-with-range?startTime=$1&endTime=$2&size=50&_=$3"

		# Keep the listing in a private temp file rather than a predictable /tmp name.
		listing=$(mktemp "${TMPDIR:-/tmp}/alexa-activities.XXXXXX") || return 1
		if ! api_request "${url}" "${listing}"; then
			rm -f -- "${listing}"
			return 1
		fi

		# jq reads the listing file directly (a backslash-escaped `\|` would be
		# passed on as a literal argument instead of forming a pipeline). The
		# filter is quoted so the shell cannot glob it; `[]?` and `// empty` make
		# a response without activities, or an entry without an id, yield nothing
		# instead of an error or the literal string "null".
		if ! ids=$(jq -r '.activities[]?.utteranceId // empty' "${listing}"); then
			rm -f -- "${listing}"
			return 1
		fi
		rm -f -- "${listing}"

		# One id per line; reading line-wise avoids word splitting and globbing.
		while IFS= read -r id; do
			[[ -n "${id}" ]] || continue

			# Flatten the name instead of putting in subdirectories for easier sorting
			flattened_id=${id//[.\/:]/_}
			dst="${output_dir}/${flattened_id}.wav"

			# Download straight to the destination; no intermediate copy in /tmp.
			if api_request "${api_endpoint}/utterance/audio/data?id=${id}" "${dst}"; then
				printf '%s\n' "${dst}"
			else
				rm -f -- "${dst}"
				printf 'warning: failed to download %s\n' "${id}" >&2
			fi
		done <<< "${ids}"

		return 0
	}

	# Calls `fetch` for the range from the first Alexa release up to now, then
	# repeatedly moves the end of the range 100 hours further into the past and
	# fetches again, stopping before the end would reach the start.
	#
	# NOTE(review): every call keeps the same start time and only shrinks the end;
	# this presumably relies on the API returning the newest entries of a range —
	# confirm against the service's behaviour.
	main() {
		local start_time_ms window_ms next_end_time_ms tick

		# November 14, 2014 12:00:00 AM (UTC), in milliseconds since the epoch.
		# When the first Alexa came out: https://en.wikipedia.org/wiki/Amazon_Alexa#History
		start_time_ms=1415923200000
		# Batches of 100 hours
		window_ms=$(( 100 * 60 * 60 * 1000 ))

		# Current time in seconds; also passed to fetch as its third argument.
		tick=$(date +'%s')
		next_end_time_ms=$(( tick * 1000 ))
		fetch "${start_time_ms}" "${next_end_time_ms}" "${tick}"

		# Test the *next* end time before using it, so fetch is never called with
		# an end that lies at or before the start of the range.
		while (( next_end_time_ms - window_ms > start_time_ms )); do
			next_end_time_ms=$(( next_end_time_ms - window_ms ))
			tick=$(( $(date +'%s') + 1 ))
			fetch "${start_time_ms}" "${next_end_time_ms}" "${tick}"
		done
	}

	# Entry point: run the download with the configuration set above.
	main