Last active
September 14, 2021 21:29
-
-
Save jeamland/c856e9993008c9611a9910a3b22f9479 to your computer and use it in GitHub Desktop.
iTunes Library (non-XML) playlist extractor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Extract playlists from a non-XML iTunes Library file (.itl)
# Copyright (c) 2018 Benno Rice, released under the BSD (2 Clause) Licence.
# Important information on the encryption used in the .itl file found here:
# https://mrexodia.cf/reversing/2014/12/16/iTunes-Library-Format-1
# Highly useful information on the .itl format itself found here:
# https://github.com/josephw/titl/blob/master/titl-core/src/main/java/org/kafsemo/titl/ParseLibrary.java
import argparse
import collections
import csv
import enum
import io
import struct
import zlib

from Crypto.Cipher import AES
# Number of bytes at the start of the file that the script keeps as the
# plain header; everything after it is decrypted and decompressed.
HEADER_LENGTH = 0x90
# AES key handed to the cipher that decrypts the start of the file body.
CRYPTO_KEY = b'BHUILuilfghuila3'

# One namedtuple per four-character record tag.  Types declared with no
# fields are recognised by the parser, but nothing from them is retained.
# Each typename matches the variable it is bound to, so repr() and
# __name__ identify the record type correctly.
Hdfm = collections.namedtuple('Hdfm', [
    'file_length',
    'version',
])
Hdsm = collections.namedtuple('Hdsm', [
    'block_type',
    'block_length',
])
Hghm = collections.namedtuple('Hghm', [])
Hohm = collections.namedtuple('Hohm', [
    'record_length',
    'type',
    'data',
])
Halm = collections.namedtuple('Halm', [])
Haim = collections.namedtuple('Haim', [])
Hilm = collections.namedtuple('Hilm', [])
Hiim = collections.namedtuple('Hiim', [])
Htlm = collections.namedtuple('Htlm', [])
Htim = collections.namedtuple('Htim', [
    'record_length',
    'sub_blocks',
    'song_id',
    'block_type',
    # Further fields the original author listed but left disabled:
    # file_type, playtime, track_number, track_total, year, bit_rate,
    # sample_rate, volume_adjustment, start_time, end_time, play_count,
    # compilation, last_played, disk_number, disk_total, rating, added
])
Hqlm = collections.namedtuple('Hqlm', [])
Hqim = collections.namedtuple('Hqim', [])
Hsts = collections.namedtuple('Hsts', [])
Hplm = collections.namedtuple('Hplm', [])
Hpim = collections.namedtuple('Hpim', [
    'item_count',
])
Hptm = collections.namedtuple('Hptm', [
    'key',
])
Hslm = collections.namedtuple('Hslm', [])
Hpsm = collections.namedtuple('Hpsm', [])
Hrlm = collections.namedtuple('Hrlm', [])
Hrpm = collections.namedtuple('Hrpm', [])
@enum.unique
class HohmType(enum.IntEnum):
    """Numeric ``hohm`` record types whose payload this script picks out."""

    TITLE = 2              # 0x02
    ALBUM_TITLE = 3        # 0x03
    ARTIST = 4             # 0x04
    PLAYLIST_TITLE = 100   # 0x64
# hohm record types for which RecordParser.parse_hohm does not strip the
# leading 16 bytes of the payload.
HOHM_ODD_TYPES = (
    0x42,
    0x68, 0x69, 0x6a, 0x6b, 0x6c,
    0x192,
    0x1f7, 0x1f4,
    0x202,
    0x320,
)
class ItlIO(io.BytesIO):
    """In-memory byte stream with helpers for reading .itl fields.

    The ``flipped`` attribute selects the byte order used by
    :meth:`read_uint`: big-endian while it is False (the initial state),
    little-endian once it has been set to True.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.flipped = False

    def skip(self, nbytes):
        """Move past ``nbytes`` bytes by reading and discarding them."""
        self.read(nbytes)

    def read_ascii(self, nbytes):
        """Read up to ``nbytes`` bytes and return them decoded as ASCII."""
        raw = self.read(nbytes)
        return raw.decode('ascii')

    def read_byte(self):
        """Read a single byte and return its integer value."""
        raw = self.read(1)
        return raw[0]

    def read_uint(self):
        """Read four bytes as an unsigned 32-bit integer.

        The byte order follows ``self.flipped`` (see the class docstring).
        """
        fmt = '<I' if self.flipped else '>I'
        (value,) = struct.unpack(fmt, self.read(4))
        return value
class RecordParser:
    """Iterate over the records of a decrypted, decompressed .itl buffer.

    Every record starts with a four-character ASCII tag followed by a
    32-bit header length.  Each tag is handled by the ``parse_<tag>``
    method of the same name, which receives the rest of the record header
    as its own :class:`ItlIO` and returns the matching namedtuple.
    """

    def __init__(self, data):
        self.data = ItlIO(data)

    def _find_handler(self, tag):
        """Return the bound ``parse_*`` method for ``tag``.

        While the stream is flipped the tag is reversed before the lookup.
        If no handler matches, the reversed spelling is tried; a match
        there switches the stream to flipped mode.

        Raises:
            ValueError: if neither spelling has a handler.  The unread
                remainder of the stream is printed first.
        """
        if self.data.flipped:
            tag = tag[::-1]

        name = f'parse_{tag}'
        if hasattr(self, name):
            return getattr(self, name)

        name = f'parse_{tag[::-1]}'
        if hasattr(self, name):
            self.data.flipped = True
            return getattr(self, name)

        print(self.data.getvalue()[self.data.tell():])
        raise ValueError(f"unknown record type: {tag}")

    def parse(self):
        """Yield one record namedtuple per record until the stream is empty."""
        while True:
            tag = self.data.read_ascii(4)
            if not tag:
                return

            handler = self._find_handler(tag)

            # The header length is treated as including the 8 bytes of tag
            # and length field that have already been consumed.
            header_length = self.data.read_uint()
            header = ItlIO(self.data.read(header_length - 8))
            header.flipped = bool(self.data.flipped)

            yield handler(header)

    def parse_hdfm(self, data):
        """Read the file length and the length-prefixed version string."""
        file_length = data.read_uint()
        data.skip(4)
        version = data.read_ascii(data.read_byte())
        return Hdfm(file_length=file_length, version=version)

    def parse_hdsm(self, data):
        """Read a block header; blocks of type 4 or 22 are skipped whole."""
        record_length = data.read_uint()
        block_type = data.read_uint()
        if block_type in (4, 22):
            # Bytes of this record already taken from the main stream:
            # the header body plus the 8-byte tag/length prefix.
            consumed = len(data.getvalue()) + 8
            self.data.skip(record_length - consumed)
        return Hdsm(block_type=block_type, block_length=record_length)

    def parse_hghm(self, data):
        """Recognised but not inspected."""
        return Hghm()

    def parse_hohm(self, data):
        """Read a typed payload record.

        The payload is taken from the main stream after the header.  For
        types outside ``HOHM_ODD_TYPES`` the first 16 bytes are dropped and
        the remainder is decoded to ``str``; other types keep raw bytes.
        """
        record_length = data.read_uint()
        hohm_type = data.read_uint()
        payload = self.data.read(record_length - len(data.getvalue()) - 8)

        if hohm_type not in HOHM_ODD_TYPES:
            payload = payload[16:]

            # No encoding marker is consulted; the encoding is guessed from
            # where a zero byte sits in an even-length payload.
            could_be_utf16 = len(payload) > 1 and len(payload) % 2 == 0
            if could_be_utf16 and payload[0] == 0:
                encoding = 'utf-16be'
            elif could_be_utf16 and payload[-1] == 0:
                encoding = 'utf-16le'
            else:
                encoding = 'iso-8859-1'
            payload = payload.decode(encoding)

        return Hohm(record_length=record_length, type=hohm_type, data=payload)

    def parse_halm(self, data):
        """Recognised but not inspected."""
        return Halm()

    def parse_haim(self, data):
        """Recognised but not inspected."""
        return Haim()

    def parse_hilm(self, data):
        """Recognised but not inspected."""
        return Hilm()

    def parse_hiim(self, data):
        """Recognised but not inspected."""
        return Hiim()

    def parse_htlm(self, data):
        """Recognised but not inspected."""
        return Htlm()

    def parse_htim(self, data):
        """Read the first four 32-bit fields of a track item header."""
        record_length = data.read_uint()
        sub_blocks = data.read_uint()
        song_id = data.read_uint()
        block_type = data.read_uint()
        return Htim(
            record_length=record_length,
            sub_blocks=sub_blocks,
            song_id=song_id,
            block_type=block_type,
        )

    def parse_hqlm(self, data):
        """Recognised but not inspected."""
        return Hqlm()

    def parse_hqim(self, data):
        """Recognised but not inspected."""
        return Hqim()

    def parse_hsts(self, data):
        """Recognised but not inspected."""
        return Hsts()

    def parse_hplm(self, data):
        """Recognised but not inspected."""
        return Hplm()

    def parse_hpim(self, data):
        """Skip 8 header bytes, then read the item count."""
        data.skip(8)
        return Hpim(item_count=data.read_uint())

    def parse_hptm(self, data):
        """Skip 16 header bytes, then read the item key."""
        data.skip(16)
        return Hptm(key=data.read_uint())

    def parse_hslm(self, data):
        """Recognised but not inspected."""
        return Hslm()

    def parse_hpsm(self, data):
        """Recognised but not inspected."""
        return Hpsm()

    def parse_hrlm(self, data):
        """Recognised but not inspected."""
        return Hrlm()

    def parse_hrpm(self, data):
        """Recognised but not inspected."""
        return Hrpm()
parser = argparse.ArgumentParser()
parser.add_argument(
    'filename',
    nargs='?',
    default='iTunes Library.itl',
    help='iTunes Library Filename',
)
args = parser.parse_args()

# So it appears that the .itl format, in modern versions of iTunes, has a
# header block containing some information, one part of which (the
# big-endian uint at offset 0x5C) tells us at most how much of the following
# data is AES/ECB encrypted with a key that's made it around the Internet a
# bit. To get at the actual data you need to decrypt that bit in place, then
# decompress (zlib) everything after the initial header. After that it's a
# similar format to older iTunes library files.
with open(args.filename, 'rb') as itl_file:
    itl = itl_file.read()

header = itl[:HEADER_LENGTH]
max_crypt_length = struct.unpack('>I', header[0x5C:0x60])[0]

# Only whole 16-byte AES blocks can be decrypted, and never more than the
# header allows.  Small libraries have a body shorter than the header's
# maximum, so the length actually decrypted must be this clamped,
# block-aligned value; slicing by max_crypt_length would hand the cipher a
# ragged tail and fail with "Input strings must be a multiple of 16".
crypt_length = min(len(itl) - HEADER_LENGTH, max_crypt_length) & ~0xf

cipher = AES.new(CRYPTO_KEY, AES.MODE_ECB)
decrypted = cipher.decrypt(itl[HEADER_LENGTH:HEADER_LENGTH + crypt_length])
# Anything past the encrypted prefix is already plain (compressed) data.
itl = decrypted + itl[HEADER_LENGTH + crypt_length:]
itl = header + zlib.decompress(itl)
# Walk the record stream once, assembling tracks and playlists.  Records
# arrive flat, so the "current" track/playlist is whichever item record
# (Htim / Hpim) was seen most recently; the Hohm and Hptm records that
# follow are attached to it.
track = {}
tracks = {}
playlist = {'items': []}
playlists = {}

# Hohm types that describe the current track, mapped to the dict key used.
_TRACK_FIELDS = {
    HohmType.TITLE: 'title',
    HohmType.ALBUM_TITLE: 'album',
    HohmType.ARTIST: 'artist',
}

for record in RecordParser(itl).parse():
    if isinstance(record, Htim):
        # A new track starts: file the previous one.  Requiring a song_id
        # avoids a KeyError if stray Hohm data arrived before any Htim.
        if 'song_id' in track:
            tracks[track['song_id']] = track
        track = {'song_id': record.song_id}
    elif isinstance(record, Hohm):
        if record.type in _TRACK_FIELDS:
            track[_TRACK_FIELDS[record.type]] = record.data
        elif record.type == HohmType.PLAYLIST_TITLE:
            playlist['title'] = record.data
    elif isinstance(record, Hpim):
        # A new playlist starts: file the previous one.  Playlists are keyed
        # by title, so one that never received a title cannot be stored.
        if 'title' in playlist:
            playlists[playlist['title']] = playlist
        playlist = {'items': []}
    elif isinstance(record, Hptm):
        playlist.setdefault('items', []).append(record.key)

# File whatever was still being assembled when the stream ended.
if 'song_id' in track:
    tracks[track['song_id']] = track
if 'title' in playlist:
    playlists[playlist['title']] = playlist
# Write one CSV row per playlist entry.  The file is opened with newline=''
# as the csv module requires, with an explicit encoding so non-ASCII titles
# do not depend on the platform default, and inside a `with` block so it is
# flushed and closed when the script finishes.
with open('playlists.csv', 'w', newline='', encoding='utf-8') as csv_file:
    output = csv.writer(csv_file)
    for title, playlist in playlists.items():
        # The playlists I was after had titles of the form 'YYYY-M' or 'YYYY-MM'...
        if len(title) < 5 or title[0] != '2' or title[4] != '-':
            continue
        try:
            year, month = title.split('-')
            # ... and I wanted to make them consistently 'YYYY-MM'.
            title = f'{year}-{int(month):02d}'
        except ValueError:
            # Passed the quick shape check but is not really 'YYYY-M[M]'
            # (extra dashes or a non-numeric month): not one of ours.
            continue
        for key in playlist.get('items', ()):
            item = tracks.get(key)
            if item is None:
                # The playlist refers to a track that was never parsed.
                continue
            print(repr(item))
            output.writerow([
                title,
                item.get('title', ''),
                item.get('artist', ''),
                item.get('album', ''),
            ])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Doesn't work anymore, got this error. My understanding of Python is very limited but seems to need some padding? Tried some hacks but to no avail.
decrypted = cipher.decrypt(itl[HEADER_LENGTH:max_crypt_length + HEADER_LENGTH])
File "/usr/local/lib/python3.9/site-packages/Crypto/Cipher/blockalgo.py", line 295, in decrypt
return self._cipher.decrypt(ciphertext)
ValueError: Input strings must be a multiple of 16 in length