Created
May 8, 2016 17:27
-
-
Save curiousercreative/6645963d0ef5dda8494425faaaae5c05 to your computer and use it in GitHub Desktop.
Script to load an iTunes Media Library.xml, find duplicate tracks, remove the duplicate with the larger file size, and ensure that the remaining track inherits the older (lower) track_id
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import re | |
from xml.etree import ElementTree | |
def get_track_attr(track, attr_name):
    """Return the value text for the given key in an iTunes track <dict>.

    iTunes library XML stores a track as a flat, alternating sequence of
    <key>Name</key><string>value</string> children, so the value for a key
    is simply the next sibling element.

    track -- an ElementTree element whose children alternate key/value
    attr_name -- the key text to look up (e.g. 'Track ID', 'Size')
    Returns the text of the element following the matching key, or None
    when no such key exists.
    """
    attributes = list(track)
    # pair each element with its successor; zip stops before the last
    # element, so a key with no following value can never raise IndexError
    for key_el, value_el in zip(attributes, attributes[1:]):
        if key_el.text == attr_name:
            return value_el.text
    return None
def set_track_attr(track, attr_name, value):
    """Set the value text for the given key in an iTunes track <dict>.

    Mirrors get_track_attr: finds every element whose text equals
    attr_name and overwrites the text of the element that follows it.
    Does nothing when the key is absent.

    track -- an ElementTree element whose children alternate key/value
    attr_name -- the key text to look up (e.g. 'Track ID')
    value -- the new text for the following value element
    """
    attributes = list(track)
    # zip stops before the last element, so a trailing key with no value
    # can never raise IndexError
    for key_el, value_el in zip(attributes, attributes[1:]):
        if key_el.text == attr_name:
            value_el.text = value
def are_tracks_duplicate(track, track2):
    """Return True when two distinct track dicts describe the same song.

    Two tracks are duplicates when their 'name' and 'artist' values match
    but their 'track_id' values differ (a track is never its own duplicate).
    """
    if track.get('track_id') == track2.get('track_id'):
        return False
    same_name = track.get('name') == track2.get('name')
    same_artist = track.get('artist') == track2.get('artist')
    return same_name and same_artist
# dump the xml file into an ElementTree object
# TODO: make this a real path to your iTunes Media Library.xml
# NOTE: the mode must be 'r' -- 'rw' is not a valid mode for open()
# (raises ValueError on Python 3) and the file is only parsed, never written
with open('somepath/itunes_library.xml', 'r') as f:
    tree = ElementTree.parse(f)
root = tree.getroot()
# plist layout: <plist><dict> ... <key>Tracks</key><dict>...</dict></dict></plist>
# the inner <dict> is the container holding all track entries
tracks_container = root.find('dict').find('dict')
tracks = list(tracks_container)

# init our lists so that we can separate modifying our tree from analyzing
nodes_to_remove = []
nodes_to_update = []
# Iterate over the track list in reverse and flag duplicate tracks.
# Fixes vs. the original:
#  * track2's artist was read from tracks[t] (copy/paste bug) -- now tracks[t2]
#  * entries are no longer removed from `tracks` mid-iteration (which could
#    raise IndexError or skip elements as indices shifted); already-flagged
#    ids are skipped via nodes_to_remove instead, and the list is re-copied
#    fresh before the removal pass below anyway
for t in reversed(range(len(tracks))):
    # only look at the dictionaries (keys and dicts alternate in the container)
    if tracks[t].tag != 'dict':
        continue
    track = {
        'track_id': get_track_attr(tracks[t], 'Track ID'),
        'filesize': get_track_attr(tracks[t], 'Size'),
        'artist': get_track_attr(tracks[t], 'Artist'),
        'name': get_track_attr(tracks[t], 'Name'),
    }
    print(track.get('track_id'))
    # make sure this track hasn't already been flagged for removal
    if track.get('track_id') in nodes_to_remove:
        continue
    print('this track has not already been flagged for removal')
    # check every other track dict for a different id with matching name/artist
    for t2 in range(len(tracks)):
        if tracks[t2].tag != 'dict':
            continue
        track2 = {
            'track_id': get_track_attr(tracks[t2], 'Track ID'),
            'filesize': get_track_attr(tracks[t2], 'Size'),
            'artist': get_track_attr(tracks[t2], 'Artist'),
            'name': get_track_attr(tracks[t2], 'Name'),
        }
        # skip tracks already flagged -- they are conceptually removed
        if track2.get('track_id') in nodes_to_remove:
            continue
        if not are_tracks_duplicate(track, track2):
            continue
        print('found duplicate!')
        # keep the smaller file; the survivor inherits the lower (older)
        # track id so references to the old id keep working
        if int(track.get('filesize')) <= int(track2.get('filesize')):
            # the new one is smaller (or equal), remove the old one
            nodes_to_remove.append(track2.get('track_id'))
            if int(track.get('track_id')) > int(track2.get('track_id')):
                # mark the surviving track to take over the removed track's id
                nodes_to_update.append({'track_id': track.get('track_id'),
                                        'new_track_id': track2.get('track_id')})
        else:
            # the old one is smaller: flag this track and stop comparing it
            nodes_to_remove.append(track.get('track_id'))
            if int(track.get('track_id')) < int(track2.get('track_id')):
                # mark the surviving track to take over the removed track's id
                nodes_to_update.append({'track_id': track2.get('track_id'),
                                        'new_track_id': track.get('track_id')})
            print(len(nodes_to_remove))
            break
# # save our list of track ids to delete | |
# with open('somepath/tracks_to_delete.json', 'wb') as outfile: | |
# json.dump(nodes_to_remove, outfile) | |
# | |
# # save our list of track dictionaries for swapping | |
# with open('somepath/tracks_to_update.json', 'wb') as outfile: | |
# json.dump(nodes_to_update, outfile) | |
# # load our track ids | |
# with open('somepath/tracks_to_delete.json', 'r') as outfile: | |
# nodes_to_remove = json.load(outfile) | |
# | |
# with open('somepath/tracks_to_update.json', 'r') as outfile: | |
# nodes_to_update = json.load(outfile) | |
# delete the old tracks from the tree and their files from disk
# iterate in reverse so removing an element never shifts an index we
# have yet to visit
# we modified tracks previously, let's start fresh from the container
tracks = list(tracks_container)
for t in reversed(range(len(tracks))):
    track = tracks[t]
    # remove the <key> element that carries a flagged track id
    if track.tag == 'key' and track.text in nodes_to_remove:
        print('removing track key')
        print(track.text)
        # remove from our working copy of the children
        tracks.remove(track)
    # remove the matching <dict> element and delete its file
    elif track.tag == 'dict':
        track_dict = {
            'track_id': get_track_attr(track, 'Track ID'),
            'location': get_track_attr(track, 'Location'),
        }
        if track_dict.get('track_id') in nodes_to_remove:
            print('removing from xml tree')
            tracks.remove(track)
            # turn the file:// URL into a filesystem path
            # NOTE(review): only %20 is decoded here; other percent-escapes
            # in the Location URL would survive -- confirm against your
            # library, or switch to urllib's unquote
            filepath = track_dict.get('location').replace('file://', '').replace('%20', ' ')
            print('deleting file:')
            print(filepath)
            try:
                os.remove(filepath)
            except (OSError, IOError):
                # best-effort delete: the file may already be gone
                pass
# update the nodes: rewrite each surviving duplicate's id so it inherits
# the removed track's (older) id
for n in nodes_to_update:
    print('updating nodes')
    track_id = n.get('track_id')
    new_track_id = n.get('new_track_id')
    for track in tracks:
        if track.tag == 'key' and track.text == track_id:
            print('found key match')
            track.text = new_track_id
        elif track.tag == 'dict' and get_track_attr(track, 'Track ID') == track_id:
            print('found dict match')
            set_track_attr(track, 'Track ID', new_track_id)
            # the <dict> follows its <key>, so once it is updated we're done
            break

# update our xml tree: replace the container's children with our
# pruned and id-updated working list
tracks_container.clear()
tracks_container.extend(tracks)
# save the changes we've made | |
# TODO: fill in a file path for the modified library file | |
# tree.write('somepath/itunes_library-modified.xml') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment