curiousercreative · May 8, 2016 17:27
diff --git a/itunes.py b/itunes.py
 import json
 import os
 import re
 from xml.etree import ElementTree

 def get_track_attr(track, attr_name):
    """Return the text of the element that follows the key ``attr_name``.

    ``track`` is walked as a flat sequence of child elements in which a
    ``<key>`` element is expected to be immediately followed by its value
    element.

    Only ``<key>`` elements are compared against ``attr_name``, so a *value*
    whose text happens to equal an attribute name (for example a song
    titled "Size") is never mistaken for a key.

    Returns ``None`` when the key is absent or has no element after it.
    """
    attributes = list(track)
    # pair each element with its successor; zip stops one short of the end,
    # so a trailing key without a value can never index past the list
    for attr, value in zip(attributes, attributes[1:]):
        if attr.tag == 'key' and attr.text == attr_name:
            return value.text
    return None

 def set_track_attr(track, attr_name, value):
    """Set the text of the element that follows the key ``attr_name``.

    ``track`` is walked as a flat sequence of child elements in which a
    ``<key>`` element is expected to be immediately followed by its value
    element.

    Only ``<key>`` elements are compared against ``attr_name``; matching on
    any element's text could overwrite a key element when some value's text
    equals ``attr_name``.

    Does nothing when the key is absent or has no element after it.
    Always returns ``None``.
    """
    attributes = list(track)
    # pair each element with its successor; zip stops one short of the end,
    # so a trailing key without a value can never index past the list
    for attr, target in zip(attributes, attributes[1:]):
        if attr.tag == 'key' and attr.text == attr_name:
            target.text = value
            # the first matching key wins; nothing further to rewrite
            return

 def are_tracks_duplicate(track, track2):
    """Tell whether two track records are distinct entries for the same song.

    Both arguments are mappings read with ``.get`` for the keys
    ``'track_id'``, ``'name'`` and ``'artist'``. The records count as
    duplicates when their track ids differ while name and artist are equal.
    """
    if track.get('track_id') == track2.get('track_id'):
        # identical ids: this is the very same entry, not a duplicate of it
        return False
    if track.get('name') != track2.get('name'):
        return False
    return track.get('artist') == track2.get('artist')

 # parse the xml file into an ElementTree object
 # TODO: make this a real path to your iTunes Media Library.xml
 # The file is only read here. 'rw' is not a valid open() mode (Python 3
 # rejects it outright), and binary mode lets the XML parser honour the
 # encoding declared inside the document.
 with open('somepath/itunes_library.xml', 'rb') as f:
    tree = ElementTree.parse(f)

 root = tree.getroot()
 # first <dict> nested inside the root's first <dict>; the rest of the
 # script treats its children as track-id <key> / track <dict> elements
 tracks_container = root.find('dict').find('dict')
 # working copy of the children, so the tree itself is untouched for now
 tracks = list(tracks_container)

 # init our lists so that we can separate modifying our tree from analyzing
 # nodes_to_remove: track ids (as text) flagged for deletion
 # nodes_to_update: {'track_id': ..., 'new_track_id': ...} id swaps for kept tracks
 nodes_to_remove = []
 nodes_to_update = []

 # Iterate over track list in reverse (because we'll be removing items)
 for t in reversed(xrange(len(tracks))):
    # only look at the dictionaries
    if (tracks[t].tag == 'dict'):
        track = {
            'track_id': get_track_attr(tracks[t], 'Track ID'),
            'filesize': get_track_attr(tracks[t], 'Size'),
            'artist': get_track_attr(tracks[t], 'Artist'),
            'name': get_track_attr(tracks[t], 'Name'),
        }

        print(track.get('track_id'))

        # make sure this track hasn't already been flagged for removal
        if track.get('track_id') not in nodes_to_remove:
            print('this track has not already been flagged for removal')

            # look for another entry whose track id differs while its name
            # and artist match
            for t2 in range(len(tracks)):
                # only look at the dictionaries
                if (tracks[t2].tag == 'dict'):
                    track2 = {
                        'track_id': get_track_attr(tracks[t2], 'Track ID'),
                        'filesize': get_track_attr(tracks[t2], 'Size'),
                        # read the artist from the candidate itself; taking
                        # it from tracks[t] made the artist comparison
                        # always succeed, so tracks matched on name alone
                        'artist': get_track_attr(tracks[t2], 'Artist'),
                        'name': get_track_attr(tracks[t2], 'Name'),
                    }

                    # find duplicates
                    if (are_tracks_duplicate(track, track2)):
                        print('found duplicate!')
                        # then compare sizes: the entry with the smaller
                        # file is kept, and a tie keeps `track`
                        if int(track.get('filesize')) <= int(track2.get('filesize')):
                            # track is not larger than track2: flag track2
                            nodes_to_remove.append(track2.get('track_id'))

                            # track id inheritance: the kept entry takes
                            # over the lower of the two ids
                            if int(track.get('track_id')) > int(track2.get('track_id')):
                                # mark the kept track to update id
                                nodes_to_update.append({'track_id': track.get('track_id'), 'new_track_id': track2.get('track_id')})

                            # remove from list
                            tracks.remove(tracks[t2])
                        else:
                            # track is the larger one: flag it for removal
                            nodes_to_remove.append(track.get('track_id'))

                            # track id inheritance: the kept entry takes
                            # over the lower of the two ids
                            if int(track.get('track_id')) < int(track2.get('track_id')):
                                # mark the kept track to update id
                                nodes_to_update.append({'track_id': track2.get('track_id'), 'new_track_id': track.get('track_id')})

                            # remove from list
                            tracks.remove(tracks[t])

                        print(len(nodes_to_remove))
                        # the list just changed under this index loop, so
                        # stop scanning after the first duplicate
                        break

 # # save our list of track ids to delete
 # with open('somepath/tracks_to_delete.json', 'wb') as outfile:
 #     json.dump(nodes_to_remove, outfile)
 #
 # # save our list of track dictionaries for swapping
 # with open('somepath/tracks_to_update.json', 'wb') as outfile:
 #     json.dump(nodes_to_update, outfile)


 # # load our track ids
 # with open('somepath/tracks_to_delete.json', 'r') as outfile:
 #     nodes_to_remove = json.load(outfile)
 #
 # with open('somepath/tracks_to_update.json', 'r') as outfile:
 #     nodes_to_update = json.load(outfile)

 # delete the old tracks
 # walk the indices from the end so deleting an element never shifts the
 # positions that are still to be visited

 # unquote lives in a different module on Python 2 and Python 3
 try:
    from urllib import unquote
 except ImportError:
    from urllib.parse import unquote

 # we modified tracks previously, let's start fresh
 tracks = list(tracks_container)

 # constant-time membership tests; nodes_to_remove itself stays a list
 ids_to_remove = set(nodes_to_remove)

 for t in reversed(xrange(len(tracks))):
    track = tracks[t]

    # remove the track id key object
    if track.tag == 'key' and track.text in ids_to_remove:
        print('removing track key')
        print(track.text)
        # remove from the working list (written back to the xml later)
        del tracks[t]
    # remove the dict
    elif track.tag == 'dict':
        track_id = get_track_attr(track, 'Track ID')

        if track_id in ids_to_remove:
            print('removing from xml tree')
            # remove from the working list (written back to the xml later)
            del tracks[t]

            location = get_track_attr(track, 'Location')
            if location is None:
                # no Location entry: there is no path to delete, and calling
                # .replace on None would abort the whole run
                print('no file location recorded for track:')
                print(track_id)
                continue

            # get the filename: strip the URL scheme, then decode every
            # percent-escape rather than only '%20'
            # NOTE(review): a 'file://localhost/...' style URL would keep the
            # 'localhost' part in the path - confirm against your library file
            filepath = unquote(location.replace('file://', ''))

            print('deleting file:')
            print(filepath)
            try:
                os.remove(filepath)
            except (OSError, IOError) as e:
                # deletion stays best-effort, but report what went wrong
                print('could not delete file:')
                print(e)

 # apply the recorded track id swaps to the working list
 for swap in nodes_to_update:
    print('updating nodes')
    old_id = swap.get('track_id')
    replacement_id = swap.get('new_track_id')

    for node in tracks:
        if node.tag == 'key':
            # <key> elements carry the id as their text
            if node.text == old_id:
                print('found key match')
                node.text = replacement_id
            continue

        if node.tag != 'dict' or get_track_attr(node, 'Track ID') != old_id:
            continue

        # the matching track dict ends the search for this swap
        print('found dict match')
        set_track_attr(node, 'Track ID', replacement_id)
        break

 # update our xml tree with the list
 # slice assignment replaces only the children; Element.clear() would also
 # wipe the container's attributes and its text/tail whitespace
 tracks_container[:] = tracks

 # save the changes we've made
 # TODO: fill in a file path for the modified library file
 # tree.write('somepath/itunes_library-modified.xml')
	import json
	import os
	import re
	from xml.etree import ElementTree

	def get_track_attr(track, attr_name):
	    """Return the text of the element that follows the key ``attr_name``.

	    ``track`` is walked as a flat sequence of child elements in which a
	    ``<key>`` element is expected to be immediately followed by its value
	    element.

	    Only ``<key>`` elements are compared against ``attr_name``, so a *value*
	    whose text happens to equal an attribute name (for example a song
	    titled "Size") is never mistaken for a key.

	    Returns ``None`` when the key is absent or has no element after it.
	    """
	    attributes = list(track)
	    # pair each element with its successor; zip stops one short of the end,
	    # so a trailing key without a value can never index past the list
	    for attr, value in zip(attributes, attributes[1:]):
	        if attr.tag == 'key' and attr.text == attr_name:
	            return value.text
	    return None

	def set_track_attr(track, attr_name, value):
	    """Set the text of the element that follows the key ``attr_name``.

	    ``track`` is walked as a flat sequence of child elements in which a
	    ``<key>`` element is expected to be immediately followed by its value
	    element.

	    Only ``<key>`` elements are compared against ``attr_name``; matching on
	    any element's text could overwrite a key element when some value's text
	    equals ``attr_name``.

	    Does nothing when the key is absent or has no element after it.
	    Always returns ``None``.
	    """
	    attributes = list(track)
	    # pair each element with its successor; zip stops one short of the end,
	    # so a trailing key without a value can never index past the list
	    for attr, target in zip(attributes, attributes[1:]):
	        if attr.tag == 'key' and attr.text == attr_name:
	            target.text = value
	            # the first matching key wins; nothing further to rewrite
	            return

	def are_tracks_duplicate(track, track2):
	    """Tell whether two track records are distinct entries for the same song.

	    Both arguments are mappings read with ``.get`` for the keys
	    ``'track_id'``, ``'name'`` and ``'artist'``. The records count as
	    duplicates when their track ids differ while name and artist are equal.
	    """
	    return (
	        track.get('track_id') != track2.get('track_id')
	        and track.get('name') == track2.get('name')
	        and track.get('artist') == track2.get('artist')
	    )

	# parse the xml file into an ElementTree object
	# TODO: make this a real path to your iTunes Media Library.xml
	# The file is only read here. 'rw' is not a valid open() mode (Python 3
	# rejects it outright), and binary mode lets the XML parser honour the
	# encoding declared inside the document.
	with open('somepath/itunes_library.xml', 'rb') as f:
	    tree = ElementTree.parse(f)

	root = tree.getroot()
	# first <dict> nested inside the root's first <dict>; the rest of the
	# script treats its children as track-id <key> / track <dict> elements
	tracks_container = root.find('dict').find('dict')
	# working copy of the children, so the tree itself is untouched for now
	tracks = list(tracks_container)

	# init our lists so that we can separate modifying our tree from analyzing
	# nodes_to_remove: track ids (as text) flagged for deletion
	# nodes_to_update: {'track_id': ..., 'new_track_id': ...} id swaps for kept tracks
	nodes_to_remove = []
	nodes_to_update = []

	# Iterate over track list in reverse (because we'll be removing items)
	for t in reversed(xrange(len(tracks))):
	    # only look at the dictionaries
	    if (tracks[t].tag == 'dict'):
	        track = {
	            'track_id': get_track_attr(tracks[t], 'Track ID'),
	            'filesize': get_track_attr(tracks[t], 'Size'),
	            'artist': get_track_attr(tracks[t], 'Artist'),
	            'name': get_track_attr(tracks[t], 'Name'),
	        }

	        print(track.get('track_id'))

	        # make sure this track hasn't already been flagged for removal
	        if track.get('track_id') not in nodes_to_remove:
	            print('this track has not already been flagged for removal')

	            # look for another entry whose track id differs while its name
	            # and artist match
	            for t2 in range(len(tracks)):
	                # only look at the dictionaries
	                if (tracks[t2].tag == 'dict'):
	                    track2 = {
	                        'track_id': get_track_attr(tracks[t2], 'Track ID'),
	                        'filesize': get_track_attr(tracks[t2], 'Size'),
	                        # read the artist from the candidate itself; taking
	                        # it from tracks[t] made the artist comparison
	                        # always succeed, so tracks matched on name alone
	                        'artist': get_track_attr(tracks[t2], 'Artist'),
	                        'name': get_track_attr(tracks[t2], 'Name'),
	                    }

	                    # find duplicates
	                    if (are_tracks_duplicate(track, track2)):
	                        print('found duplicate!')
	                        # then compare sizes: the entry with the smaller
	                        # file is kept, and a tie keeps `track`
	                        if int(track.get('filesize')) <= int(track2.get('filesize')):
	                            # track is not larger than track2: flag track2
	                            nodes_to_remove.append(track2.get('track_id'))

	                            # track id inheritance: the kept entry takes
	                            # over the lower of the two ids
	                            if int(track.get('track_id')) > int(track2.get('track_id')):
	                                # mark the kept track to update id
	                                nodes_to_update.append({'track_id': track.get('track_id'), 'new_track_id': track2.get('track_id')})

	                            # remove from list
	                            tracks.remove(tracks[t2])
	                        else:
	                            # track is the larger one: flag it for removal
	                            nodes_to_remove.append(track.get('track_id'))

	                            # track id inheritance: the kept entry takes
	                            # over the lower of the two ids
	                            if int(track.get('track_id')) < int(track2.get('track_id')):
	                                # mark the kept track to update id
	                                nodes_to_update.append({'track_id': track2.get('track_id'), 'new_track_id': track.get('track_id')})

	                            # remove from list
	                            tracks.remove(tracks[t])

	                        print(len(nodes_to_remove))
	                        # the list just changed under this index loop, so
	                        # stop scanning after the first duplicate
	                        break

	# # save our list of track ids to delete
	# with open('somepath/tracks_to_delete.json', 'wb') as outfile:
	# json.dump(nodes_to_remove, outfile)
	#
	# # save our list of track dictionaries for swapping
	# with open('somepath/tracks_to_update.json', 'wb') as outfile:
	# json.dump(nodes_to_update, outfile)


	# # load our track ids
	# with open('somepath/tracks_to_delete.json', 'r') as outfile:
	# nodes_to_remove = json.load(outfile)
	#
	# with open('somepath/tracks_to_update.json', 'r') as outfile:
	# nodes_to_update = json.load(outfile)

	# delete the old tracks
	# walk the indices from the end so deleting an element never shifts the
	# positions that are still to be visited

	# unquote lives in a different module on Python 2 and Python 3
	try:
	    from urllib import unquote
	except ImportError:
	    from urllib.parse import unquote

	# we modified tracks previously, let's start fresh
	tracks = list(tracks_container)

	# constant-time membership tests; nodes_to_remove itself stays a list
	ids_to_remove = set(nodes_to_remove)

	for t in reversed(xrange(len(tracks))):
	    track = tracks[t]

	    # remove the track id key object
	    if track.tag == 'key' and track.text in ids_to_remove:
	        print('removing track key')
	        print(track.text)
	        # remove from the working list (written back to the xml later)
	        del tracks[t]
	    # remove the dict
	    elif track.tag == 'dict':
	        track_id = get_track_attr(track, 'Track ID')

	        if track_id in ids_to_remove:
	            print('removing from xml tree')
	            # remove from the working list (written back to the xml later)
	            del tracks[t]

	            location = get_track_attr(track, 'Location')
	            if location is None:
	                # no Location entry: there is no path to delete, and calling
	                # .replace on None would abort the whole run
	                print('no file location recorded for track:')
	                print(track_id)
	                continue

	            # get the filename: strip the URL scheme, then decode every
	            # percent-escape rather than only '%20'
	            # NOTE(review): a 'file://localhost/...' style URL would keep the
	            # 'localhost' part in the path - confirm against your library file
	            filepath = unquote(location.replace('file://', ''))

	            print('deleting file:')
	            print(filepath)
	            try:
	                os.remove(filepath)
	            except (OSError, IOError) as e:
	                # deletion stays best-effort, but report what went wrong
	                print('could not delete file:')
	                print(e)

	# update the nodes: apply the recorded track id swaps to the working list
	for n in nodes_to_update:
	    print('updating nodes')
	    track_id = n.get('track_id')
	    new_track_id = n.get('new_track_id')

	    for track in tracks:
	        if track.tag == 'key' and track.text == track_id:
	            # <key> elements carry the id as their text
	            print('found key match')
	            track.text = new_track_id
	        elif track.tag == 'dict' and get_track_attr(track, 'Track ID') == track_id:
	            print('found dict match')
	            set_track_attr(track, 'Track ID', new_track_id)
	            # the matching track dict ends the search for this swap
	            break

	# update our xml tree with the list
	# slice assignment replaces only the children; Element.clear() would also
	# wipe the container's attributes and its text/tail whitespace
	tracks_container[:] = tracks

	# save the changes we've made
	# TODO: fill in a file path for the modified library file
	# tree.write('somepath/itunes_library-modified.xml')