Last active
March 8, 2020 17:09
-
-
Save falzm/8036d7838aef560c80cd to your computer and use it in GitHub Desktop.
Convert iTunes exported library to JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
from pprint import pprint | |
import json | |
import plistlib as plist | |
import sys | |
# Normalized track attributes kept in the JSON output.
FIELDS = frozenset((
    'artist',
    'name',
    'album',
    'year',
    'total_time',
    'genre',
    'play_count',
    'kind',
    'bit_rate',
    'size',
))

# Tracks whose 'kind' or 'genre' matches one of these are not music and are
# left out of the output.
NON_MUSIC_KINDS = ('MPEG-4 video file', 'Ringtone')
NON_MUSIC_GENRES = ('Podcast', 'Video Games', 'Tech News')


def load_library(path):
    """Parse the exported library plist at *path* and return it as a dict.

    Uses ``plistlib.load`` when the running interpreter provides it and
    falls back to the legacy ``plistlib.readPlist`` otherwise, so the script
    runs on both old and current Python versions.
    """
    if hasattr(plist, 'load'):
        with open(path, 'rb') as handle:
            return plist.load(handle)
    return plist.readPlist(path)


def convert_track(raw_track):
    """Return the JSON-ready form of *raw_track*, or None to skip it.

    Keys are lower-cased with spaces replaced by underscores
    ('Total Time' -> 'total_time'); only the keys listed in FIELDS are kept.
    Returns None for tracks filtered out as non-music.

    Raises TypeError when 'total_time' is not numeric, and AttributeError
    when *raw_track* is not a mapping with string keys.
    """
    track = {}
    for key, value in raw_track.items():
        key = key.lower().replace(' ', '_')
        if key in FIELDS:
            track[key] = value

    # Filter out non-music files
    if track.get('kind') in NON_MUSIC_KINDS or \
            track.get('genre') in NON_MUSIC_GENRES:
        return None

    # Convert years in strings (required by Elasticsearch to index it as
    # 'date' type)
    if 'year' in track:
        track['year'] = str(track['year'])

    # Convert track durations from milliseconds to seconds. Floor division
    # gives the same result on every Python version.
    if 'total_time' in track:
        track['total_time'] = int(track['total_time'] // 1000)

    return track


def main(argv):
    """Print one JSON document per music track of the library in argv[1].

    Returns the process exit status: 0 on success, 2 on a usage error.
    """
    if len(argv) < 2:
        sys.stderr.write('usage: %s <exported-library.xml>\n' % argv[0])
        return 2

    library = load_library(argv[1])
    for track_id, raw_track in library['Tracks'].items():
        try:
            track = convert_track(raw_track)
            if track is None:
                continue
            line = json.dumps(track)
        except (AttributeError, TypeError, ValueError) as err:
            # Best effort: one malformed track must not abort the export,
            # but it should not vanish silently either.
            sys.stderr.write('skipping track %s: %s\n' % (track_id, err))
            continue
        print(line)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi
Could you please explain these steps more clearly?
1. How do I create the shell script, where should it be located, and how do I execute it?
2. Where should the index file be created (location/path), and what are the steps to execute it?
The most efficient way I've found to index the JSON documents into Elasticsearch is to use its Bulk API. This method requires a bit of shell scripting to turn the records into a bulk request file:
$ while IFS= read -r track; do
    # Each document is preceded by its Bulk API action line.
    echo '{"index":{"_index":"library","_type":"track"}}'
    # -r / IFS= keep backslash escapes and whitespace intact; quoting stops
    # the shell from word-splitting or glob-expanding the JSON.
    printf '%s\n' "$track"
done < library.json > bulk
The settings and mapping of the "library" Elasticsearch index for the "track" document type look like this:
$ cat library.index
{
"settings" : {
"index" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
}
},
"mappings": {
"track": {
"properties": {
"year": { "format": "year", "type": "date" },
"album": { "index": "not_analyzed", "type": "string" },
"artist": { "index": "not_analyzed", "type": "string" },
"genre": { "index": "not_analyzed", "type": "string" },
"kind": { "index": "not_analyzed", "type": "string" },
"name": { "index": "not_analyzed", "type": "string" },
"play_count": { "type": "long" },
"total_time": { "type": "long" },
"bit_rate": { "type": "long" },
"size": { "type": "long"
}
}
}
}
}