Created July 2, 2016 14:54
-
-
Save astoeckel/1aee40ea5c2995f655bbeba520b94a7c to your computer and use it in GitHub Desktop.
Small script to copy the song ratings between Rhythmbox databases
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Copies selected tags (the song rating) from the entries of an old XML
# database into the best-matching entries of a new one, and writes the
# updated tree to a third file.

import xml.etree.ElementTree as ET
import sys
import unicodedata
import difflib

# Exactly three positional arguments are required: the old database, the new
# database and the path the merged result is written to.
if len(sys.argv) != 4:
    print("Usage: copy_rating <OLD DB> <NEW DB> <TARGET>")
    sys.exit(1)
def build_id(root, tags_to_use):
    """Build a '/'-separated identifier string for an element.

    The identifier consists of the element's own stripped text followed by
    the identifiers of those direct children whose tag is listed in
    ``tags_to_use`` (built recursively, in document order). Empty parts are
    skipped, so no leading, trailing or doubled separators are produced.

    Args:
        root: The element to describe.
        tags_to_use: Collection of child tag names that contribute to the id.

    Returns:
        The identifier as a ``str``; empty if nothing contributed.
    """
    # ``text`` is None for elements without leading text content.
    own_text = root.text.strip() if isinstance(root.text, str) else ""
    parts = [own_text]
    parts.extend(
        build_id(child, tags_to_use)
        for child in root
        if child.tag in tags_to_use
    )
    return "/".join(part for part in parts if part)
def normalise(s):
    """Reduce a string to a lower-case, ASCII-only form for fuzzy matching.

    The string is decomposed with NFKD so that accented characters split
    into a base character plus combining marks; encoding to ASCII with
    ``'ignore'`` then drops everything that is not plain ASCII.

    Args:
        s: The ``str`` to normalise.

    Returns:
        The normalised text as ``bytes`` (the result of ``str.encode``),
        stripped of surrounding whitespace and lower-cased. Text without any
        ASCII-representable characters yields ``b""``.
    """
    decomposed = unicodedata.normalize('NFKD', s)
    return decomposed.encode('ASCII', 'ignore').strip().lower()
def build_shingles(s, l=5):
    """Split a string into overlapping fixed-length windows ("shingles").

    The input is first passed through ``normalise``. Every window of length
    ``l`` over the normalised text is returned, in order and including
    duplicates. Text shorter than ``l`` yields a single shingle holding the
    whole text; empty text yields no shingles.

    Args:
        s: The string to split.
        l: Window length.

    Returns:
        A list of slices of the normalised text.
    """
    s = normalise(s)
    if len(s) == 0:
        return []
    # max() guarantees at least one window when the text is shorter than l.
    window_count = max(l, len(s)) - l + 1
    return [s[start:start + l] for start in range(window_count)]
def has_child(root, tag_name):
    """Tell whether an element tree contains an element with the given tag.

    The search uses ``Element.iter``, so it covers ``root`` itself and all
    of its descendants, not only direct children.

    Args:
        root: The element whose subtree is searched.
        tag_name: The tag to look for.

    Returns:
        ``True`` if at least one matching element exists, else ``False``.
    """
    return next(root.iter(tag_name), None) is not None
def build_index(index, root, elem_name, tags_to_copy, tags_to_use):
    """Populate a shingle -> elements lookup table from a tree.

    Every child of ``root`` tagged ``elem_name`` that carries at least one
    of ``tags_to_copy`` is registered under each shingle of its identifier
    (see ``build_id`` / ``build_shingles``). Children with a different tag
    are searched recursively.

    Args:
        index: Dict that is filled in place; maps a shingle to the list of
            elements whose identifier contains it.
        root: Element whose children are scanned.
        elem_name: Tag of the elements to index.
        tags_to_copy: Tags of which an element must contain at least one to
            be worth indexing.
        tags_to_use: Tags that make up an element's identifier.
    """
    for child in root:
        if child.tag != elem_name:
            build_index(index, child, elem_name, tags_to_copy, tags_to_use)
            continue
        # Elements without anything to copy can never serve as a source.
        if not any(has_child(child, tag) for tag in tags_to_copy):
            continue
        for shingle in build_shingles(build_id(child, tags_to_use)):
            index.setdefault(shingle, []).append(child)
def copy_tags(index, root, elem_name, tags_to_copy, tags_to_use):
    """Copy tags from indexed elements into their best-matching counterparts.

    For every child of ``root`` tagged ``elem_name`` that lacks at least one
    of ``tags_to_copy``, candidate source elements are looked up in
    ``index`` via the shingles of the child's identifier. The candidate
    whose identifier is most similar (``difflib.SequenceMatcher`` ratio) is
    used if the ratio exceeds 0.95; the first matching element for each
    missing tag is then appended to the child and the match is printed.
    Children with a different tag are processed recursively.

    Args:
        index: Shingle -> list-of-elements table as filled by
            ``build_index``.
        root: Element whose children are updated in place.
        elem_name: Tag of the elements to update.
        tags_to_copy: Tags to copy over when missing.
        tags_to_use: Tags that make up an element's identifier.
    """
    for child in root:
        if child.tag != elem_name:
            # Fixed: the original called the undefined name ``copy_tag``.
            copy_tags(index, child, elem_name, tags_to_copy, tags_to_use)
            continue

        # Skip elements which already have all the tags that should be
        # copied to them.
        if all(has_child(child, tag) for tag in tags_to_copy):
            continue

        # Build the id and shingles of this element.
        new_id = build_id(child, tags_to_use)

        # Collect candidate source elements, de-duplicated by their id.
        old_elems = {}
        for shingle in build_shingles(new_id):
            for elem in index.get(shingle, ()):
                old_elems[build_id(elem, tags_to_use)] = elem

        # For each candidate calculate the similarity to the new entry and
        # remember the best one.
        best_sim = 0.0
        best_elem = None
        best_id = ""
        for old_id, old_elem in old_elems.items():
            sim = difflib.SequenceMatcher(a=new_id, b=old_id).ratio()
            if sim > best_sim:
                best_sim = sim
                best_elem = old_elem
                best_id = old_id

        # Only near-identical ids are accepted as the same entry.
        if best_sim > 0.95:
            print(new_id, " --> ", best_id)
            for tag_to_copy in tags_to_copy:
                if has_child(child, tag_to_copy):
                    continue
                # Only the first matching source element is copied. Note
                # that the element object itself is appended, i.e. it is
                # shared with the tree it came from.
                src = next(best_elem.iter(tag_to_copy), None)
                if src is not None:
                    child.append(src)
                    print(tag_to_copy, ": ", src.text)
# Parse both databases; only the new tree is modified and written back out.
print("Loading old and new database...")
old_db_tree = ET.parse(sys.argv[1])
new_db_tree = ET.parse(sys.argv[2])
old_db = old_db_tree.getroot()
new_db = new_db_tree.getroot()

# Index the rated entries of the old database by the shingles of their
# title/artist/album/duration identifier.
print("Building index...")
index = {}
tags_to_use = ["title", "artist", "album", "duration"]
tags_to_copy = ["rating"]
build_index(index, old_db, "entry", tags_to_copy, tags_to_use)

print("Copying ratings from the old to the new file...")
copy_tags(index, new_db, "entry", tags_to_copy, tags_to_use)

# The result goes to the third argument, leaving both inputs untouched
# unless the same path is given again.
print("Writing updated database to file...")
new_db_tree.write(sys.argv[3], encoding="UTF-8")
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.