maxrothman · June 20, 2021 18:45
diff --git a/README.md b/README.md
diff --git a/parts2ossia.py b/parts2ossia.py
 #!/usr/bin/env python

 """
 Convert additional staves in musicxml scores into ossias. This is useful for making scores
 searchable by search.py.

 In particular, this script takes a 2-staff file and adds the following code to it:
 <staff-details number="1">
  <staff-type>ossia</staff-type>
 </staff-details>

 This marks staff #1 as an ossia, so that it is recognized as a variant by search.py, so search.py
 can find substrings in and around the variant.

 Installation:
 * Clone the repo
 * Make a virtualenv with python3 and pip install lxml
 """


 from argparse import ArgumentParser
 from io import BytesIO
 from zipfile import ZipFile

 # There's no way to preserve xml and doctype declarations with xml.etree, so we have to use lxml
 from lxml import etree as ET


 def parse_args():
    """
    Parse this script's command-line arguments.

    Returns the argparse namespace, which carries `file` (the positional path of the file
    to modify) and `test` (True when -t/--test was passed, False otherwise).
    """
    summary = """
        Convert a 2-staff musicxml score into a score with a single main staff and an ossia staff,
        as annotated by the <staff-details> element. The convention followed by this script is
        that the first staff is the ossia staff and the second is the main staff.
    """
    cli = ArgumentParser(description=summary)

    cli.add_argument('file', help="File to modify")
    cli.add_argument(
        '-t', '--test',
        action='store_true',
        help="Show the modified musicxml instead of saving it",
    )

    return cli.parse_args()


 def add_ossia_marker(tree):
    """
    Mark staff #1 of a 2-staff score as an ossia, modifying `tree` in place.

    The element <staff-details number="1"><staff-type>ossia</staff-type></staff-details> is
    inserted right after the last <clef> child of the first element matching
    part/measure/attributes. If that exact marker is already there, the tree is left alone
    so that running the script twice on a file doesn't add a duplicate.

    Raises ValueError if the <attributes>, <staves>, or <clef> elements are missing, or if
    the score doesn't declare exactly 2 staves.
    """
    attributes = tree.getroot().find('./part/measure/attributes')
    # Compare against None explicitly: an element without children is falsy
    if attributes is None:
        raise ValueError("No <attributes> element found at part/measure/attributes")

    staves = attributes.find('staves')
    if staves is None or not (staves.text or '').strip():
        raise ValueError("Expected only 2 staves, but the score doesn't declare a <staves> count")
    num_staves = int(staves.text)
    if num_staves != 2:
        raise ValueError(f"Expected only 2 staves, got {num_staves}")

    # Nothing to do if staff #1 was already marked as an ossia
    for details in attributes.findall('staff-details'):
        staff_type = details.find('staff-type')
        if details.get('number') == '1' and staff_type is not None and staff_type.text == 'ossia':
            return

    clef_idxs = [i for i, el in enumerate(attributes) if el.tag == 'clef']
    if not clef_idxs:
        raise ValueError("No <clef> element found to insert the ossia marker after")
    attributes.insert(clef_idxs[-1] + 1, ET.fromstring(
        '<staff-details number="1"><staff-type>ossia</staff-type></staff-details>'
    ))


 def tree2str(tree):
    """
    Serialize `tree` to a string that starts with an xml declaration.

    The document is always serialized and declared as UTF-8, whatever encoding the source
    document used. main() hands the returned text to ZipFile.writestr, which encodes str
    data as UTF-8, so declaring any other encoding would produce a file whose declaration
    disagrees with its bytes (and decoding non-UTF-8 bytes as UTF-8 here could fail outright).
    """
    # Writing to a buffer appears to be the only way to preserve the xml and doctype declarations
    buffer = BytesIO()
    tree.write(buffer, xml_declaration=True, encoding='UTF-8')
    return buffer.getvalue().decode('utf-8')


 def main():
    """
    Command-line entry point: mark the first staff of a compressed musicxml file as an ossia.

    The archive named on the command line is expected to hold exactly one member outside
    META-INF/; that member is parsed and passed to add_ossia_marker(). With -t/--test the
    modified musicxml is printed and the file is not touched. Otherwise the archive is
    rewritten in place: the score member is replaced and every other member (such as
    META-INF/container.xml) is written back unchanged, each with its original compression.

    Raises ValueError if the archive doesn't contain exactly one candidate score member.
    """
    args = parse_args()

    with ZipFile(args.file) as zipf:
        # Load every member now: reopening the archive in 'w' mode below truncates it
        members = [(info, zipf.read(info)) for info in zipf.infolist()]

    candidates = [(info, data) for info, data in members if not info.filename.startswith('META-INF/')]
    # Raised explicitly rather than asserted so the check isn't stripped by `python -O`
    if len(candidates) != 1:
        raise ValueError(
            f"Expected exactly 1 relevant file in {args.file}, found {len(candidates)}! "
            "Malformed mxl file?"
        )
    score_info, score_data = candidates[0]

    tree = ET.parse(BytesIO(score_data))
    add_ossia_marker(tree)
    modified = tree2str(tree)

    if args.test:
        print(modified)
    else:
        with ZipFile(args.file, 'w') as zipf:
            for info, data in members:
                # Passing the original ZipInfo keeps the member's name and compression type
                zipf.writestr(info, modified if info is score_info else data)



 if __name__ == '__main__':
    main()
diff --git a/search.py b/search.py
 #!/usr/bin/env python

 """
 Fuzzy music searcher

 This script takes a music fragment written in music21's TinyNotation, fuzzy-searches through
 the current directory for matching musicxml files, and outputs the names of the files and the
 match percentages of their best-matching subsequence.

 It's sometimes hard to get a TinyNotation string right on the first try, so this script also is
 capable of rendering the input string as an image of music notation. This feature uses iTerm2's
 inline image feature, and thus is not available in other terminal emulators.

 You can find more info on TinyNotation here: https://web.mit.edu/music21/doc/usersGuide/usersGuide_16_tinyNotation.html

 Example:
 $ search.py "a8 b c d' e' f'" -s
 <image of notation>
 $ search.py "a8 b c d' e' f'"
 55%  file1.mxl
 63%  file2.mxl
 95%  file3.mxl

 Installation:
 * Clone the repo
 * Make a virtualenv with python3 and install the requirements: pillow, fuzzywuzzy[speedup], git+https://github.com/maxrothman/music21
 * That's it!

 Notes:

 * Without the [speedup] version of fuzzywuzzy, you'll get incorrect results! By default,
  difflib.SequenceMatcher does automatic junk detection and fuzzywuzzy doesn't expose an option to
  turn it off. Using [speedup] makes fuzzywuzzy use python-Levenshtein instead, which doesn't do any
  junk detection (thankfully).

 * This script currently depends on my fork of music21 for ossia detection. You'll have to install
  the fork if you want ossia detection to work, at least until it's merged back into the main music21 repo.
  (See https://github.com/cuthbertLab/music21/pull/449)

 * This script only works on scores that only have a single non-ossia part, and if ossias are
  annotated in musicxml using the <staff-details> element. If you have a multi-part score 
  where the ossia part(s) are not annotated this way, you can use parts2ossia.py to fix it.

 How it works:

 The fuzzy searching works by using music21's search.translateIntervalsAndSpeed() to transform a
 flattened part into a string encoding the relative speeds and intervals of adjacent notes. This way,
 matches don't depend on the notated speed and key of scores. Ossias (aka variants) are activated one
 at a time, transformed, and appended to the string. Then the search string is likewise converted and
 fuzzywuzzy is used to perform a fuzzy substring search for the search string in the score string.
 This is repeated for all *.mxl files in the current directory, and matches are output.
 """

 from argparse import ArgumentParser
 from base64 import b64encode
 from io import BytesIO
 from pathlib import Path
 import sys
 from textwrap import dedent

 from fuzzywuzzy import fuzz
 import music21 as m21  # Hot spot
 from PIL import Image

 # NB: if something should come up in search and doesn't, make sure there's a .mxl file for it.
 # To check for missing mxl files, use `comm -2 -3 <(basename -s .mscz *.mscz | sort) <(basename -s .mxl *.mxl | sort)`

 def parse_args():
    """
    Parse this script's command-line arguments.

    Returns the argparse namespace, which carries `search_string` (the positional music
    fragment) and `show` (True when -s/--show was passed, False otherwise).
    """
    summary = """
            Search for musicxml files in the current directory that match a TinyNotation-formatted
            music fragment
        """
    cli = ArgumentParser(description=summary)
    cli.add_argument('search_string', help="Music fragment to search for")
    cli.add_argument(
        '-s', '--show',
        action='store_true',
        help="Show the fragment as an image instead of searching for it. Requires iTerm2.",
    )

    return cli.parse_args()


 def score2str(score):
    """
    Translate a m21 score/stream into a searchable string

    If the stream contains Part objects, the ones whose custom metadata has
    'staff-type' == 'ossia' are merged into the single remaining part as variants. A stream
    without any Part is used as the main part directly. The result is the interval/speed
    translation of the main part, followed by one translation per variant (with that
    variant activated), all joined by '|||||'.

    Raises AssertionError if the stream contains parts but not exactly 1 non-ossia part.
    """
    def part2str(part):
        # Chord symbols count as notes for some reason, so we have to filter them out or else
        # they'll make translateIntervalsAndSpeed blow up
        flat = part.flat.notesAndRests.getElementsNotOfClass(m21.harmony.ChordSymbol)
        return m21.search.translateIntervalsAndSpeed(flat)

    # Turn any ossia-like parts into actual variants
    parts = list(score.recurse().getElementsByClass('Part'))
    if parts:
        # Split the same list into ossias and everything else, so the count that's checked
        # and the part that's picked can never disagree
        ossias, main_parts = [], []
        for p in parts:
            is_ossia = p.metadata and p.metadata.custom.get('staff-type') == 'ossia'
            (ossias if is_ossia else main_parts).append(p)

        # Raised explicitly rather than asserted so the check isn't stripped by `python -O`.
        # The type stays AssertionError because search() catches it to add the file name.
        if len(main_parts) != 1:
            raise AssertionError(
                "I don't know how to handle scores with more than 1 non-ossia part! "
                "If the other parts were supposed to be ossias, try running parts2ossia.py on the score."
            )
        main_part = main_parts[0]

        for ossia in ossias:
            m21.variant.mergePartAsOssia(main_part, ossia, ossia.id, inPlace=True)
    else:
        main_part = score

    pieces = [part2str(main_part)]

    # Search each variant separately
    for variant in main_part.variants:
        variant_part = main_part.activateVariants(variant.groups[0])
        pieces.append(part2str(variant_part))

    return '|||||'.join(pieces)


 # The cutoff is arbitrary, feel free to change it
 def search(search_str, path='.', cutoff=50):
    """
    Fuzzy-search the *.mxl files directly inside `path` for `search_str`.

    Each file is parsed with music21, translated with score2str(), and compared to
    `search_str` (which should itself be a score2str() result) using
    fuzz.partial_ratio. Files scoring strictly above `cutoff` are kept.

    Returns a list of (file name, match percentage) tuples sorted by ascending percentage,
    with ties ordered by file name.

    Raises AssertionError, with the offending file's path appended to the message, when
    score2str() rejects a file.
    """
    results = []
    for mxl_file in Path(path).glob('*.mxl'):
        work = m21.converter.parseFile(mxl_file)  # Hot spot

        try:
            work_str = score2str(work)
        except AssertionError as e:
            # Chain explicitly so the traceback shows the original failure as the cause
            raise AssertionError(f'{str(e)}: {mxl_file}') from e

        match_pct = fuzz.partial_ratio(search_str, work_str)
        if match_pct > cutoff:
            results.append((mxl_file.name, match_pct))

    # glob() yields files in whatever order the filesystem lists them, so break ties on the
    # file name to keep the output order reproducible
    return sorted(results, key=lambda x: (x[1], x[0]))


 def show_iterm(score):
    """
    Use iTerm2's inline image protocol to show an image of rendered music notation
    See <https://www.iterm2.com/documentation-images.html> for details

    `score` is rendered to a PNG via score.write('musicxml.png'), flattened onto a white
    background, and written to stdout as a base64-encoded inline image.
    """
    file_name = score.write('musicxml.png')

    # By default, the score image has a transparent background, which doesn't show up great on
    # a black terminal background. Instead, give it a white background.
    # Stolen from https://stackoverflow.com/questions/9166400/convert-rgba-png-to-rgb-with-pil/9459208
    with Image.open(file_name) as png:
        # Converting to RGBA guarantees there's an alpha band to use as the paste mask, even
        # if the rendered PNG came out without transparency
        rgba = png.convert('RGBA')
    background = Image.new('RGB', rgba.size, (255, 255, 255))
    background.paste(rgba, mask=rgba.split()[3])

    buffer = BytesIO()
    background.save(buffer, 'PNG')

    # The escape sequence must be terminated with BEL (\a). Leaving it open is presumably
    # what used to leave the cursor a weird color until the next newline.
    stdout = sys.stdout.buffer
    stdout.write(b"\033]1337;File=inline=1:" + b64encode(buffer.getvalue()) + b"\a\n")
    stdout.flush()


 def main():
    """
    Command-line entry point.

    Parses the search string as TinyNotation, then either shows it as an image
    (-s/--show) or searches the current directory and prints one "<pct>%  <file>" line
    per match.
    """
    args = parse_args()
    fragment = m21.converter.parse(f'tinynotation: {args.search_string}')

    if args.show:
        show_iterm(fragment)
        return

    matches = search(score2str(fragment))
    report = '\n'.join(f'{int(pct)}%  {name}' for name, pct in matches)
    print(report)
    


 if __name__ == '__main__':
    main()
	#!/usr/bin/env python

	"""
	Convert additional staves in musicxml scores into ossias. This is useful for making scores
	searchable by search.py.

	In particular, this script takes a 2-staff file and adds the following code to it:
	<staff-details number="1">
	<staff-type>ossia</staff-type>
	</staff-details>

	This marks staff #1 as an ossia, so that it is recognized as a variant by search.py, so search.py
	can find substrings in and around the variant.

	Installation:
	* Clone the repo
	* Make a virtualenv with python3 and pip install lxml
	"""


	from argparse import ArgumentParser
	from io import BytesIO
	from zipfile import ZipFile

	# There's no way to preserve xml and doctype declarations with xml.etree, so we have to use lxml
	from lxml import etree as ET


	def parse_args():
	    """
	    Parse this script's command-line arguments.

	    Returns the argparse namespace, which carries `file` (the positional path of the file
	    to modify) and `test` (True when -t/--test was passed, False otherwise).
	    """
	    parser = ArgumentParser(description="""
	        Convert a 2-staff musicxml score into a score with a single main staff and an ossia staff,
	        as annotated by the <staff-details> element. The convention followed by this script is
	        that the first staff is the ossia staff and the second is the main staff.
	    """)

	    parser.add_argument('file', help="File to modify")
	    parser.add_argument('-t', '--test', action='store_true',
	        help="Show the modified musicxml instead of saving it")

	    return parser.parse_args()


	def add_ossia_marker(tree):
	    """
	    Mark staff #1 of a 2-staff score as an ossia, modifying `tree` in place.

	    The element <staff-details number="1"><staff-type>ossia</staff-type></staff-details> is
	    inserted right after the last <clef> child of the first element matching
	    part/measure/attributes. If that exact marker is already there, the tree is left alone
	    so that running the script twice on a file doesn't add a duplicate.

	    Raises ValueError if the <attributes>, <staves>, or <clef> elements are missing, or if
	    the score doesn't declare exactly 2 staves.
	    """
	    attributes = tree.getroot().find('./part/measure/attributes')
	    # Compare against None explicitly: an element without children is falsy
	    if attributes is None:
	        raise ValueError("No <attributes> element found at part/measure/attributes")

	    staves = attributes.find('staves')
	    if staves is None or not (staves.text or '').strip():
	        raise ValueError("Expected only 2 staves, but the score doesn't declare a <staves> count")
	    num_staves = int(staves.text)
	    if num_staves != 2:
	        raise ValueError(f"Expected only 2 staves, got {num_staves}")

	    # Nothing to do if staff #1 was already marked as an ossia
	    for details in attributes.findall('staff-details'):
	        staff_type = details.find('staff-type')
	        if details.get('number') == '1' and staff_type is not None and staff_type.text == 'ossia':
	            return

	    clef_idxs = [i for i, el in enumerate(attributes) if el.tag == 'clef']
	    if not clef_idxs:
	        raise ValueError("No <clef> element found to insert the ossia marker after")
	    attributes.insert(clef_idxs[-1] + 1, ET.fromstring(
	        '<staff-details number="1"><staff-type>ossia</staff-type></staff-details>'
	    ))


	def tree2str(tree):
	    """
	    Serialize `tree` to a string that starts with an xml declaration.

	    The document is always serialized and declared as UTF-8, whatever encoding the source
	    document used. main() hands the returned text to ZipFile.writestr, which encodes str
	    data as UTF-8, so declaring any other encoding would produce a file whose declaration
	    disagrees with its bytes (and decoding non-UTF-8 bytes as UTF-8 here could fail outright).
	    """
	    # Writing to a buffer appears to be the only way to preserve the xml and doctype declarations
	    buffer = BytesIO()
	    tree.write(buffer, xml_declaration=True, encoding='UTF-8')
	    return buffer.getvalue().decode('utf-8')


	def main():
	    """
	    Command-line entry point: mark the first staff of a compressed musicxml file as an ossia.

	    The archive named on the command line is expected to hold exactly one member outside
	    META-INF/; that member is parsed and passed to add_ossia_marker(). With -t/--test the
	    modified musicxml is printed and the file is not touched. Otherwise the archive is
	    rewritten in place: the score member is replaced and every other member (such as
	    META-INF/container.xml) is written back unchanged, each with its original compression.

	    Raises ValueError if the archive doesn't contain exactly one candidate score member.
	    """
	    args = parse_args()

	    with ZipFile(args.file) as zipf:
	        # Load every member now: reopening the archive in 'w' mode below truncates it
	        members = [(info, zipf.read(info)) for info in zipf.infolist()]

	    candidates = [(info, data) for info, data in members if not info.filename.startswith('META-INF/')]
	    # Raised explicitly rather than asserted so the check isn't stripped by `python -O`
	    if len(candidates) != 1:
	        raise ValueError(
	            f"Expected exactly 1 relevant file in {args.file}, found {len(candidates)}! "
	            "Malformed mxl file?"
	        )
	    score_info, score_data = candidates[0]

	    tree = ET.parse(BytesIO(score_data))
	    add_ossia_marker(tree)
	    modified = tree2str(tree)

	    if args.test:
	        print(modified)
	    else:
	        with ZipFile(args.file, 'w') as zipf:
	            for info, data in members:
	                # Passing the original ZipInfo keeps the member's name and compression type
	                zipf.writestr(info, modified if info is score_info else data)



	if __name__ == '__main__':
	main()
	#!/usr/bin/env python

	"""
	Fuzzy music searcher

	This script takes a music fragment written in music21's TinyNotation, fuzzy-searches through
	the current directory for matching musicxml files, and outputs the names of the files and the
	match percentages of their best-matching subsequence.

	It's sometimes hard to get a TinyNotation string right on the first try, so this script also is
	capable of rendering the input string as an image of music notation. This feature uses iTerm2's
	inline image feature, and thus is not available in other terminal emulators.

	You can find more info on TinyNotation here: https://web.mit.edu/music21/doc/usersGuide/usersGuide_16_tinyNotation.html

	Example:
	$ search.py "a8 b c d' e' f'" -s
	<image of notation>
	$ search.py "a8 b c d' e' f'"
	55% file1.mxl
	63% file2.mxl
	95% file3.mxl

	Installation:
	* Clone the repo
	* Make a virtualenv with python3 and install the requirements: pillow, fuzzywuzzy[speedup], git+https://github.com/maxrothman/music21
	* That's it!

	Notes:

	* Without the [speedup] version of fuzzywuzzy, you'll get incorrect results! By default,
	difflib.SequenceMatcher does automatic junk detection and fuzzywuzzy doesn't expose an option to
	turn it off. Using [speedup] makes fuzzywuzzy use python-Levenshtein instead, which doesn't do any
	junk detection (thankfully).

	* This script currently depends on my fork of music21 for ossia detection. You'll have to install
	the fork if you want ossia detection to work, at least until it's merged back into the main music21 repo.
	(See https://github.com/cuthbertLab/music21/pull/449)

	* This script only works on scores that only have a single non-ossia part, and if ossias are
	annotated in musicxml using the <staff-details> element. If you have a multi-part score
	where the ossia part(s) are not annotated this way, you can use parts2ossia.py to fix it.

	How it works:

	The fuzzy searching works by using music21's search.translateIntervalsAndSpeed() to transform a
	flattened part into a string encoding the relative speeds and intervals of adjacent notes. This way,
	matches don't depend on the notated speed and key of scores. Ossias (aka variants) are activated one
	at a time, transformed, and appended to the string. Then the search string is likewise converted and
	fuzzywuzzy is used to perform a fuzzy substring search for the search string in the score string.
	This is repeated for all *.mxl files in the current directory, and matches are output.
	"""

	from argparse import ArgumentParser
	from base64 import b64encode
	from io import BytesIO
	from pathlib import Path
	import sys
	from textwrap import dedent

	from fuzzywuzzy import fuzz
	import music21 as m21 # Hot spot
	from PIL import Image

	# NB: if something should come up in search and doesn't, make sure there's a .mxl file for it.
	# To check for missing mxl files, use `comm -2 -3 <(basename -s .mscz *.mscz | sort) <(basename -s .mxl *.mxl | sort)`

	def parse_args():
	    """
	    Parse this script's command-line arguments.

	    Returns the argparse namespace, which carries `search_string` (the positional music
	    fragment) and `show` (True when -s/--show was passed, False otherwise).
	    """
	    parser = ArgumentParser(
	        description = """
	            Search for musicxml files in the current directory that match a TinyNotation-formatted
	            music fragment
	        """
	    )
	    parser.add_argument('search_string', help="Music fragment to search for")
	    parser.add_argument('-s', '--show', action='store_true',
	        help="Show the fragment as an image instead of searching for it. Requires iTerm2.")

	    return parser.parse_args()


	def score2str(score):
	    """
	    Translate a m21 score/stream into a searchable string

	    If the stream contains Part objects, the ones whose custom metadata has
	    'staff-type' == 'ossia' are merged into the single remaining part as variants. A stream
	    without any Part is used as the main part directly. The result is the interval/speed
	    translation of the main part, followed by one translation per variant (with that
	    variant activated), all joined by '|||||'.

	    Raises AssertionError if the stream contains parts but not exactly 1 non-ossia part.
	    """
	    def part2str(part):
	        # Chord symbols count as notes for some reason, so we have to filter them out or else
	        # they'll make translateIntervalsAndSpeed blow up
	        flat = part.flat.notesAndRests.getElementsNotOfClass(m21.harmony.ChordSymbol)
	        return m21.search.translateIntervalsAndSpeed(flat)

	    # Turn any ossia-like parts into actual variants
	    parts = list(score.recurse().getElementsByClass('Part'))
	    if parts:
	        # Split the same list into ossias and everything else, so the count that's checked
	        # and the part that's picked can never disagree
	        ossias, main_parts = [], []
	        for p in parts:
	            is_ossia = p.metadata and p.metadata.custom.get('staff-type') == 'ossia'
	            (ossias if is_ossia else main_parts).append(p)

	        # Raised explicitly rather than asserted so the check isn't stripped by `python -O`.
	        # The type stays AssertionError because search() catches it to add the file name.
	        if len(main_parts) != 1:
	            raise AssertionError(
	                "I don't know how to handle scores with more than 1 non-ossia part! "
	                "If the other parts were supposed to be ossias, try running parts2ossia.py on the score."
	            )
	        main_part = main_parts[0]

	        for ossia in ossias:
	            m21.variant.mergePartAsOssia(main_part, ossia, ossia.id, inPlace=True)
	    else:
	        main_part = score

	    pieces = [part2str(main_part)]

	    # Search each variant separately
	    for variant in main_part.variants:
	        variant_part = main_part.activateVariants(variant.groups[0])
	        pieces.append(part2str(variant_part))

	    # Plain pipes: a backslash-escaped pipe would put literal backslashes in the separator
	    return '|||||'.join(pieces)


	# The cutoff is arbitrary, feel free to change it
	def search(search_str, path='.', cutoff=50):
	    """
	    Fuzzy-search the *.mxl files directly inside `path` for `search_str`.

	    Each file is parsed with music21, translated with score2str(), and compared to
	    `search_str` (which should itself be a score2str() result) using
	    fuzz.partial_ratio. Files scoring strictly above `cutoff` are kept.

	    Returns a list of (file name, match percentage) tuples sorted by ascending percentage,
	    with ties ordered by file name.

	    Raises AssertionError, with the offending file's path appended to the message, when
	    score2str() rejects a file.
	    """
	    results = []
	    for mxl_file in Path(path).glob('*.mxl'):
	        work = m21.converter.parseFile(mxl_file)  # Hot spot

	        try:
	            work_str = score2str(work)
	        except AssertionError as e:
	            # Chain explicitly so the traceback shows the original failure as the cause
	            raise AssertionError(f'{str(e)}: {mxl_file}') from e

	        match_pct = fuzz.partial_ratio(search_str, work_str)
	        if match_pct > cutoff:
	            results.append((mxl_file.name, match_pct))

	    # glob() yields files in whatever order the filesystem lists them, so break ties on the
	    # file name to keep the output order reproducible
	    return sorted(results, key=lambda x: (x[1], x[0]))


	def show_iterm(score):
	    """
	    Use iTerm2's inline image protocol to show an image of rendered music notation
	    See <https://www.iterm2.com/documentation-images.html> for details

	    `score` is rendered to a PNG via score.write('musicxml.png'), flattened onto a white
	    background, and written to stdout as a base64-encoded inline image.
	    """
	    file_name = score.write('musicxml.png')

	    # By default, the score image has a transparent background, which doesn't show up great on
	    # a black terminal background. Instead, give it a white background.
	    # Stolen from https://stackoverflow.com/questions/9166400/convert-rgba-png-to-rgb-with-pil/9459208
	    with Image.open(file_name) as png:
	        # Converting to RGBA guarantees there's an alpha band to use as the paste mask, even
	        # if the rendered PNG came out without transparency
	        rgba = png.convert('RGBA')
	    background = Image.new('RGB', rgba.size, (255, 255, 255))
	    background.paste(rgba, mask=rgba.split()[3])

	    buffer = BytesIO()
	    background.save(buffer, 'PNG')

	    # The escape sequence must be terminated with BEL (\a). Leaving it open is presumably
	    # what used to leave the cursor a weird color until the next newline.
	    stdout = sys.stdout.buffer
	    stdout.write(b"\033]1337;File=inline=1:" + b64encode(buffer.getvalue()) + b"\a\n")
	    stdout.flush()


	def main():
	    """
	    Command-line entry point.

	    Parses the search string as TinyNotation, then either shows it as an image
	    (-s/--show) or searches the current directory and prints one "<pct>% <file>" line
	    per match.
	    """
	    args = parse_args()
	    search_fragment = m21.converter.parse(f'tinynotation: {args.search_string}')
	    if args.show:
	        show_iterm(search_fragment)
	    else:
	        results = search(score2str(search_fragment))
	        print('\n'.join([f'{int(r[1])}% {r[0]}' for r in results]))



	if __name__ == '__main__':
	main()