Skip to content

Instantly share code, notes, and snippets.

@natew
Created September 8, 2012 23:22
Show Gist options
  • Save natew/3680949 to your computer and use it in GitHub Desktop.
Save natew/3680949 to your computer and use it in GitHub Desktop.
Rails model for parsing artist information from a song
# Parses artist credits out of a song's title and artist strings.
#
# The parsing methods return arrays of `[artist_name, type]` pairs where
# `type` is one of :original, :mashup, :featured, :producer, :remixer or
# :cover.
#
# This class relies on names defined elsewhere: the attributes `id`,
# `full_name`, `name`, `artist_name` and `link_text`, a `logger`, `blank?`,
# and the `String#clean_split` / `String#clean_scan` helpers.
class Song
  # Regular expressions
  RE = {
    # The leading \b keeps the markers from matching inside words: without it
    # "ft " is found in "Daft Punk" and "f. " in "Prof. X".
    :featured     => /\b(featuring |ft\.? |feat\.? |f\. |w\/)/i,
    :remixer      => / remix| rmx| edit| bootleg| mix| remake| re-work| rework| extended remix| bootleg remix/i,
    :mashup_split => / \+ | x | vs\.? /i,
    :producer     => /^(produced by|prod\.? by |prod\. )/i,
    :cover        => / cover/i,
    # Splits "one, two & three"
    # NOTE(review): a repeated group keeps only its last capture, so middle
    # entries of longer comma lists look like they are dropped -- confirm.
    :split        => /([^,&]+)(& ?([^,&]+)|, ?([^,&]+))*/i,
    :open         => /[\(\[\{]/,
    :close        => /[\)\]\}]/,
    :containers   => /[\{\[\(\)\]\}]/i,
    :percents     => /(% ?){2,10}/,
    :remove       => /(extended|vip|original|club|vocal) mix|(extended|vip|radio) edit|(on )?soundcloud|(exclusive )?free (download|d\/?l)/i,
    :and          => /, | & | and /i
  }.freeze

  # Per-credit-type patterns: a credit marker combined with an artist list.
  SPLITS = {
    :featured => /#{RE[:featured]}#{RE[:split]}/i,
    :producer => /#{RE[:producer]}#{RE[:split]}/i,
    :remixer  => /#{RE[:split]}#{RE[:remixer]}/i,
    :cover    => /#{RE[:split]}#{RE[:cover]}/i,
    :mashup   => /#{RE[:mashup_split]}/i
  }.freeze

  # Text removed from a captured artist name, per credit type.
  STRIP = {
    :remixer  => /\'s.*| (vocal|instrumental|(summer|fall|spring|winter)( 2[0-9]{3})?) /i,
    :producer => /^by /i
  }.freeze

  # What must precede a credit marker. Start-of-string is accepted because
  # split_and_find_artists removes the brackets first, so "(feat. Bob)"
  # arrives here as "feat. Bob". The group is non-capturing so the prefix
  # itself is never reported as an artist.
  PREFIX = {
    :featured => /(?:\A| |\(|\[|\{)/
  }.freeze

  # Everything from the first bracket, featuring marker or producer marker
  # onwards; find_artists cuts this off to isolate the lead artist(s).
  TRAILING_CREDITS = /#{RE[:open]}.*|#{RE[:featured]}.*|#{RE[:producer]}.*/i

  # Collects every artist credit for this song.
  #
  # The title comes from `name` and the artist string from `artist_name`,
  # each falling back to the matching half of `link_info`. A missing value is
  # treated as an empty string instead of raising.
  #
  # @return [Array<Array(String, Symbol)>] unique `[artist, type]` pairs
  def parse_artists
    logger.info "#{id}: #{full_name}"
    parse_name   = (name || link_info[1]).to_s.gsub(RE[:remove], '')
    parse_artist = (artist_name || link_info[0]).to_s
    split_and_find_artists(parse_name) | find_artists(parse_artist) | split_and_find_artists(parse_artist)
  end

  # Finds the lead artist(s) in an artist string, ignoring anything after the
  # first bracket, featuring marker or producer marker.
  #
  # @param name [String, nil] artist string
  # @return [Array<Array(String, Symbol)>] :mashup pairs when the string
  #   contains a mashup separator, :original pairs otherwise
  def find_artists(name)
    search_name = name.to_s.gsub(TRAILING_CREDITS, '')
    matches = []
    if has_mashups(search_name)
      find_mashups(search_name) { |artist| matches.push [artist, :mashup] }
    else
      search_name.clean_split(RE[:and]) { |artist| matches.push [artist.strip, :original] }
    end
    # Nothing survived the split: fall back to the whole remaining string.
    matches.push [search_name.strip, :original] if matches.empty?
    reject_blank_artists(matches)
  end

  # Splits a string on bracket characters and looks for credits in each part.
  #
  # @param name [String, nil] title or artist string
  # @return [Array<Array(String, Symbol)>] `[artist, type]` pairs
  def split_and_find_artists(name)
    matches = []
    name.to_s.clean_split(RE[:containers]) do |part|
      if has_mashups(part)
        find_mashups(part) do |artist|
          matches.push [artist.gsub(RE[:remixer], ''), :mashup]
        end
      else
        matches.concat(find_artists_types(part, true))
      end
    end
    reject_blank_artists(matches)
  end

  # @param name [String]
  # @return [Boolean] whether the string contains a mashup separator
  def has_mashups(name)
    !!(name =~ RE[:mashup_split])
  end

  # Yields each artist of a mashup string: split on the mashup separators
  # first, then on the "and" separators.
  #
  # @param name [String]
  # @yieldparam artist [String]
  def find_mashups(name)
    name.clean_split(RE[:mashup_split]) do |group|
      group.clean_split(RE[:and]) { |artist| yield artist }
    end
  end

  # Looks for producer and featured credits in a part; remixer and cover
  # credits are included as well when `container` is true.
  #
  # @param part [String, nil]
  # @param container [Boolean]
  # @return [Array<Array(String, Symbol)>] `[artist, type]` pairs
  def find_artists_types(part, container = false)
    types = [:producer, :featured]
    types += [:remixer, :cover] if container
    matches = []
    scan_for(part, types) { |match| matches.push match }
    reject_blank_artists(matches)
  end

  # Runs scan_artists for every given type, yielding each pair found.
  #
  # @param part [String, nil]
  # @param types [Array<Symbol>]
  # @yieldparam match [Array(String, Symbol)]
  def scan_for(part, types)
    types.each do |type|
      scan_artists(part, type) { |match| yield match }
    end
  end

  # Scans a part for credits of one type and yields an `[artist, type]` pair
  # for each artist found. Does nothing when `part` is nil.
  #
  # @param part [String, nil]
  # @param type [Symbol] a key of SPLITS
  # @yieldparam match [Array(String, Symbol)]
  def scan_artists(part, type)
    return unless part
    pattern = /#{PREFIX[type]}#{SPLITS[type]}/i
    part.clean_scan(pattern, RE[type]) do |artist|
      artist = artist.gsub(STRIP[type], '') if STRIP.key?(type)
      artist.split(RE[:and]).each { |single| yield [single, type] }
    end
  end

  # Splits `link_text` on the first dash-like separators.
  #
  # @return [Array] `[artist, title]` when a separator is present,
  #   `[nil, nil]` when it is not, `['', '']` when `link_text` is not set
  def link_info
    return ['', ''] unless link_text
    # The separator is captured, so the pieces alternate text, dash, text, ...
    pieces = link_text.split(/\s*(-|—|–)\s*/)
    pieces.size >= 3 ? [pieces[0], pieces[2]] : [nil, nil]
  end

  private

  # Drops pairs whose artist name is blank. Calling `blank?` on the pair
  # itself never filters anything, because a two-element array is not blank.
  def reject_blank_artists(pairs)
    pairs.reject { |artist, _type| artist.blank? }
  end
end
class String
# Splits the string on `regex`, discards blank pieces, strips the rest and
# yields each one in order.
#
# @param regex [Regexp, String] separator passed to String#split
# @yieldparam part [String] a stripped, non-blank piece
# @return [Array<String>] the pieces that were yielded
def clean_split(regex)
  pieces = split(regex).reject { |piece| piece.blank? }
  trimmed = pieces.map { |piece| piece.strip }
  trimmed.each { |part| yield part }
end
# Scans the string with `regex` and yields every non-nil result, stripped.
# Results that match `reject`, or that start with "&" or ",", are skipped.
#
# @param regex [Regexp] pattern passed to String#scan
# @param reject [Regexp, nil] results matching this pattern are skipped
# @yieldparam string [String] a stripped result
# @return [Array<String>] all non-nil results, including the skipped ones
def clean_scan(regex, reject)
  found = scan(regex).flatten.compact
  found.each do |candidate|
    next if candidate =~ reject
    next if candidate =~ /^[&,]/
    yield candidate.strip
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment